2323 * @author Oshando Johnson on 27.09.20
2424 */
2525public class FeaturesHandler {
26- private ArrayList < Attribute > attributes ;
26+
2727 private Map <IFeature , Attribute > codeAttributes ;
2828 private final HashMap <String , Integer > instanceMap ;
2929 private final SwanOptions options ;
3030 private SrmList trainData ;
3131 private CodeFeatureHandler codeFeatureHandler ;
3232 private SourceFileLoader testData ;
3333 private DocFeatureHandler docFeatureHandler ;
34- private ArrayList < Instances > instances ;
34+ private HashMap < String , Instances > instances ;
3535
3636 /**
3737 * Available feature sets:
@@ -65,7 +65,7 @@ public FeaturesHandler(SrmList trainData, SourceFileLoader testData, SwanOptions
6565 this .options = options ;
6666 this .trainData = trainData ;
6767 this .testData = testData ;
68- instances = new ArrayList <>();
68+ instances = new HashMap <>();
6969 }
7070
7171 /**
@@ -98,20 +98,27 @@ public void createFeatures() {
9898 break ;
9999 }
100100
101- for (String category : options .getSrmClasses ()) {
101+ for (String category : options .getAllClasses ()) {
102102
103+ //TRAIN
103104 //Create attributes for feature set
104- attributes = new ArrayList <>();
105- createAttributes (getCategories (category ), trainData .getMethods (), featureSets );
105+ ArrayList <Attribute > trainAttributes = createAttributes (getCategories (category ), trainData .getMethods (), featureSets );
106+
107+ //Set attributes to the train instances.
108+ Instances trainInstances = createInstances (featureSets , Category .fromText (category ), trainAttributes , trainData .getMethods (), category + "-train-instances" );
109+ this .instances .put (category , trainInstances );
110+ Util .exportInstancesToArff (trainInstances );
111+
112+ //TEST
113+ ArrayList <Attribute > testAttributes = createAttributes (getCategories (category ), testData .getMethods (), featureSets );
106114
107- // Set attributes to the train instances.
108- Instances trainingInstances = createInstances (featureSets , Category .fromText (category ));
109- instances .add ( trainingInstances );
110- Util .exportInstancesToArff (trainingInstances );
115+ //Set attributes to the test instances.
116+ Instances testInstances = createInstances (featureSets , Category .fromText (category ), testAttributes , testData . getMethods (), category + "-test-instances" );
117+ //this. instances.put(category, trainInstances );
118+ Util .exportInstancesToArff (testInstances );
111119 }
112120 }
113121
114-
115122 public HashSet <Category > getCategories (String cat ) {
116123
117124 HashSet <Category > categories ;
@@ -132,18 +139,19 @@ public HashSet<Category> getCategories(String cat) {
132139 * @param methods list of training methods
133140 * @param featureSets classification mode
134141 */
135- public void createAttributes (Set <Category > categories , Set <Method > methods , List <FeaturesHandler .FeatureSet > featureSets ) {
142+ public ArrayList < Attribute > createAttributes (Set <Category > categories , Set <Method > methods , List <FeaturesHandler .FeatureSet > featureSets ) {
136143
144+ ArrayList <Attribute > attributes = new ArrayList <>();
137145 //Create feature set and add to attributes
138146 for (FeaturesHandler .FeatureSet featureSet : featureSets )
139147 switch (featureSet ) {
140148
141149 case CODE :
142- addCodeAttributes (categories );
150+ attributes . addAll ( addCodeAttributes (categories ) );
143151 break ;
144152 case DOC_MANUAL :
145153 case DOC_AUTO :
146- addDocAttributes (featureSet );
154+ attributes . addAll ( addDocAttributes (featureSet ) );
147155 break ;
148156 }
149157
@@ -154,15 +162,18 @@ public void createAttributes(Set<Category> categories, Set<Method> methods, List
154162 // Collect classes and add to attributes
155163 Attribute classAttr = new Attribute ("class" , categories .stream ().map (Category ::toString ).collect (Collectors .toList ()));
156164 attributes .add (classAttr );
165+
166+ return attributes ;
157167 }
158168
159169 /**
160170 * Adds SWAN features as attributes to the instance set.
161171 *
162172 * @param categories list of categories
163173 */
164- public void addCodeAttributes (Set <Category > categories ) {
174+ public ArrayList < Attribute > addCodeAttributes (Set <Category > categories ) {
165175
176+ ArrayList <Attribute > attributes = new ArrayList <>();
166177 // Collect the possible values
167178 ArrayList <String > ordinal = new ArrayList <>();
168179 ordinal .add ("true" );
@@ -185,14 +196,17 @@ public void addCodeAttributes(Set<Category> categories) {
185196 }
186197 }
187198 }
199+ return attributes ;
188200 }
189201
190202 /**
191203 * Adds SWAN-DOC features as attributes to the instance set.
192204 *
193205 * @param instanceSet classification mode
194206 */
195- public void addDocAttributes (FeaturesHandler .FeatureSet instanceSet ) {
207+ public ArrayList <Attribute > addDocAttributes (FeaturesHandler .FeatureSet instanceSet ) {
208+
209+ ArrayList <Attribute > attributes = new ArrayList <>();
196210
197211 switch (instanceSet ) {
198212 case DOC_MANUAL :
@@ -210,22 +224,22 @@ public void addDocAttributes(FeaturesHandler.FeatureSet instanceSet) {
210224 }
211225 break ;
212226 }
227+ return attributes ;
213228 }
214229
230+ public Instances createInstances (List <FeaturesHandler .FeatureSet > featureSets , Category category , ArrayList <Attribute > attributes , Set <Method > methods , String name ) {
215231
216- public Instances createInstances (List <FeaturesHandler .FeatureSet > featureSets , Category category ) {
217-
218- Instances instances = new Instances (category .toString () + "-methods-instances" , attributes , 0 );
232+ Instances instances = new Instances (name , attributes , 0 );
219233 instances .setClass (instances .attribute ("class" ));
220234
221235 for (FeaturesHandler .FeatureSet featureSet : featureSets )
222236 switch (featureSet ) {
223237 case CODE :
224- instances .addAll (getCodeInstances (instances , trainData . getMethods () , category ));
238+ instances .addAll (getCodeInstances (instances , methods , category , attributes ));
225239 break ;
226240 case DOC_MANUAL :
227241 case DOC_AUTO :
228- instances .addAll (getDocInstances (instances , trainData . getMethods () , category , featureSet ));
242+ instances .addAll (getDocInstances (instances , methods , category , featureSet , attributes ));
229243 break ;
230244 }
231245 return instances ;
@@ -238,7 +252,7 @@ public Instances createInstances(List<FeaturesHandler.FeatureSet> featureSets, C
238252 * @param methods training set
239253 * @return instance set containing data from SWAN
240254 */
241- public ArrayList <Instance > getCodeInstances (Instances instances , Set <Method > methods , Category category ) {
255+ public ArrayList <Instance > getCodeInstances (Instances instances , Set <Method > methods , Category category , ArrayList < Attribute > attributes ) {
242256
243257 ArrayList <Instance > instanceList = new ArrayList <>();
244258
@@ -249,7 +263,9 @@ public ArrayList<Instance> getCodeInstances(Instances instances, Set<Method> met
249263
250264 Instance inst = new DenseInstance (attributes .size ());
251265 inst .setDataset (instances );
252- inst .setClassValue (getCategory (method , category ));
266+
267+ if (method .getSrm () != null || method .getCwe () != null )
268+ inst .setClassValue (getCategory (method , category ));
253269
254270 for (Map .Entry <IFeature , Attribute > entry : codeAttributes .entrySet ()) {
255271 switch (entry .getKey ().applies (method )) {
@@ -279,7 +295,7 @@ public ArrayList<Instance> getCodeInstances(Instances instances, Set<Method> met
279295 * @param instances instance set
280296 * @return Instances containing data from SWAN-DOC
281297 */
282- public ArrayList <Instance > getDocInstances (Instances instances , Set <Method > methods , Category category , FeaturesHandler .FeatureSet instanceSet ) {
298+ public ArrayList <Instance > getDocInstances (Instances instances , Set <Method > methods , Category category , FeaturesHandler .FeatureSet instanceSet , ArrayList < Attribute > attributes ) {
283299
284300 ArrayList <Instance > instanceList = new ArrayList <>();
285301
@@ -295,7 +311,9 @@ public ArrayList<Instance> getDocInstances(Instances instances, Set<Method> meth
295311 inst = new DenseInstance (attributes .size ());
296312 inst .setDataset (instances );
297313 isNewInstance = true ;
298- inst .setClassValue (getCategory (method , category ));
314+
315+ if (method .getSrm () != null || method .getCwe () != null )
316+ inst .setClassValue (getCategory (method , category ));
299317 inst .setValue (instances .attribute ("id" ), method .getArffSafeSignature ());
300318 }
301319
@@ -307,6 +325,8 @@ public ArrayList<Instance> getDocInstances(Instances instances, Set<Method> meth
307325 try {
308326 IDocFeature javadocFeature = feature .newInstance ();
309327 AnnotatedMethod annotatedMethod = docFeatureHandler .getManualFeatureData ().get (method .getSignature ());
328+
329+ if (annotatedMethod !=null )
310330 inst .setValue (instances .attribute (feature .getSimpleName ()), javadocFeature .evaluate (annotatedMethod ).getTotalValue ());
311331 } catch (InstantiationException | IllegalAccessException e ) {
312332 e .printStackTrace ();
@@ -330,7 +350,7 @@ public ArrayList<Instance> getDocInstances(Instances instances, Set<Method> meth
330350 /**
331351 * Checks if method belongs to the specified category.
332352 *
333- * @param method test or training method
353+ * @param method test or training method
334354 * @param category SRM or CWE class being evaluated
335355 * @return string representation of the category assigned to the method
336356 */
@@ -350,11 +370,11 @@ else if (category.toString().contains("authentication")) {
350370 return Category .NONE .toString ();
351371 }
352372
353- public ArrayList < Instances > getInstances () {
373+ public HashMap < String , Instances > getInstances () {
354374 return instances ;
355375 }
356376
357- public void setInstances (ArrayList < Instances > instances ) {
377+ public void setInstances (HashMap < String , Instances > instances ) {
358378 this .instances = instances ;
359379 }
360380
0 commit comments