55import de .fraunhofer .iem .swan .data .Method ;
66import de .fraunhofer .iem .swan .features .code .CodeFeatureHandler ;
77import de .fraunhofer .iem .swan .features .code .soot .SourceFileLoader ;
8+ import de .fraunhofer .iem .swan .features .code .type .IFeature ;
89import de .fraunhofer .iem .swan .features .doc .DocFeatureHandler ;
910import de .fraunhofer .iem .swan .features .doc .manual .IDocFeature ;
1011import de .fraunhofer .iem .swan .features .doc .nlp .AnnotatedMethod ;
11- import de .fraunhofer .iem .swan .features .code .type .IFeature ;
1212import de .fraunhofer .iem .swan .io .dataset .SrmList ;
13- import de .fraunhofer .iem .swan .util . Util ;
13+ import de .fraunhofer .iem .swan .model . ModelEvaluator ;
1414import weka .core .Attribute ;
1515import weka .core .DenseInstance ;
1616import weka .core .Instance ;
1919import java .util .*;
2020import java .util .stream .Collectors ;
2121
22- /**
23- * @author Oshando Johnson on 27.09.20
24- */
25- public class FeaturesHandler {
22+ abstract class FeatureSet {
2623
27- private Map <IFeature , Attribute > codeAttributes ;
28- private final HashMap <String , Integer > instanceMap ;
29- private final SwanOptions options ;
30- private SrmList trainData ;
31- private CodeFeatureHandler codeFeatureHandler ;
32- private SourceFileLoader testData ;
33- private DocFeatureHandler docFeatureHandler ;
34- private HashMap <String , Instances > instances ;
24+ protected Map <IFeature , Attribute > codeAttributes ;
25+ protected final HashMap <String , Integer > instanceMap ;
26+ protected final SwanOptions options ;
27+ protected SrmList trainData ;
28+ protected CodeFeatureHandler codeFeatureHandler ;
29+ protected SourceFileLoader testData ;
30+ protected DocFeatureHandler docFeatureHandler ;
31+ protected HashMap <String , Instances > instances ;
32+ protected ModelEvaluator .Mode mode ;
3533
/**
 * Available feature sets:
 * CODE: source code features
 * DOC_MANUAL: Javadoc manual features
 * DOC_AUTO: Javadoc automatic (word embedding) features
 */
public enum Type {
    CODE("CODE"),
    DOC_AUTO("DOC-AUTO"),
    DOC_MANUAL("DOC-MANUAL");

    /** Textual label of the feature set, e.g. {@code "DOC-AUTO"}. */
    private final String value;

    Type(String value) {
        this.value = value;
    }

    /**
     * Resolves a textual label to a feature set type.
     * <p>
     * Matching is case-sensitive and substring based: the first constant (in
     * declaration order) whose label contains {@code value} is returned, so
     * {@code "MANUAL"} resolves to {@link #DOC_MANUAL}.
     *
     * @param value label, or part of a label, to look up; may be {@code null}
     * @return the first matching type, or {@code null} when {@code value} is
     *         {@code null} or no label contains it
     */
    public static Type getValue(String value) {
        // String.contains(null) throws NullPointerException; report "not found" instead.
        if (value == null) {
            return null;
        }
        for (Type featureSet : values()) {
            if (featureSet.value.contains(value)) {
                return featureSet;
            }
        }
        return null;
    }
}
6260
/**
 * Creates a feature set over the given training and test data.
 * The instance map and the map of stored instances start out empty.
 *
 * @param trainData training data
 * @param testData  loader for the test data
 * @param options   options this feature set is configured with
 * @param mode      model evaluation mode
 */
public FeatureSet(SrmList trainData, SourceFileLoader testData, SwanOptions options, ModelEvaluator.Mode mode) {
    // Values supplied by the caller.
    this.trainData = trainData;
    this.testData = testData;
    this.options = options;
    this.mode = mode;

    // Internal state, populated later.
    this.instanceMap = new HashMap<>();
    this.instances = new HashMap<>();
}
7069
7170 /**
7271 *
7372 */
74- public void createFeatures () {
73+ public List < FeatureSet . Type > initializeFeatures () {
7574
76- List <FeaturesHandler . FeatureSet > featureSets = options .getFeatureSet ().stream ()
77- .map (f -> FeaturesHandler . FeatureSet .getValue (f .toUpperCase ()))
75+ List <FeatureSet . Type > featureSets = options .getFeatureSet ().stream ()
76+ .map (f -> FeatureSet . Type .getValue (f .toUpperCase ()))
7877 .collect (Collectors .toList ());
7978
80- //Creat
81- for (FeaturesHandler .FeatureSet featureSet : featureSets )
79+ for (FeatureSet .Type featureSet : featureSets )
8280 switch (featureSet ) {
8381 case CODE :
8482 codeFeatureHandler = new CodeFeatureHandler (trainData .getClasspath (), testData .getClasspath ());
@@ -98,38 +96,7 @@ public void createFeatures() {
9896 break ;
9997 }
10098
101- for (String category : options .getAllClasses ()) {
102-
103- //TRAIN
104- //Create attributes for feature set
105- ArrayList <Attribute > trainAttributes = createAttributes (getCategories (category ), trainData .getMethods (), featureSets );
106-
107- //Set attributes to the train instances.
108- Instances trainInstances = createInstances (featureSets , Category .fromText (category ), trainAttributes , trainData .getMethods (), category + "-train-instances" );
109- this .instances .put (category , trainInstances );
110- Util .exportInstancesToArff (trainInstances );
111-
112- //TEST
113- ArrayList <Attribute > testAttributes = createAttributes (getCategories (category ), testData .getMethods (), featureSets );
114-
115- //Set attributes to the train instances.
116- Instances testInstances = createInstances (featureSets , Category .fromText (category ), testAttributes , testData .getMethods (), category + "-test-instances" );
117- //this.instances.put(category, trainInstances);
118- Util .exportInstancesToArff (testInstances );
119- }
120- }
121-
122- public HashSet <Category > getCategories (String cat ) {
123-
124- HashSet <Category > categories ;
125-
126- if (cat .contentEquals ("authentication" ))
127- categories = new HashSet <>(Arrays .asList (Category .AUTHENTICATION_TO_HIGH ,
128- Category .AUTHENTICATION_TO_LOW , Category .AUTHENTICATION_NEUTRAL , Category .NONE ));
129- else
130- categories = new HashSet <>(Arrays .asList (Category .fromText (cat ), Category .NONE ));
131-
132- return categories ;
99+ return featureSets ;
133100 }
134101
135102 /**
@@ -139,11 +106,16 @@ public HashSet<Category> getCategories(String cat) {
139106 * @param methods list of training methods
140107 * @param featureSets classification mode
141108 */
142- public ArrayList <Attribute > createAttributes (Set <Category > categories , Set <Method > methods , List <FeaturesHandler . FeatureSet > featureSets ) {
109+ public ArrayList <Attribute > createAttributes (Set <Category > categories , Set <Method > methods , List <FeatureSet . Type > featureSets ) {
143110
144111 ArrayList <Attribute > attributes = new ArrayList <>();
112+
113+ // Add method signatures as id attribute
114+ Attribute idAttr = new Attribute ("id" , methods .stream ().map (Method ::getArffSafeSignature ).collect (Collectors .toList ()));
115+ attributes .add (idAttr );
116+
145117 //Create feature set and add to attributes
146- for (FeaturesHandler . FeatureSet featureSet : featureSets )
118+ for (FeatureSet . Type featureSet : featureSets )
147119 switch (featureSet ) {
148120
149121 case CODE :
@@ -155,14 +127,6 @@ public ArrayList<Attribute> createAttributes(Set<Category> categories, Set<Metho
155127 break ;
156128 }
157129
158- // Add method signatures as id attribute
159- Attribute idAttr = new Attribute ("id" , methods .stream ().map (Method ::getArffSafeSignature ).collect (Collectors .toList ()));
160- attributes .add (idAttr );
161-
162- // Collect classes and add to attributes
163- Attribute classAttr = new Attribute ("class" , categories .stream ().map (Category ::toString ).collect (Collectors .toList ()));
164- attributes .add (classAttr );
165-
166130 return attributes ;
167131 }
168132
@@ -204,7 +168,7 @@ public ArrayList<Attribute> addCodeAttributes(Set<Category> categories) {
204168 *
205169 * @param instanceSet classification mode
206170 */
207- public ArrayList <Attribute > addDocAttributes (FeaturesHandler . FeatureSet instanceSet ) {
171+ public ArrayList <Attribute > addDocAttributes (FeatureSet . Type instanceSet ) {
208172
209173 ArrayList <Attribute > attributes = new ArrayList <>();
210174
@@ -227,32 +191,33 @@ public ArrayList<Attribute> addDocAttributes(FeaturesHandler.FeatureSet instance
227191 return attributes ;
228192 }
229193
230- public Instances createInstances (List <FeaturesHandler .FeatureSet > featureSets , Category category , ArrayList <Attribute > attributes , Set <Method > methods , String name ) {
194+ public Instances createInstances (List <Type > featureSets , ArrayList <Attribute > attributes ,
195+ Set <Method > methods , Set <Category > categories , String name ) {
231196
232197 Instances instances = new Instances (name , attributes , 0 );
233- instances .setClass (instances .attribute ("class" ));
234198
235- for (FeaturesHandler . FeatureSet featureSet : featureSets )
199+ for (FeatureSet . Type featureSet : featureSets )
236200 switch (featureSet ) {
237201 case CODE :
238- instances .addAll (getCodeInstances (instances , methods , category , attributes ));
202+ instances .addAll (getCodeInstances (instances , methods , categories , attributes ));
239203 break ;
240204 case DOC_MANUAL :
241205 case DOC_AUTO :
242- instances .addAll (getDocInstances (instances , methods , category , featureSet , attributes ));
206+ instances .addAll (getDocInstances (instances , methods , categories , featureSet , attributes ));
243207 break ;
244208 }
245209 return instances ;
246210 }
247211
212+
248213 /**
249214 * Adds data for SWAN features to instance set.
250215 *
250215 * @param instances instance set
252217 * @param methods training set
253218 * @return instance set containing data from SWAN
254219 */
255- public ArrayList <Instance > getCodeInstances (Instances instances , Set <Method > methods , Category category , ArrayList <Attribute > attributes ) {
220+ public ArrayList <Instance > getCodeInstances (Instances instances , Set <Method > methods , Set < Category > categories , ArrayList <Attribute > attributes ) {
256221
257222 ArrayList <Instance > instanceList = new ArrayList <>();
258223
@@ -264,10 +229,34 @@ public ArrayList<Instance> getCodeInstances(Instances instances, Set<Method> met
264229 Instance inst = new DenseInstance (attributes .size ());
265230 inst .setDataset (instances );
266231
267- if (method .getSrm () != null || method .getCwe () != null )
268- inst .setClassValue (getCategory (method , category ));
232+ for (Category cat : categories ) {
233+ if (cat .isAuthentication () && !method .getAuthSrm ().isEmpty ()) {
234+
235+ if (mode == ModelEvaluator .Mode .MEKA )
236+ inst .setValue (instances .attribute (cat .getId ()), "1" );
237+ else {
238+ for (Category auth : method .getAuthSrm ()) {
239+ switch (auth ) {
240+ case AUTHENTICATION_TO_LOW :
241+ inst .setValue (instances .attribute (cat .getId ()), "1" );
242+ break ;
243+ case AUTHENTICATION_NEUTRAL :
244+ inst .setValue (instances .attribute (cat .getId ()), "2" );
245+ break ;
246+ case AUTHENTICATION_TO_HIGH :
247+ inst .setValue (instances .attribute (cat .getId ()), "3" );
248+ break ;
249+ }
250+ }
251+ }
252+ } else if (method .getAllCategories ().contains (cat )) {
253+ inst .setValue (instances .attribute (cat .getId ()), "1" );
254+ } else
255+ inst .setValue (instances .attribute (cat .getId ()), "0" );
256+ }
269257
270258 for (Map .Entry <IFeature , Attribute > entry : codeAttributes .entrySet ()) {
259+
271260 switch (entry .getKey ().applies (method )) {
272261 case TRUE :
273262 inst .setValue (entry .getValue (), "true" );
@@ -295,7 +284,8 @@ public ArrayList<Instance> getCodeInstances(Instances instances, Set<Method> met
295284 * @param instances instance set
296285 * @return Instances containing data from SWAN-DOC
297286 */
298- public ArrayList <Instance > getDocInstances (Instances instances , Set <Method > methods , Category category , FeaturesHandler .FeatureSet instanceSet , ArrayList <Attribute > attributes ) {
287+ public ArrayList <Instance > getDocInstances (Instances instances , Set <Method > methods , Set <Category > categories ,
288+ FeatureSet .Type instanceSet , ArrayList <Attribute > attributes ) {
299289
300290 ArrayList <Instance > instanceList = new ArrayList <>();
301291
@@ -312,12 +302,25 @@ public ArrayList<Instance> getDocInstances(Instances instances, Set<Method> meth
312302 inst .setDataset (instances );
313303 isNewInstance = true ;
314304
315- if (method .getSrm () != null || method .getCwe () != null )
316- inst .setClassValue (getCategory (method , category ));
305+ switch (mode ) {
306+ case MEKA :
307+ for (Category cat : categories ) {
308+ if (method .getAllCategories ().contains (cat ) || (cat .isAuthentication () && !method .getAuthSrm ().isEmpty ())) {
309+ inst .setValue (instances .attribute (cat .getId ()), "1" );
310+ } else
311+ inst .setValue (instances .attribute (cat .getId ()), "0" );
312+ }
313+ break ;
314+
315+ case WEKA :
316+ if (method .getSrm () != null || method .getCwe () != null )
317+ // inst.setClassValue(getCategory(method, categories));
318+ break ;
319+ }
320+
317321 inst .setValue (instances .attribute ("id" ), method .getArffSafeSignature ());
318322 }
319323
320-
321324 switch (instanceSet ) {
322325 case DOC_MANUAL :
323326 for (Class <? extends IDocFeature > feature : docFeatureHandler .getManualFeatureSet ()) {
@@ -326,8 +329,8 @@ public ArrayList<Instance> getDocInstances(Instances instances, Set<Method> meth
326329 IDocFeature javadocFeature = feature .newInstance ();
327330 AnnotatedMethod annotatedMethod = docFeatureHandler .getManualFeatureData ().get (method .getSignature ());
328331
329- if (annotatedMethod != null )
330- inst .setValue (instances .attribute (feature .getSimpleName ()), javadocFeature .evaluate (annotatedMethod ).getTotalValue ());
332+ if (annotatedMethod != null )
333+ inst .setValue (instances .attribute (feature .getSimpleName ()), javadocFeature .evaluate (annotatedMethod ).getTotalValue ());
331334 } catch (InstantiationException | IllegalAccessException e ) {
332335 e .printStackTrace ();
333336 }
@@ -374,6 +377,15 @@ public HashMap<String, Instances> getInstances() {
374377 return instances ;
375378 }
376379
380+ public Instances getTrainInstances () {
381+ return instances .get ("train" );
382+ }
383+
384+ public Instances getTestInstances () {
385+ return instances .get ("test" );
386+ }
387+
388+
377389 public void setInstances (HashMap <String , Instances > instances ) {
378390 this .instances = instances ;
379391 }
@@ -393,4 +405,4 @@ public DocFeatureHandler getDocFeatureHandler() {
393405 public void setDocFeatureHandler (DocFeatureHandler docFeatureHandler ) {
394406 this .docFeatureHandler = docFeatureHandler ;
395407 }
396- }
408+ }
0 commit comments