Skip to content

Commit 7d9d38a

Browse files
committed
Refactor model evaluation
1 parent eb10e8b commit 7d9d38a

File tree

7 files changed

+123
-147
lines changed

7 files changed

+123
-147
lines changed

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/SwanPipeline.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ public void run() throws IOException, InterruptedException {
5252
featuresHandler.createFeatures();
5353

5454
//Train and evaluate model for SRM and CWE categories
55-
ModelEvaluator modelEvaluator = new ModelEvaluator(featuresHandler, options.getLearningMode(), options.getIterations(), options.getTrainTestSplit());
55+
ModelEvaluator modelEvaluator = new ModelEvaluator(featuresHandler, options);
5656
modelEvaluator.trainModel();
5757

5858
//TODO export final list to JSON file

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/cli/SwanOptions.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package de.fraunhofer.iem.swan.cli;
22

3+
import java.util.Collection;
34
import java.util.List;
5+
import java.util.stream.Collectors;
6+
import java.util.stream.Stream;
47

58
/**
69
* POJO for SWAN command line options.
@@ -101,6 +104,10 @@ public void setCweClasses(List<String> cweClasses) {
101104
this.cweClasses = cweClasses;
102105
}
103106

107+
public List<String> getAllClasses(){
108+
return Stream.of(srmClasses,cweClasses).flatMap(Collection::stream).collect(Collectors.toList());
109+
}
110+
104111
public boolean isExportArffData() {
105112
return exportArffData;
106113
}

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/data/Method.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,9 @@
66
import soot.SootMethod;
77
import soot.Type;
88

9-
import java.util.ArrayList;
10-
import java.util.HashSet;
11-
import java.util.List;
12-
import java.util.Set;
9+
import java.util.*;
1310
import java.util.stream.Collectors;
11+
import java.util.stream.Stream;
1412

1513
/**
1614
* Class representing a single method
@@ -168,6 +166,11 @@ public Set<Category> getCwe() {
168166
return this.cwe;
169167
}
170168

169+
170+
public Set<Category> getAllCategories(){
171+
return Stream.of(srm,cwe).flatMap(Collection::stream).collect(Collectors.toSet());
172+
}
173+
171174
public void addCategoryClassified(Category category) {
172175
this.cwe.add(category);
173176
}

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/FeaturesHandler.java

Lines changed: 48 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,15 @@
2323
* @author Oshando Johnson on 27.09.20
2424
*/
2525
public class FeaturesHandler {
26-
private ArrayList<Attribute> attributes;
26+
2727
private Map<IFeature, Attribute> codeAttributes;
2828
private final HashMap<String, Integer> instanceMap;
2929
private final SwanOptions options;
3030
private SrmList trainData;
3131
private CodeFeatureHandler codeFeatureHandler;
3232
private SourceFileLoader testData;
3333
private DocFeatureHandler docFeatureHandler;
34-
private ArrayList<Instances> instances;
34+
private HashMap<String, Instances> instances;
3535

3636
/**
3737
* Available feature sets:
@@ -65,7 +65,7 @@ public FeaturesHandler(SrmList trainData, SourceFileLoader testData, SwanOptions
6565
this.options = options;
6666
this.trainData = trainData;
6767
this.testData = testData;
68-
instances = new ArrayList<>();
68+
instances = new HashMap<>();
6969
}
7070

7171
/**
@@ -98,20 +98,27 @@ public void createFeatures() {
9898
break;
9999
}
100100

101-
for (String category : options.getSrmClasses()) {
101+
for (String category : options.getAllClasses()) {
102102

103+
//TRAIN
103104
//Create attributes for feature set
104-
attributes = new ArrayList<>();
105-
createAttributes(getCategories(category), trainData.getMethods(), featureSets);
105+
ArrayList<Attribute> trainAttributes = createAttributes(getCategories(category), trainData.getMethods(), featureSets);
106+
107+
//Set attributes to the train instances.
108+
Instances trainInstances = createInstances(featureSets, Category.fromText(category), trainAttributes, trainData.getMethods(), category + "-train-instances");
109+
this.instances.put(category, trainInstances);
110+
Util.exportInstancesToArff(trainInstances);
111+
112+
//TEST
113+
ArrayList<Attribute> testAttributes = createAttributes(getCategories(category), testData.getMethods(), featureSets);
106114

107-
// Set attributes to the train instances.
108-
Instances trainingInstances = createInstances(featureSets, Category.fromText(category));
109-
instances.add(trainingInstances);
110-
Util.exportInstancesToArff(trainingInstances);
115+
//Set attributes to the test instances.
116+
Instances testInstances = createInstances(featureSets, Category.fromText(category), testAttributes, testData.getMethods(), category + "-test-instances");
117+
//this.instances.put(category, trainInstances);
118+
Util.exportInstancesToArff(testInstances);
111119
}
112120
}
113121

114-
115122
public HashSet<Category> getCategories(String cat) {
116123

117124
HashSet<Category> categories;
@@ -132,18 +139,19 @@ public HashSet<Category> getCategories(String cat) {
132139
* @param methods list of training methods
133140
* @param featureSets feature sets (CODE, DOC_MANUAL, DOC_AUTO) to create attributes for
134141
*/
135-
public void createAttributes(Set<Category> categories, Set<Method> methods, List<FeaturesHandler.FeatureSet> featureSets) {
142+
public ArrayList<Attribute> createAttributes(Set<Category> categories, Set<Method> methods, List<FeaturesHandler.FeatureSet> featureSets) {
136143

144+
ArrayList<Attribute> attributes = new ArrayList<>();
137145
//Create feature set and add to attributes
138146
for (FeaturesHandler.FeatureSet featureSet : featureSets)
139147
switch (featureSet) {
140148

141149
case CODE:
142-
addCodeAttributes(categories);
150+
attributes.addAll(addCodeAttributes(categories));
143151
break;
144152
case DOC_MANUAL:
145153
case DOC_AUTO:
146-
addDocAttributes(featureSet);
154+
attributes.addAll(addDocAttributes(featureSet));
147155
break;
148156
}
149157

@@ -154,15 +162,18 @@ public void createAttributes(Set<Category> categories, Set<Method> methods, List
154162
// Collect classes and add to attributes
155163
Attribute classAttr = new Attribute("class", categories.stream().map(Category::toString).collect(Collectors.toList()));
156164
attributes.add(classAttr);
165+
166+
return attributes;
157167
}
158168

159169
/**
160170
* Adds SWAN features as attributes to the instance set.
161171
*
162172
* @param categories list of categories
163173
*/
164-
public void addCodeAttributes(Set<Category> categories) {
174+
public ArrayList<Attribute> addCodeAttributes(Set<Category> categories) {
165175

176+
ArrayList<Attribute> attributes = new ArrayList<>();
166177
// Collect the possible values
167178
ArrayList<String> ordinal = new ArrayList<>();
168179
ordinal.add("true");
@@ -185,14 +196,17 @@ public void addCodeAttributes(Set<Category> categories) {
185196
}
186197
}
187198
}
199+
return attributes;
188200
}
189201

190202
/**
191203
* Adds SWAN-DOC features as attributes to the instance set.
192204
*
193205
* @param instanceSet documentation feature set (DOC_MANUAL or DOC_AUTO) to create attributes for
194206
*/
195-
public void addDocAttributes(FeaturesHandler.FeatureSet instanceSet) {
207+
public ArrayList<Attribute> addDocAttributes(FeaturesHandler.FeatureSet instanceSet) {
208+
209+
ArrayList<Attribute> attributes = new ArrayList<>();
196210

197211
switch (instanceSet) {
198212
case DOC_MANUAL:
@@ -210,22 +224,22 @@ public void addDocAttributes(FeaturesHandler.FeatureSet instanceSet) {
210224
}
211225
break;
212226
}
227+
return attributes;
213228
}
214229

230+
public Instances createInstances(List<FeaturesHandler.FeatureSet> featureSets, Category category, ArrayList<Attribute> attributes, Set<Method> methods, String name) {
215231

216-
public Instances createInstances(List<FeaturesHandler.FeatureSet> featureSets, Category category) {
217-
218-
Instances instances = new Instances(category.toString() + "-methods-instances", attributes, 0);
232+
Instances instances = new Instances(name, attributes, 0);
219233
instances.setClass(instances.attribute("class"));
220234

221235
for (FeaturesHandler.FeatureSet featureSet : featureSets)
222236
switch (featureSet) {
223237
case CODE:
224-
instances.addAll(getCodeInstances(instances, trainData.getMethods(), category));
238+
instances.addAll(getCodeInstances(instances, methods, category, attributes));
225239
break;
226240
case DOC_MANUAL:
227241
case DOC_AUTO:
228-
instances.addAll(getDocInstances(instances, trainData.getMethods(), category, featureSet));
242+
instances.addAll(getDocInstances(instances, methods, category, featureSet, attributes));
229243
break;
230244
}
231245
return instances;
@@ -238,7 +252,7 @@ public Instances createInstances(List<FeaturesHandler.FeatureSet> featureSets, C
238252
* @param methods training set
239253
* @return instance set containing data from SWAN
240254
*/
241-
public ArrayList<Instance> getCodeInstances(Instances instances, Set<Method> methods, Category category) {
255+
public ArrayList<Instance> getCodeInstances(Instances instances, Set<Method> methods, Category category, ArrayList<Attribute> attributes) {
242256

243257
ArrayList<Instance> instanceList = new ArrayList<>();
244258

@@ -249,7 +263,9 @@ public ArrayList<Instance> getCodeInstances(Instances instances, Set<Method> met
249263

250264
Instance inst = new DenseInstance(attributes.size());
251265
inst.setDataset(instances);
252-
inst.setClassValue(getCategory(method, category));
266+
267+
if (method.getSrm() != null || method.getCwe() != null)
268+
inst.setClassValue(getCategory(method, category));
253269

254270
for (Map.Entry<IFeature, Attribute> entry : codeAttributes.entrySet()) {
255271
switch (entry.getKey().applies(method)) {
@@ -279,7 +295,7 @@ public ArrayList<Instance> getCodeInstances(Instances instances, Set<Method> met
279295
* @param instances instance srt
280296
* @return Instances containing data from SWAN-DOC
281297
*/
282-
public ArrayList<Instance> getDocInstances(Instances instances, Set<Method> methods, Category category, FeaturesHandler.FeatureSet instanceSet) {
298+
public ArrayList<Instance> getDocInstances(Instances instances, Set<Method> methods, Category category, FeaturesHandler.FeatureSet instanceSet, ArrayList<Attribute> attributes) {
283299

284300
ArrayList<Instance> instanceList = new ArrayList<>();
285301

@@ -295,7 +311,9 @@ public ArrayList<Instance> getDocInstances(Instances instances, Set<Method> meth
295311
inst = new DenseInstance(attributes.size());
296312
inst.setDataset(instances);
297313
isNewInstance = true;
298-
inst.setClassValue(getCategory(method, category));
314+
315+
if (method.getSrm() != null || method.getCwe() != null)
316+
inst.setClassValue(getCategory(method, category));
299317
inst.setValue(instances.attribute("id"), method.getArffSafeSignature());
300318
}
301319

@@ -307,6 +325,8 @@ public ArrayList<Instance> getDocInstances(Instances instances, Set<Method> meth
307325
try {
308326
IDocFeature javadocFeature = feature.newInstance();
309327
AnnotatedMethod annotatedMethod = docFeatureHandler.getManualFeatureData().get(method.getSignature());
328+
329+
if(annotatedMethod!=null)
310330
inst.setValue(instances.attribute(feature.getSimpleName()), javadocFeature.evaluate(annotatedMethod).getTotalValue());
311331
} catch (InstantiationException | IllegalAccessException e) {
312332
e.printStackTrace();
@@ -330,7 +350,7 @@ public ArrayList<Instance> getDocInstances(Instances instances, Set<Method> meth
330350
/**
331351
* Checks if method belongs to the specified category.
332352
*
333-
* @param method test or training method
353+
* @param method test or training method
334354
* @param category SRM or CWE class being evaluated
335355
* @return string representation of the category the method belongs to, or of Category.NONE
336356
*/
@@ -350,11 +370,11 @@ else if (category.toString().contains("authentication")) {
350370
return Category.NONE.toString();
351371
}
352372

353-
public ArrayList<Instances> getInstances() {
373+
public HashMap<String, Instances> getInstances() {
354374
return instances;
355375
}
356376

357-
public void setInstances(ArrayList<Instances> instances) {
377+
public void setInstances(HashMap<String, Instances> instances) {
358378
this.instances = instances;
359379
}
360380

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/model/MLPlanExecutor.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public MLPlanExecutor() {
4747
*
4848
* @param instances1 instance set that is exported to an ARFF file and then evaluated
4949
*/
50-
public HashMap<String, String> evaluateDataset(Instances instances1) {
50+
public HashMap<String, ArrayList<Double>> evaluateDataset(Instances instances1) {
5151

5252
String arffFilePath = Util.exportInstancesToArff(instances1);
5353

@@ -75,7 +75,7 @@ public HashMap<String, String> evaluateDataset(Instances instances1) {
7575
loader.setFile(new File(arffFilePath));
7676
Instances instances = loader.getDataSet();
7777
instances.setClassIndex(instances.numAttributes() - 1);
78-
monteCarloValidator.initializeResultSet(instances);
78+
// monteCarloValidator.initializeResultSet(instances);
7979
} catch (IOException e) {
8080
e.printStackTrace();
8181
}

0 commit comments

Comments
 (0)