Skip to content

Commit 471f793

Browse files
authored
Merge pull request #41 from secure-software-engineering/soot-integration
Improve Soot integration
2 parents 62e42b0 + 98da304 commit 471f793

36 files changed

+2513
-2612
lines changed

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/SwanPipeline.java

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,20 @@
11
package de.fraunhofer.iem.swan;
22

33
import de.fraunhofer.iem.swan.cli.SwanOptions;
4+
import de.fraunhofer.iem.swan.data.Method;
45
import de.fraunhofer.iem.swan.features.FeatureSetSelector;
56
import de.fraunhofer.iem.swan.features.IFeatureSet;
6-
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
7+
import de.fraunhofer.iem.swan.io.dataset.Dataset;
78
import de.fraunhofer.iem.swan.io.dataset.SrmList;
89
import de.fraunhofer.iem.swan.io.dataset.SrmListUtils;
910
import de.fraunhofer.iem.swan.model.ModelEvaluator;
11+
import de.fraunhofer.iem.swan.soot.Soot;
1012
import de.fraunhofer.iem.swan.util.Util;
1113
import org.slf4j.Logger;
1214
import org.slf4j.LoggerFactory;
1315

1416
import java.io.IOException;
17+
import java.util.Set;
1518

1619
/**
1720
* Runner for SWAN
@@ -38,21 +41,27 @@ public void run() throws IOException, InterruptedException {
3841

3942
long startAnalysisTime = System.currentTimeMillis();
4043

44+
//Run Soot
45+
Soot soot = new Soot(options.getTrainDataDir(), options.getTestDataDir());
46+
4147
// Load methods in training dataset
42-
SrmList dataset = SrmListUtils.importFile(options.getDatasetJson(), options.getTrainDataDir());
43-
logger.info("Loaded {} training methods, distribution={}", dataset.getMethods().size(), Util.countCategories(dataset.getMethods()));
48+
Dataset dataset = new Dataset();
49+
dataset.setTrain(SrmListUtils.importFile(options.getDatasetJson()));
50+
soot.cleanupList(dataset.getTrain());
51+
logger.info("Loaded {} training methods, distribution={}", dataset.getTrainMethods().size(), Util.countCategories(dataset.getTrainMethods()));
4452

4553
//Load methods from the test set
46-
logger.info("Loading test JARs in {}", options.getTestDataDir());
47-
SourceFileLoader testDataset = new SourceFileLoader(options.getTestDataDir());
48-
testDataset.load(dataset.getMethods());
54+
dataset.setTest(new SrmList(options.getTestDataDir()));
55+
Set<Method> testMethods = soot.loadMethods(dataset.getTest().getTestClasses());
56+
dataset.getTest().setMethods(testMethods);
57+
logger.info("Loaded {} methods from {}", testMethods.size(), options.getTestDataDir());
4958

5059
//Initialize and populate features
5160
FeatureSetSelector featureSetSelector = new FeatureSetSelector();
52-
IFeatureSet featureSet = featureSetSelector.select(dataset, testDataset, options);
61+
IFeatureSet featureSet = featureSetSelector.select(dataset, options);
5362

5463
//Train and evaluate model for SRM and CWE categories
55-
ModelEvaluator modelEvaluator = new ModelEvaluator(featureSet, options, testDataset.getMethods());
64+
ModelEvaluator modelEvaluator = new ModelEvaluator(featureSet, options, dataset.getTestMethods());
5665
modelEvaluator.trainModel();
5766

5867
long analysisTime = System.currentTimeMillis() - startAnalysisTime;

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/data/Method.java

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import com.fasterxml.jackson.annotation.JsonIgnore;
44
import com.fasterxml.jackson.annotation.JsonProperty;
55
import org.apache.commons.lang3.StringUtils;
6+
import soot.SootClass;
67
import soot.SootMethod;
78
import soot.Type;
89

@@ -39,6 +40,10 @@ public class Method {
3940
private Javadoc javadoc = new Javadoc();
4041
@JsonProperty("jar")
4142
private String sourceJar;
43+
@JsonIgnore
44+
private SootMethod sootMethod;
45+
@JsonIgnore
46+
private SootClass sootClass;
4247

4348
public Method() {
4449
cwe = new HashSet<>();
@@ -329,7 +334,7 @@ public String getJavaSignature() {
329334
if (getName().equals("<init>"))
330335
methodName = getClassName().substring(getClassName().lastIndexOf(".") + 1);
331336

332-
return this.returnType + " " + this.className + "." + methodName + "(" + StringUtils.join(this.parameters, ", ") + ")";
337+
return this.returnType + " " + methodName + "(" + StringUtils.join(this.parameters, ", ") + ")";
333338
}
334339

335340
@JsonIgnore
@@ -354,6 +359,22 @@ public void setSourceJar(String source) {
354359
this.sourceJar = source;
355360
}
356361

362+
public SootMethod getSootMethod() {
363+
return sootMethod;
364+
}
365+
366+
public void setSootMethod(SootMethod sootMethod) {
367+
this.sootMethod = sootMethod;
368+
}
369+
370+
public SootClass getSootClass() {
371+
return sootClass;
372+
}
373+
374+
public void setSootClass(SootClass sootClass) {
375+
this.sootClass = sootClass;
376+
}
377+
357378
@Override
358379
public boolean equals(Object another) {
359380
if (super.equals(another))
@@ -366,7 +387,7 @@ public boolean equals(Object another) {
366387
return false;
367388
if (!this.parameters.equals(otherMethod.parameters))
368389
return false;
369-
return this.className.equals(otherMethod.className);
390+
return this.getClassName().equals(otherMethod.getClassName());
370391
}
371392

372393
@Override

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/FeatureSet.java

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,11 @@
44
import de.fraunhofer.iem.swan.data.Category;
55
import de.fraunhofer.iem.swan.data.Method;
66
import de.fraunhofer.iem.swan.features.code.CodeFeatureHandler;
7-
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
87
import de.fraunhofer.iem.swan.features.code.type.IFeature;
98
import de.fraunhofer.iem.swan.features.doc.DocFeatureHandler;
109
import de.fraunhofer.iem.swan.features.doc.manual.IDocFeature;
1110
import de.fraunhofer.iem.swan.features.doc.nlp.AnnotatedMethod;
12-
import de.fraunhofer.iem.swan.io.dataset.SrmList;
11+
import de.fraunhofer.iem.swan.io.dataset.Dataset;
1312
import de.fraunhofer.iem.swan.model.ModelEvaluator;
1413
import weka.core.Attribute;
1514
import weka.core.DenseInstance;
@@ -24,9 +23,8 @@ abstract class FeatureSet {
2423
protected Map<IFeature, Attribute> codeAttributes;
2524
protected final HashMap<String, Integer> instanceMap;
2625
protected final SwanOptions options;
27-
protected SrmList trainData;
26+
protected Dataset dataset;
2827
protected CodeFeatureHandler codeFeatureHandler;
29-
protected SourceFileLoader testData;
3028
protected DocFeatureHandler docFeatureHandler;
3129
protected HashMap<String, Instances> instances;
3230
protected ModelEvaluator.Toolkit toolkit;
@@ -58,11 +56,10 @@ public static FeatureSet.Type getValue(String value) {
5856
}
5957
}
6058

61-
public FeatureSet(SrmList trainData, SourceFileLoader testData, SwanOptions options, ModelEvaluator.Toolkit toolkit) {
59+
public FeatureSet(Dataset dataset, SwanOptions options, ModelEvaluator.Toolkit toolkit) {
6260
this.instanceMap = new HashMap<>();
6361
this.options = options;
64-
this.trainData = trainData;
65-
this.testData = testData;
62+
this.dataset = dataset;
6663
this.toolkit = toolkit;
6764
instances = new HashMap<>();
6865
}
@@ -79,18 +76,18 @@ public List<FeatureSet.Type> initializeFeatures() {
7976
for (FeatureSet.Type featureSet : featureSets)
8077
switch (featureSet) {
8178
case CODE:
82-
codeFeatureHandler = new CodeFeatureHandler(trainData.getClasspath(), testData.getClasspath());
79+
codeFeatureHandler = new CodeFeatureHandler();
8380
codeFeatureHandler.initializeFeatures();
8481
break;
8582
case DOC_MANUAL:
8683

87-
docFeatureHandler = new DocFeatureHandler(trainData.getMethods());
84+
docFeatureHandler = new DocFeatureHandler(dataset.getTrainMethods());
8885
docFeatureHandler.initialiseManualFeatureSet();
8986
docFeatureHandler.evaluateManualFeatureData();
9087
break;
9188
case DOC_AUTO:
9289

93-
docFeatureHandler = new DocFeatureHandler(trainData.getMethods());
90+
docFeatureHandler = new DocFeatureHandler(dataset.getTrainMethods());
9491
docFeatureHandler.initialiseAutomaticFeatureSet();
9592
docFeatureHandler.evaluateAutomaticFeatureData();
9693
break;

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/FeatureSetSelector.java

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,21 @@
11
package de.fraunhofer.iem.swan.features;
22

33
import de.fraunhofer.iem.swan.cli.SwanOptions;
4-
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
5-
import de.fraunhofer.iem.swan.io.dataset.SrmList;
4+
import de.fraunhofer.iem.swan.io.dataset.Dataset;
65
import de.fraunhofer.iem.swan.model.ModelEvaluator;
76

87
public class FeatureSetSelector {
98

10-
public IFeatureSet select(SrmList trainData, SourceFileLoader testData, SwanOptions options) {
9+
public IFeatureSet select(Dataset dataset, SwanOptions options) {
1110

1211
switch (ModelEvaluator.Toolkit.valueOf(options.getToolkit().toUpperCase())) {
1312

1413
case MEKA:
15-
MekaFeatureSet mekaFeatureSet = new MekaFeatureSet(trainData, testData, options);
14+
MekaFeatureSet mekaFeatureSet = new MekaFeatureSet(dataset, options);
1615
mekaFeatureSet.createFeatures();
1716
return mekaFeatureSet;
1817
case WEKA:
19-
WekaFeatureSet wekaFeatureSet = new WekaFeatureSet(trainData, testData, options);
18+
WekaFeatureSet wekaFeatureSet = new WekaFeatureSet(dataset, options);
2019
wekaFeatureSet.createFeatures();
2120
return wekaFeatureSet;
2221
}

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/MekaFeatureSet.java

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,7 @@
33
import de.fraunhofer.iem.swan.cli.SwanOptions;
44
import de.fraunhofer.iem.swan.data.Category;
55
import de.fraunhofer.iem.swan.data.Method;
6-
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
7-
import de.fraunhofer.iem.swan.io.dataset.SrmList;
6+
import de.fraunhofer.iem.swan.io.dataset.Dataset;
87
import de.fraunhofer.iem.swan.model.ModelEvaluator;
98
import de.fraunhofer.iem.swan.util.Util;
109
import meka.filters.unsupervised.attribute.MekaClassAttributes;
@@ -16,8 +15,8 @@
1615

1716
public class MekaFeatureSet extends FeatureSet implements IFeatureSet {
1817

19-
public MekaFeatureSet(SrmList trainData, SourceFileLoader testData, SwanOptions options) {
20-
super(trainData, testData, options, ModelEvaluator.Toolkit.MEKA);
18+
public MekaFeatureSet(Dataset dataset, SwanOptions options) {
19+
super(dataset, options, ModelEvaluator.Toolkit.MEKA);
2120
}
2221

2322
/**
@@ -28,13 +27,13 @@ public void createFeatures() {
2827
List<FeatureSet.Type> featureSets = initializeFeatures();
2928

3029
//Create and set attributes for the train instances
31-
ArrayList<Attribute> trainAttributes = createAttributes(getCategories(options.getAllClasses()), trainData.getMethods(), featureSets);
32-
Instances trainInstances = createInstances(featureSets, trainAttributes, trainData.getMethods(), getCategories(options.getAllClasses()), "train-instances");
30+
ArrayList<Attribute> trainAttributes = createAttributes(getCategories(options.getAllClasses()), dataset.getTrainMethods(), featureSets);
31+
Instances trainInstances = createInstances(featureSets, trainAttributes, dataset.getTrainMethods(), getCategories(options.getAllClasses()), "train-instances");
3332
this.instances.put("train", convertToMekaInstances(trainInstances));
3433

3534
//Create and set attributes for the test instances.
36-
ArrayList<Attribute> testAttributes = createAttributes(getCategories(options.getAllClasses()), testData.getMethods(), featureSets);
37-
Instances testInstances = createInstances(featureSets, testAttributes, testData.getMethods(), getCategories(options.getAllClasses()), "test-instances");
35+
ArrayList<Attribute> testAttributes = createAttributes(getCategories(options.getAllClasses()), dataset.getTestMethods(), featureSets);
36+
Instances testInstances = createInstances(featureSets, testAttributes, dataset.getTestMethods(), getCategories(options.getAllClasses()), "test-instances");
3837
this.instances.put("test", convertToMekaInstances(testInstances));
3938
}
4039

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/WekaFeatureSet.java

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,7 @@
33
import de.fraunhofer.iem.swan.cli.SwanOptions;
44
import de.fraunhofer.iem.swan.data.Category;
55
import de.fraunhofer.iem.swan.data.Method;
6-
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
7-
import de.fraunhofer.iem.swan.io.dataset.SrmList;
6+
import de.fraunhofer.iem.swan.io.dataset.Dataset;
87
import de.fraunhofer.iem.swan.model.ModelEvaluator;
98
import de.fraunhofer.iem.swan.util.Util;
109
import weka.core.Attribute;
@@ -18,8 +17,8 @@
1817
*/
1918
public class WekaFeatureSet extends FeatureSet implements IFeatureSet {
2019

21-
public WekaFeatureSet(SrmList trainData, SourceFileLoader testData, SwanOptions options) {
22-
super(trainData, testData, options, ModelEvaluator.Toolkit.WEKA);
20+
public WekaFeatureSet(Dataset dataset, SwanOptions options) {
21+
super(dataset, options, ModelEvaluator.Toolkit.WEKA);
2322
}
2423

2524
/**
@@ -32,10 +31,10 @@ public void createFeatures() {
3231
for (Category category : options.getAllClasses().stream().map(Category::fromText).collect(Collectors.toList())) {
3332

3433
//Create and set attributes for the train instances
35-
ArrayList<Attribute> trainAttributes = createAttributes(category, trainData.getMethods(), featureSets);
34+
ArrayList<Attribute> trainAttributes = createAttributes(category, dataset.getTrainMethods(), featureSets);
3635

3736
String instanceName = category.getId().toLowerCase() + "-train-instances";
38-
Instances trainInstances = createInstances(featureSets, trainAttributes, trainData.getMethods(), Collections.singleton(category), instanceName);
37+
Instances trainInstances = createInstances(featureSets, trainAttributes, dataset.getTrainMethods(), Collections.singleton(category), instanceName);
3938
this.instances.put(category.getId().toLowerCase(), trainInstances);
4039
Util.exportInstancesToArff(trainInstances);
4140

0 commit comments

Comments
 (0)