Commit e12b7ca

Load dataset from ARFF file instead of calculating features for each run
1 parent d85f323 commit e12b7ca

File tree: 10 files changed (+106, -62 lines)

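
The commit's central change is in MekaFeatureSet: instead of recomputing the SWAN feature vectors on every run, the training instances can now be read from an ARFF file exported by a previous run. A minimal sketch of the WEKA loader calls the new path relies on (the file name train.arff is only a placeholder, not something this commit defines):

    import weka.core.Instances;
    import weka.core.converters.ArffLoader;

    import java.io.File;
    import java.io.IOException;

    public class ArffLoadSketch {

        public static void main(String[] args) throws IOException {
            ArffLoader loader = new ArffLoader();
            loader.setSource(new File("train.arff"));    // placeholder path

            Instances data = loader.getDataSet();        // all rows, used as the train instances
            Instances structure = loader.getStructure(); // header only: attribute names, types, order

            System.out.println("Attributes: " + structure.numAttributes());
            System.out.println("Instances:  " + data.numInstances());
        }
    }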

swan-pipeline/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@
             <groupId>edu.stanford.nlp</groupId>
             <artifactId>stanford-corenlp</artifactId>
             <version>4.3.0</version>
-            <classifier>models</classifier>
+            <classifier>models-english</classifier>
         </dependency>
         <dependency>
             <groupId>org.jsoup</groupId>

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/SwanPipeline.java

Lines changed: 4 additions & 1 deletion
@@ -47,7 +47,10 @@ public void run() throws IOException, InterruptedException {
        // Load methods in training dataset
        Dataset dataset = new Dataset();
        dataset.setTrain(SrmListUtils.importFile(options.getDatasetJson()));
-       soot.cleanupList(dataset.getTrain());
+
+       if (!options.getTrainDataDir().isEmpty())
+           soot.cleanupList(dataset.getTrain());
+
        logger.info("Loaded {} training methods, distribution={}", dataset.getTrainMethods().size(), Util.countCategories(dataset.getTrainMethods()));

        //Load methods from the test set

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/FeatureSet.java

Lines changed: 14 additions & 6 deletions
@@ -188,10 +188,9 @@ public ArrayList<Attribute> addDocAttributes(FeatureSet.Type instanceSet) {
        return attributes;
    }

-   public Instances createInstances(List<Type> featureSets, ArrayList<Attribute> attributes,
-                                    Set<Method> methods, Set<Category> categories, String name) {

-       Instances instances = new Instances(name, attributes, 0);
+   public Instances createInstances(Instances instances, List<Type> featureSets, ArrayList<Attribute> attributes,
+                                    Set<Method> methods, Set<Category> categories) {

        for (FeatureSet.Type featureSet : featureSets)
            switch (featureSet) {
@@ -207,6 +206,15 @@ public Instances createInstances(List<Type> featureSets, ArrayList<Attribute> at
        }


+   public Instances createInstances(List<Type> featureSets, ArrayList<Attribute> attributes,
+                                    Set<Method> methods, Set<Category> categories) {
+
+       Instances instances = new Instances("swan-srm", attributes, 0);
+
+       return createInstances(instances, featureSets, attributes, methods, categories);
+   }
+
+
    /**
     * Adds data for SWAN features to instance set.
     *
@@ -256,13 +264,13 @@ public ArrayList<Instance> getCodeInstances(Instances instances, Set<Method> met

            switch (entry.getKey().applies(method)) {
                case TRUE:
-                   inst.setValue(entry.getValue(), "true");
+                   inst.setValue(instances.attribute(String.valueOf(entry.getKey())), "true");
                    break;
                case FALSE:
-                   inst.setValue(entry.getValue(), "false");
+                   inst.setValue(instances.attribute(String.valueOf(entry.getKey())), "false");
                    break;
                default:
-                   inst.setMissing(entry.getValue());
+                   inst.setMissing(instances.attribute(String.valueOf(entry.getKey())));
            }
        }
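
The refactoring above lets createInstances() fill an Instances object whose header may come either from createAttributes() or from a loaded ARFF structure, and getCodeInstances() now looks attributes up by name via instances.attribute(...) instead of reusing cached Attribute references. A small self-contained sketch of that name-based lookup (the attribute names here are invented, not SWAN's real feature names):

    import weka.core.Attribute;
    import weka.core.DenseInstance;
    import weka.core.Instance;
    import weka.core.Instances;

    import java.util.ArrayList;
    import java.util.Arrays;

    public class AttributeLookupSketch {

        public static void main(String[] args) {
            // Two nominal attributes standing in for SWAN's boolean code features (names invented).
            ArrayList<Attribute> attrs = new ArrayList<>();
            attrs.add(new Attribute("methodNameContainsSet", Arrays.asList("true", "false")));
            attrs.add(new Attribute("returnsVoid", Arrays.asList("true", "false")));

            Instances header = new Instances("swan-srm", attrs, 0);
            Instance inst = new DenseInstance(header.numAttributes());

            // Resolve attributes by name from the header instead of holding Attribute references,
            // so the same code works for an in-memory header and for one loaded from ARFF.
            inst.setValue(header.attribute("methodNameContainsSet"), "true");
            inst.setMissing(header.attribute("returnsVoid"));

            header.add(inst);
            System.out.println(header);
        }
    }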

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/MekaFeatureSet.java

Lines changed: 29 additions & 6 deletions
@@ -9,9 +9,13 @@
 import meka.filters.unsupervised.attribute.MekaClassAttributes;
 import weka.core.Attribute;
 import weka.core.Instances;
+import weka.core.converters.ArffLoader;
 import weka.filters.Filter;

+import java.io.File;
+import java.io.IOException;
 import java.util.*;
+import java.util.stream.Collectors;

 public class MekaFeatureSet extends FeatureSet implements IFeatureSet {

@@ -26,14 +30,34 @@ public void createFeatures() {

        List<FeatureSet.Type> featureSets = initializeFeatures();

+       Instances trainInstances = null;
+       Instances structure = null;
+
        //Create and set attributes for the train instances
-       ArrayList<Attribute> trainAttributes = createAttributes(getCategories(options.getAllClasses()), dataset.getTrainMethods(), featureSets);
-       Instances trainInstances = createInstances(featureSets, trainAttributes, dataset.getTrainMethods(), getCategories(options.getAllClasses()), "train-instances");
-       this.instances.put("train", convertToMekaInstances(trainInstances));
+       if (options.getInstances().isEmpty()) {
+           ArrayList<Attribute> trainAttributes = createAttributes(getCategories(options.getAllClasses()), dataset.getTrainMethods(), featureSets);
+           trainInstances = createInstances(featureSets, trainAttributes, dataset.getTrainMethods(), getCategories(options.getAllClasses()));
+       } else {
+           ArffLoader loader = new ArffLoader();
+
+           try {
+               loader.setSource(new File(options.getInstances().get(0)));
+               trainInstances = loader.getDataSet();
+               structure = loader.getStructure();
+           } catch (IOException e) {
+               e.printStackTrace();
+           }
+       }

        //Create and set attributes for the test instances.
+       Attribute idAttr = new Attribute("id", dataset.getTestMethods().stream().map(Method::getArffSafeSignature).collect(Collectors.toList()));
+       structure.replaceAttributeAt(idAttr, structure.attribute("id").index());
+       ArrayList<Attribute> aList = Collections.list(structure.enumerateAttributes());
+
        ArrayList<Attribute> testAttributes = createAttributes(getCategories(options.getAllClasses()), dataset.getTestMethods(), featureSets);
-       Instances testInstances = createInstances(featureSets, testAttributes, dataset.getTestMethods(), getCategories(options.getAllClasses()), "test-instances");
+       Instances testInstances = (createInstances(structure, featureSets, aList, dataset.getTestMethods(), getCategories(options.getAllClasses())));
+
+       this.instances.put("train", convertToMekaInstances(trainInstances));
        this.instances.put("test", convertToMekaInstances(testInstances));
    }

@@ -67,7 +91,7 @@ public Instances convertToMekaInstances(Instances instances) {
            filter.setAttributeIndices("1-11");
            filter.setInputFormat(instances);
            output = Filter.useFilter(instances, filter);
-           output.setRelationName(instances.relationName() + ":" + output.relationName());
+           output.setRelationName("swan-srm:" + output.relationName());

            Util.exportInstancesToArff(output);
        } catch (Exception e) {
@@ -85,5 +109,4 @@ public HashSet<Category> getCategories(List<String> cat) {

        return categories;
    }
-
 }
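
In the ARFF branch above, the loaded header is reused for the test set, except that the nominal id attribute has to enumerate the current test methods' signatures; replaceAttributeAt() swaps it in before the remaining attributes are collected. A rough, hedged sketch of that header adjustment with plain WEKA types (the attribute names and signatures below are invented for illustration):

    import weka.core.Attribute;
    import weka.core.Instances;

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class TestHeaderSketch {

        // Swap the nominal "id" attribute so it enumerates the current test method signatures,
        // then collect the updated attribute list for building the test instances.
        static ArrayList<Attribute> retargetIdAttribute(Instances structure, List<String> testSignatures) {
            Attribute idAttr = new Attribute("id", testSignatures);
            structure.replaceAttributeAt(idAttr, structure.attribute("id").index());
            return Collections.list(structure.enumerateAttributes());
        }

        public static void main(String[] args) {
            ArrayList<Attribute> attrs = new ArrayList<>();
            attrs.add(new Attribute("id", Arrays.asList("a.A.m()", "b.B.n()")));       // invented signatures
            attrs.add(new Attribute("someFeature", Arrays.asList("true", "false")));   // invented feature
            Instances structure = new Instances("swan-srm", attrs, 0);

            System.out.println(retargetIdAttribute(structure, Arrays.asList("c.C.p()", "d.D.q()")));
        }
    }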

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/WekaFeatureSet.java

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ public void createFeatures() {
            ArrayList<Attribute> trainAttributes = createAttributes(category, dataset.getTrainMethods(), featureSets);

            String instanceName = category.getId().toLowerCase() + "-train-instances";
-           Instances trainInstances = createInstances(featureSets, trainAttributes, dataset.getTrainMethods(), Collections.singleton(category), instanceName);
+           Instances trainInstances = createInstances(featureSets, trainAttributes, dataset.getTrainMethods(), Collections.singleton(category));
            this.instances.put(category.getId().toLowerCase(), trainInstances);
            Util.exportInstancesToArff(trainInstances);

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/io/dataset/SrmList.java

Lines changed: 0 additions & 1 deletion
@@ -31,7 +31,6 @@ public SrmList(String sourceFileDir) {
        methods = new HashSet<>();
    }

-
    public void load(final Set<Method> trainingSet) {

        Util.createSubclassAnnotations(methods, "classpath");

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/io/dataset/SrmListUtils.java

Lines changed: 1 addition & 2 deletions
@@ -43,10 +43,9 @@ public static SrmList importFile(String file) throws IOException {
     */
    public static void exportFile(SrmList srmList, String file) throws IOException {

-       srmList.removeUnclassifiedMethods();
        ObjectMapper objectMapper = new ObjectMapper();
        objectMapper.writeValue(new File(file), srmList);
-       logger.info("{} methods exported to {}", srmList.getMethods().size(), file);
+       logger.info("{} SRMs exported to {}", srmList.getMethods().size(), file);
    }

    public static void removeUndocumentedMethods(SrmList list) {
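
Since exportFile() no longer calls removeUnclassifiedMethods() itself, a caller that still wants the old behaviour has to filter before exporting. A hypothetical call site (the helper and its parameters are assumptions, not code from this commit):

    import de.fraunhofer.iem.swan.io.dataset.SrmList;
    import de.fraunhofer.iem.swan.io.dataset.SrmListUtils;

    import java.io.IOException;

    public class ExportSketch {

        // Hypothetical helper: dropping unclassified methods is now the caller's decision,
        // not something exportFile() does on its own.
        static void exportClassifiedOnly(SrmList srmList, String file) throws IOException {
            srmList.removeUnclassifiedMethods();   // previously happened inside exportFile()
            SrmListUtils.exportFile(srmList, file);
        }
    }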

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/model/toolkit/Meka.java

Lines changed: 4 additions & 9 deletions
@@ -37,12 +37,12 @@ public Meka(MekaFeatureSet features, SwanOptions options, Set<Method> methods) {
     * Trains and evaluates the model with the given training data and specified classification mode.
     *
     */
-   public void trainModel() {
+   public SrmList trainModel() {

        switch (ModelEvaluator.Phase.valueOf(options.getPhase().toUpperCase())) {
            case VALIDATE:
                crossValidateModel(features.getTrainInstances());
-               break;
+               return null;
            case PREDICT:
                HashMap<String, ArrayList<Category>> predictions = predictModel(features.getTrainInstances(), features.getTestInstances(), options.getPredictionThreshold());

@@ -51,14 +51,9 @@ public void trainModel() {
                        method.addCategory(category);
                    }
                }
-
-               try {
-                   SrmListUtils.exportFile(new SrmList(methods), options.getOutputDir() + File.separator + "swan-srm-cwe-list.json");
-               } catch (IOException e) {
-                   e.printStackTrace();
-               }
-               break;
+               return new SrmList(methods);
        }
+       return null;
    }

    /**
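
trainModel() now hands the resulting SrmList back instead of writing the JSON file itself, so the export moves to the caller. A hedged sketch of what such a caller could look like (the surrounding method is an assumption; SrmListUtils.exportFile and the swan-srm-cwe-list.json name come from the code this commit removes from Meka):

    import de.fraunhofer.iem.swan.io.dataset.SrmList;
    import de.fraunhofer.iem.swan.io.dataset.SrmListUtils;
    import de.fraunhofer.iem.swan.model.toolkit.Meka;

    import java.io.File;
    import java.io.IOException;

    public class MekaCallerSketch {

        // Hypothetical caller: trainModel() returns null in the VALIDATE phase, so only a
        // non-null result (PREDICT phase) gets exported.
        static void trainAndExport(Meka meka, String outputDir) {
            SrmList result = meka.trainModel();

            if (result != null) {
                try {
                    SrmListUtils.exportFile(result, outputDir + File.separator + "swan-srm-cwe-list.json");
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }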

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/soot/Soot.java

Lines changed: 1 addition & 0 deletions
@@ -23,6 +23,7 @@ public class Soot {
    public Soot(String... path) {

        this.classpath = Util.buildCP(path);
+       logger.info("Soot Classpath {}", classpath);
        configure(classpath);
    }


swan-pipeline/src/main/java/de/fraunhofer/iem/swan/util/Util.java

Lines changed: 51 additions & 35 deletions
@@ -1,5 +1,16 @@
 package de.fraunhofer.iem.swan.util;

+import de.fraunhofer.iem.swan.SwanPipeline;
+import de.fraunhofer.iem.swan.data.Category;
+import de.fraunhofer.iem.swan.data.Method;
+import org.apache.commons.io.FileUtils;
+import org.nd4j.common.io.ClassUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import weka.core.Attribute;
+import weka.core.Instances;
+import weka.core.converters.ArffSaver;
+
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileReader;
@@ -8,17 +19,6 @@
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;

-import de.fraunhofer.iem.swan.SwanPipeline;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import de.fraunhofer.iem.swan.features.code.type.IFeature.Type;
-import de.fraunhofer.iem.swan.data.Category;
-import de.fraunhofer.iem.swan.data.Method;
-import de.fraunhofer.iem.swan.features.code.type.AbstractSootFeature;
-import soot.SootMethod;
-import weka.core.Attribute;
-import weka.core.Instances;
-import weka.core.converters.ArffSaver;

 public class Util {
     private static final Logger logger = LoggerFactory.getLogger(Util.class);
@@ -72,19 +72,27 @@ public static void printStatistics(String message, Set<Method> methods) {
    }

    public static Set<String> getAllClassesFromDirectory(String dir) throws IOException {
+
        Set<String> classes = new HashSet<>();
-       File folder = new File(dir);
-       File[] listOfFiles = folder.listFiles();
-       if (listOfFiles != null) {
-           for (File listOfFile : listOfFiles) {
-               if (listOfFile.getName().endsWith(".jar"))
-                   classes.addAll(getAllClassesFromJar(listOfFile.getAbsolutePath()));
+
+       for (File file : FileUtils.listFiles(new File(dir), new String[]{"class", "jar"}, true)) {
+           if (file.getName().endsWith(".jar"))
+               classes.addAll(getAllClassesFromJar(file.getAbsolutePath()));
+           else if (file.getName().endsWith(".class")) {
+
+               String packageName = file.getCanonicalPath().replace(dir, "").replace(".class", "");
+
+               if (packageName.startsWith(File.separator))
+                   packageName = packageName.replaceFirst(File.separator, "");
+
+               classes.add(ClassUtils.convertResourcePathToClassName(packageName));
            }
        }
        return classes;
    }

    public static Map<String, String> getAllClassesFromDir(String dir) throws IOException {
+
        Map<String, String> classes = new HashMap<>();
        File folder = new File(dir);
        File[] listOfFiles = folder.listFiles();
@@ -107,37 +115,44 @@ private static Set<String> getAllClassesFromJar(String jarFile) throws IOExcepti
        Set<String> classes = new HashSet<>();
        ZipInputStream zip = new ZipInputStream(new FileInputStream(jarFile));
        for (ZipEntry entry = zip.getNextEntry(); entry != null; entry = zip.getNextEntry()) {
+
            if (!entry.isDirectory() && entry.getName().endsWith(".class")) {
-               String className = entry.getName().replace('/', '.');
-               className = className.substring(0, className.length() - ".class".length());
-               if (className.contains("$"))
-                   className = className.substring(0, className.indexOf("$") - 1);
-               classes.add(className);
+               classes.add(getClassname(entry.getName()));
            }
        }
        zip.close();
        return classes;
    }

+   public static String getClassname(String path) {
+
+       String className = path.replace('/', '.');
+       className = className.substring(0, className.length() - ".class".length());
+       if (className.contains("$"))
+           className = className.substring(0, className.indexOf("$") - 1);
+       System.out.println(className);
+       return className;
+   }
+
+
    public static String buildCP(String[] dirs) {
-       StringBuilder sb = new StringBuilder();
+
+       ArrayList<String> paths = new ArrayList<>();

        for (String dir : dirs) {
+
            File folder = new File(dir);
-           File[] listOfFiles = folder.listFiles();

-           if (listOfFiles != null) {
-               for (File listOfFile : listOfFiles) {
-                   if (listOfFile.getName().endsWith(".jar") || listOfFile.getName().endsWith(".apk")) {
-                       if (sb.length() > 0) {
-                           sb.append(System.getProperty("path.separator"));
-                       }
-                       sb.append(listOfFile.getAbsolutePath());
-                   }
+           //If folder contains .class files, add path to classpath
+           if (FileUtils.listFiles(folder, new String[]{"class"}, true).size() > 0)
+               paths.add(dir);
+           else {
+               for (File file : FileUtils.listFiles(folder, new String[]{"jar", "apk"}, true)) {
+                   paths.add(file.getAbsolutePath());
                }
            }
        }
-       return sb.toString();
+       return String.join(File.pathSeparator, paths);
    }

    /**
@@ -282,7 +297,7 @@ public static String getClassName(Instances instances) {
    public static String exportInstancesToArff(Instances instances) {
        ArffSaver saver = new ArffSaver();

-       if (SwanPipeline.options.isExportArffData()) {
+       if (SwanPipeline.options.isExportArffData() && !SwanPipeline.options.getOutputDir().isEmpty() ) {
            // Save arff data.
            saver.setInstances(instances);

@@ -299,9 +314,10 @@ public static String exportInstancesToArff(Instances instances) {
            } catch (IOException e) {
                e.printStackTrace();
            }
+           return saver.retrieveFile().getAbsolutePath();
        }

-       return saver.retrieveFile().getAbsolutePath();
+       return null;
    }

    /**

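With the guard added above, exportInstancesToArff() returns null whenever ARFF export is disabled or no output directory is configured, so callers should not assume a path comes back. A hypothetical null-safe call site:

    import de.fraunhofer.iem.swan.util.Util;
    import weka.core.Instances;

    public class ArffExportSketch {

        // Hypothetical call site: exportInstancesToArff() now returns null when ARFF export is
        // disabled or no output directory is configured, so the returned path is null-checked.
        static void exportIfConfigured(Instances instances) {
            String arffPath = Util.exportInstancesToArff(instances);

            if (arffPath != null)
                System.out.println("Instances written to " + arffPath);
            else
                System.out.println("ARFF export skipped (disabled or no output directory set)");
        }
    }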