Skip to content

Commit 7f4265a

Browse files
committed
Merge code for test/train set into dataset object
1 parent 8b87d09 commit 7f4265a

File tree

7 files changed

+88
-78
lines changed

7 files changed

+88
-78
lines changed

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/FeatureSet.java

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,11 @@
44
import de.fraunhofer.iem.swan.data.Category;
55
import de.fraunhofer.iem.swan.data.Method;
66
import de.fraunhofer.iem.swan.features.code.CodeFeatureHandler;
7-
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
87
import de.fraunhofer.iem.swan.features.code.type.IFeature;
98
import de.fraunhofer.iem.swan.features.doc.DocFeatureHandler;
109
import de.fraunhofer.iem.swan.features.doc.manual.IDocFeature;
1110
import de.fraunhofer.iem.swan.features.doc.nlp.AnnotatedMethod;
12-
import de.fraunhofer.iem.swan.io.dataset.SrmList;
11+
import de.fraunhofer.iem.swan.io.dataset.Dataset;
1312
import de.fraunhofer.iem.swan.model.ModelEvaluator;
1413
import weka.core.Attribute;
1514
import weka.core.DenseInstance;
@@ -24,9 +23,8 @@ abstract class FeatureSet {
2423
protected Map<IFeature, Attribute> codeAttributes;
2524
protected final HashMap<String, Integer> instanceMap;
2625
protected final SwanOptions options;
27-
protected SrmList trainData;
26+
protected Dataset dataset;
2827
protected CodeFeatureHandler codeFeatureHandler;
29-
protected SourceFileLoader testData;
3028
protected DocFeatureHandler docFeatureHandler;
3129
protected HashMap<String, Instances> instances;
3230
protected ModelEvaluator.Toolkit toolkit;
@@ -58,11 +56,10 @@ public static FeatureSet.Type getValue(String value) {
5856
}
5957
}
6058

61-
public FeatureSet(SrmList trainData, SourceFileLoader testData, SwanOptions options, ModelEvaluator.Toolkit toolkit) {
59+
public FeatureSet(Dataset dataset, SwanOptions options, ModelEvaluator.Toolkit toolkit) {
6260
this.instanceMap = new HashMap<>();
6361
this.options = options;
64-
this.trainData = trainData;
65-
this.testData = testData;
62+
this.dataset = dataset;
6663
this.toolkit = toolkit;
6764
instances = new HashMap<>();
6865
}
@@ -84,13 +81,13 @@ public List<FeatureSet.Type> initializeFeatures() {
8481
break;
8582
case DOC_MANUAL:
8683

87-
docFeatureHandler = new DocFeatureHandler(trainData.getMethods());
84+
docFeatureHandler = new DocFeatureHandler(dataset.getTrainMethods());
8885
docFeatureHandler.initialiseManualFeatureSet();
8986
docFeatureHandler.evaluateManualFeatureData();
9087
break;
9188
case DOC_AUTO:
9289

93-
docFeatureHandler = new DocFeatureHandler(trainData.getMethods());
90+
docFeatureHandler = new DocFeatureHandler(dataset.getTrainMethods());
9491
docFeatureHandler.initialiseAutomaticFeatureSet();
9592
docFeatureHandler.evaluateAutomaticFeatureData();
9693
break;

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/FeatureSetSelector.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,21 @@
11
package de.fraunhofer.iem.swan.features;
22

33
import de.fraunhofer.iem.swan.cli.SwanOptions;
4-
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
5-
import de.fraunhofer.iem.swan.io.dataset.SrmList;
4+
import de.fraunhofer.iem.swan.io.dataset.Dataset;
65
import de.fraunhofer.iem.swan.model.ModelEvaluator;
76

87
public class FeatureSetSelector {
98

10-
public IFeatureSet select(SrmList trainData, SourceFileLoader testData, SwanOptions options) {
9+
public IFeatureSet select(Dataset dataset, SwanOptions options) {
1110

1211
switch (ModelEvaluator.Toolkit.valueOf(options.getToolkit().toUpperCase())) {
1312

1413
case MEKA:
15-
MekaFeatureSet mekaFeatureSet = new MekaFeatureSet(trainData, testData, options);
14+
MekaFeatureSet mekaFeatureSet = new MekaFeatureSet(dataset, options);
1615
mekaFeatureSet.createFeatures();
1716
return mekaFeatureSet;
1817
case WEKA:
19-
WekaFeatureSet wekaFeatureSet = new WekaFeatureSet(trainData, testData, options);
18+
WekaFeatureSet wekaFeatureSet = new WekaFeatureSet(dataset, options);
2019
wekaFeatureSet.createFeatures();
2120
return wekaFeatureSet;
2221
}

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/MekaFeatureSet.java

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
import de.fraunhofer.iem.swan.cli.SwanOptions;
44
import de.fraunhofer.iem.swan.data.Category;
55
import de.fraunhofer.iem.swan.data.Method;
6-
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
7-
import de.fraunhofer.iem.swan.io.dataset.SrmList;
6+
import de.fraunhofer.iem.swan.io.dataset.Dataset;
87
import de.fraunhofer.iem.swan.model.ModelEvaluator;
98
import de.fraunhofer.iem.swan.util.Util;
109
import meka.filters.unsupervised.attribute.MekaClassAttributes;
@@ -16,8 +15,8 @@
1615

1716
public class MekaFeatureSet extends FeatureSet implements IFeatureSet {
1817

19-
public MekaFeatureSet(SrmList trainData, SourceFileLoader testData, SwanOptions options) {
20-
super(trainData, testData, options, ModelEvaluator.Toolkit.MEKA);
18+
public MekaFeatureSet(Dataset dataset, SwanOptions options) {
19+
super(dataset, options, ModelEvaluator.Toolkit.MEKA);
2120
}
2221

2322
/**
@@ -28,15 +27,13 @@ public void createFeatures() {
2827
List<FeatureSet.Type> featureSets = initializeFeatures();
2928

3029
//Create and set attributes for the train instances
31-
ArrayList<Attribute> trainAttributes = createAttributes(getCategories(options.getAllClasses()), trainData.getMethods(), featureSets);
32-
Instances trainInstances = createInstances(featureSets, trainAttributes, trainData.getMethods(), getCategories(options.getAllClasses()), "train-instances");
30+
ArrayList<Attribute> trainAttributes = createAttributes(getCategories(options.getAllClasses()), dataset.getTrainMethods(), featureSets);
31+
Instances trainInstances = createInstances(featureSets, trainAttributes, dataset.getTrainMethods(), getCategories(options.getAllClasses()), "train-instances");
3332
this.instances.put("train", convertToMekaInstances(trainInstances));
3433

35-
36-
System.out.println(testData.getMethods().size());
3734
//Create and set attributes for the test instances.
38-
ArrayList<Attribute> testAttributes = createAttributes(getCategories(options.getAllClasses()), testData.getMethods(), featureSets);
39-
Instances testInstances = createInstances(featureSets, testAttributes, testData.getMethods(), getCategories(options.getAllClasses()), "test-instances");
35+
ArrayList<Attribute> testAttributes = createAttributes(getCategories(options.getAllClasses()), dataset.getTestMethods(), featureSets);
36+
Instances testInstances = createInstances(featureSets, testAttributes, dataset.getTestMethods(), getCategories(options.getAllClasses()), "test-instances");
4037
this.instances.put("test", convertToMekaInstances(testInstances));
4138
}
4239

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/WekaFeatureSet.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
import de.fraunhofer.iem.swan.cli.SwanOptions;
44
import de.fraunhofer.iem.swan.data.Category;
55
import de.fraunhofer.iem.swan.data.Method;
6-
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
7-
import de.fraunhofer.iem.swan.io.dataset.SrmList;
6+
import de.fraunhofer.iem.swan.io.dataset.Dataset;
87
import de.fraunhofer.iem.swan.model.ModelEvaluator;
98
import de.fraunhofer.iem.swan.util.Util;
109
import weka.core.Attribute;
@@ -18,8 +17,8 @@
1817
*/
1918
public class WekaFeatureSet extends FeatureSet implements IFeatureSet {
2019

21-
public WekaFeatureSet(SrmList trainData, SourceFileLoader testData, SwanOptions options) {
22-
super(trainData, testData, options, ModelEvaluator.Toolkit.WEKA);
20+
public WekaFeatureSet(Dataset dataset, SwanOptions options) {
21+
super(dataset, options, ModelEvaluator.Toolkit.WEKA);
2322
}
2423

2524
/**
@@ -32,10 +31,10 @@ public void createFeatures() {
3231
for (Category category : options.getAllClasses().stream().map(Category::fromText).collect(Collectors.toList())) {
3332

3433
//Create and set attributes for the train instances
35-
ArrayList<Attribute> trainAttributes = createAttributes(category, trainData.getMethods(), featureSets);
34+
ArrayList<Attribute> trainAttributes = createAttributes(category, dataset.getTrainMethods(), featureSets);
3635

3736
String instanceName = category.getId().toLowerCase() + "-train-instances";
38-
Instances trainInstances = createInstances(featureSets, trainAttributes, trainData.getMethods(), Collections.singleton(category), instanceName);
37+
Instances trainInstances = createInstances(featureSets, trainAttributes, dataset.getTrainMethods(), Collections.singleton(category), instanceName);
3938
this.instances.put(category.getId().toLowerCase(), trainInstances);
4039
Util.exportInstancesToArff(trainInstances);
4140

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/code/soot/SourceFileLoader.java

Lines changed: 0 additions & 48 deletions
This file was deleted.
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package de.fraunhofer.iem.swan.io.dataset;
2+
3+
import de.fraunhofer.iem.swan.data.Method;
4+
5+
import java.util.Set;
6+
7+
/**
 * Container that pairs the training {@link SrmList} with the test {@link SrmList}
 * so both can be passed around as a single object.
 *
 * No constructor is declared and neither field has an initializer, so both lists
 * are null until the corresponding setter has been called.
 */
public class Dataset {
8+
9+
// Training list; null until setTrain(...) is called.
private SrmList train;
10+
// Test list; null until setTest(...) is called.
private SrmList test;
11+
12+
/**
 * @return the training list as stored, or null if it has not been set
 */
public SrmList getTrain() {
13+
return train;
14+
}
15+
16+
/**
 * @param train the training list to store (kept by reference, not copied)
 */
public void setTrain(SrmList train) {
17+
this.train = train;
18+
}
19+
20+
/**
 * @return the test list as stored, or null if it has not been set
 */
public SrmList getTest() {
21+
return test;
22+
}
23+
24+
/**
 * @param test the test list to store (kept by reference, not copied)
 */
public void setTest(SrmList test) {
25+
this.test = test;
26+
}
27+
28+
/**
 * Shortcut for {@code getTrain().getMethods()}.
 *
 * @return whatever the training list's getMethods() returns
 * @throws NullPointerException if the training list has not been set
 */
public Set<Method> getTrainMethods() {
29+
return train.getMethods();
30+
}
31+
32+
/**
 * Shortcut for {@code getTest().getMethods()}.
 *
 * @return whatever the test list's getMethods() returns
 * @throws NullPointerException if the test list has not been set
 */
public Set<Method> getTestMethods() {
33+
return test.getMethods();
34+
}
35+
}

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/io/dataset/SrmList.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
import com.fasterxml.jackson.annotation.JsonIgnore;
44
import de.fraunhofer.iem.swan.data.Method;
5+
import de.fraunhofer.iem.swan.util.Util;
6+
7+
import java.io.IOException;
8+
import java.util.HashSet;
59
import java.util.Set;
610
import java.util.stream.Collectors;
711

@@ -11,8 +15,27 @@
1115
public class SrmList {
1216

1317
private Set<Method> methods;
18+
@JsonIgnore
19+
private Set<String> testClasses;
1420

1521
/**
 * Creates a list with an empty set of test classes.
 * Only testClasses is assigned here; the methods field is not initialised
 * by this constructor.
 */
public SrmList() {
22+
testClasses = new HashSet<>();
23+
}
24+
25+
/**
 * Creates a list whose test classes are obtained from
 * Util.getAllClassesFromDirectory(sourceFileDir) and whose method set starts empty.
 *
 * @param sourceFileDir directory handed to Util.getAllClassesFromDirectory
 */
public SrmList(String sourceFileDir) {
26+
try {
27+
testClasses = Util.getAllClassesFromDirectory(sourceFileDir);
28+
} catch (IOException e) {
29+
// NOTE(review): the failure is only printed; testClasses is never assigned on
// this path, so getTestClasses() returns null afterwards. Consider an empty-set
// fallback or propagating the error.
e.printStackTrace();
30+
}
31+
methods = new HashSet<>();
32+
}
33+
34+
35+
/**
 * Passes the current method set to Util.createSubclassAnnotations and then
 * replaces it with the result of Util.sanityCheck(methods, trainingSet).
 *
 * NOTE(review): methods is only initialised by some constructors (the no-arg
 * constructor leaves it unset), so callers presumably need to populate it
 * before calling load -- confirm against the call sites.
 *
 * @param trainingSet training methods forwarded to Util.sanityCheck
 */
public void load(final Set<Method> trainingSet) {
36+
37+
// NOTE(review): "classpath" is a hard-coded literal here; it looks like a
// placeholder for a real classpath value -- TODO confirm what the helper expects.
Util.createSubclassAnnotations(methods, "classpath");
38+
methods = Util.sanityCheck(methods, trainingSet);
1639
}
1740

1841
public SrmList(Set<Method> methodList) {
@@ -27,6 +50,14 @@ public void setMethods(Set<Method> methods) {
2750
this.methods = methods;
2851
}
2952

53+
/**
 * @return the stored set of test class names (the internal reference, not a copy);
 *         may be null if it was never assigned
 */
public Set<String> getTestClasses() {
54+
return testClasses;
55+
}
56+
57+
/**
 * @param testClasses the set of test class names to store (kept by reference, not copied)
 */
public void setTestClasses(Set<String> testClasses) {
57+
this.testClasses = testClasses;
58+
}
60+
3061
public void removeUnclassifiedMethods() {
3162

3263
methods = methods.stream().filter(m -> m.getAllCategories().size() > 0)

0 commit comments

Comments
 (0)