
Commit 018b4a4

Merge pull request #50 from secure-software-engineering/fix/meka-instances-generation
Fix/meka test and train instances generation
2 parents 86d360e + 1366d61 commit 018b4a4


4 files changed (+69 lines, -20 lines)


swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/MekaFeatureSet.java

Lines changed: 62 additions & 12 deletions
@@ -11,6 +11,7 @@
 import weka.core.Instances;
 import weka.core.converters.ArffLoader;
 import weka.filters.Filter;
+import weka.filters.unsupervised.attribute.Remove;
 
 import java.io.File;
 import java.io.IOException;
@@ -36,27 +37,39 @@ public void createFeatures() {
 
         //Create and set attributes for the train instances
         if (options.getArffInstancesFiles().isEmpty()) {
-            ArrayList<Attribute> trainAttributes = createAttributes(getCategories(options.getAllClasses()), dataset.getTrainMethods(), featureSets);
+            ArrayList<Attribute> trainAttributes = createAttributes(getCategories(options.getAllClasses()), dataset.getTrainMethods());
             structure = new Instances("swan-srm", trainAttributes, 0);
-            convertToMekaInstances(structure);
 
             Set<Method> methods = new HashSet<>(dataset.getTrainMethods());
             methods.addAll(dataset.getTestMethods());
 
             evaluateFeatureData(methods);
-            trainInstances = createInstances(structure, trainAttributes, dataset.getTrainMethods(), getCategories(options.getAllClasses()));
+            trainInstances = createInstances(new Instances(structure), trainAttributes, dataset.getTrainMethods(), getCategories(options.getAllClasses()));
         } else {
             ArffLoader loader = new ArffLoader();
 
             try {
                 loader.setSource(new File(options.getArffInstancesFiles().get(0)));
+
                 trainInstances = loader.getDataSet();
                 structure = loader.getStructure();
+
+                //append remaining instances
+                if (options.getArffInstancesFiles().size() > 1) {
+                    for (int x = 1; x < options.getArffInstancesFiles().size(); x++) {
+
+                        ArffLoader arffLoader = new ArffLoader();
+                        arffLoader.setSource(new File(options.getArffInstancesFiles().get(x)));
+
+                        trainInstances = mergeInstances(trainInstances, arffLoader.getDataSet());
+                        structure = mergeInstances(structure, arffLoader.getStructure());
+                    }
+                }
             } catch (IOException e) {
                 e.printStackTrace();
             }
 
-            createAttributes(getCategories(options.getAllClasses()), dataset.getTestMethods(), featureSets);
+            createAttributes(getCategories(options.getAllClasses()), dataset.getTestMethods());
             evaluateFeatureData(dataset.getTestMethods());
         }
         testInstances = createTestSet();
@@ -65,24 +78,61 @@ public void createFeatures() {
         this.instances.put("test", convertToMekaInstances(testInstances));
     }
 
+    /**
+     * Merge two instances into one instances object.
+     * @param first instances
+     * @param second instances
+     * @return merged instances
+     */
+    public Instances mergeInstances(Instances first, Instances second) {
+
+        for (int c = 0; c < 12; c++) {
+            second.renameAttribute(second.attribute(c), "b_" + second.attribute(c).name());
+        }
+
+        Instances instances = Instances.mergeInstances(first, second);
+
+        ArrayList<Integer> indices = new ArrayList<>();
+
+        for (int att = 0; att < instances.numAttributes(); att++) {
+            if (instances.attribute(att).name().startsWith("b_")) {
+                indices.add(att);
+            }
+        }
+
+        Remove removeFilter = new Remove();
+        removeFilter.setAttributeIndicesArray(indices.stream().mapToInt(i -> i).toArray());
+        removeFilter.setInvertSelection(false);
+
+        try {
+            removeFilter.setInputFormat(instances);
+            instances = Filter.useFilter(instances, removeFilter);
+
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+
+        return instances;
+    }
+
     public Instances createTestSet() {
 
+        Instances testInstances = new Instances(structure);
         //Create and set attributes for the test instances.
         Attribute idAttr = new Attribute("id", dataset.getTestMethods().stream().map(Method::getArffSafeSignature).collect(Collectors.toList()));
-        structure.replaceAttributeAt(idAttr, structure.attribute("id").index());
-        ArrayList<Attribute> aList = Collections.list(structure.enumerateAttributes());
+        testInstances.replaceAttributeAt(idAttr, testInstances.attribute("id").index());
+        ArrayList<Attribute> aList = Collections.list(testInstances.enumerateAttributes());
 
-        return createInstances(structure, aList, dataset.getTestMethods(), getCategories(options.getAllClasses()));
+        return createInstances(testInstances, aList, dataset.getTestMethods(), getCategories(options.getAllClasses()));
     }
 
     /**
      * Creates instances and adds attributes for the features, classes, and method signatures.
      *
-     * @param categories  list of categories
-     * @param methods     list of training methods
-     * @param featureSets classification mode
+     * @param categories list of categories
+     * @param methods    list of training methods
      */
-    public ArrayList<Attribute> createAttributes(Set<Category> categories, Set<Method> methods, List<FeatureSet.Type> featureSets) {
+    public ArrayList<Attribute> createAttributes(Set<Category> categories, Set<Method> methods) {
 
         ArrayList<Attribute> attributes = new ArrayList<>();
 
@@ -107,7 +157,7 @@ public Instances convertToMekaInstances(Instances instances) {
             output = Filter.useFilter(instances, filter);
             output.setRelationName("swan-srm:" + output.relationName());
 
-            Util.exportInstancesToArff(output);
+            Util.exportInstancesToArff(output, "meka");
         } catch (Exception e) {
            e.printStackTrace();
        }
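
For readers unfamiliar with the WEKA calls used by the new mergeInstances helper: the static Instances.mergeInstances(first, second) joins two datasets column-wise (both must contain the same number of rows), and the unsupervised Remove filter drops attributes by index. The sketch below shows that merge-then-prune pattern in isolation; the class name, file paths, and the choice to rename and drop a single duplicated column are illustrative assumptions, not code from the SWAN repository.

import weka.core.Instances;
import weka.core.converters.ArffLoader;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

import java.io.File;

// Hypothetical stand-alone sketch, not part of the SWAN code base.
public class MergeArffSketch {

    public static void main(String[] args) throws Exception {
        // Load two ARFF files that describe the same rows with different feature columns (assumed paths).
        ArffLoader firstLoader = new ArffLoader();
        firstLoader.setSource(new File("first-features.arff"));
        Instances first = firstLoader.getDataSet();

        ArffLoader secondLoader = new ArffLoader();
        secondLoader.setSource(new File("second-features.arff"));
        Instances second = secondLoader.getDataSet();

        // Rename a shared attribute in the second dataset so the merged header has no name clash.
        second.renameAttribute(second.attribute(0), "b_" + second.attribute(0).name());

        // Column-wise merge: both datasets must contain the same number of instances.
        Instances merged = Instances.mergeInstances(first, second);

        // Drop the duplicated "b_"-prefixed column again using the Remove filter.
        Remove remove = new Remove();
        remove.setAttributeIndicesArray(new int[]{first.numAttributes()});
        remove.setInvertSelection(false);
        remove.setInputFormat(merged);
        merged = Filter.useFilter(merged, remove);

        System.out.println(merged.numAttributes() + " attributes after merge and prune");
    }
}

Note that the committed helper hard-codes the first 12 attributes of the second dataset as the columns to prefix and later remove; if the shared ARFF structure changes, that constant has to change with it.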

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/WekaFeatureSet.java

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ public void createFeatures() {
             String instanceName = category.getId().toLowerCase() + "-train-instances";
             Instances trainInstances = createInstances(trainAttributes, dataset.getTrainMethods(), Collections.singleton(category));
             this.instances.put(category.getId().toLowerCase(), trainInstances);
-            Util.exportInstancesToArff(trainInstances);
+            Util.exportInstancesToArff(trainInstances, "weka");
 
             //Create and set attributes for the test instances.
             /*ArrayList<Attribute> testAttributes = createAttributes(getCategories(category), testData.getMethods(), featureSets);

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/model/toolkit/MLPlan.java

Lines changed: 2 additions & 2 deletions
@@ -49,7 +49,7 @@ public MLPlan() {
      */
     public HashMap<String, ArrayList<Double>> evaluateDataset(Instances instances1) {
 
-        String arffFilePath = Util.exportInstancesToArff(instances1);
+        String arffFilePath = Util.exportInstancesToArff(instances1, "mlplan");
         ArffDatasetAdapter arffDatasetAdapter = new ArffDatasetAdapter();
 
         String mClass = Util.getClassName(instances1);
@@ -148,7 +148,7 @@ public HashMap<String, ArrayList<Double>> evaluateDataset(Instances instances1)
     public void evaluateDataset(Instances instances, int k) {
 
         //arffFilePath = "swan/swan_core/src/main/resources/waveform.arff";
-        String arffFilePath = Util.exportInstancesToArff(instances);
+        String arffFilePath = Util.exportInstancesToArff(instances, "mlplan");
         ArffDatasetAdapter arffDatasetAdapter = new ArffDatasetAdapter();
 
         String mClass = Util.getClassName(instances);

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/util/Util.java

Lines changed: 4 additions & 5 deletions
@@ -294,10 +294,10 @@ public static String getClassName(Instances instances) {
      *
      * @param instances WEKA instances to be exported
      */
-    public static String exportInstancesToArff(Instances instances) {
+    public static String exportInstancesToArff(Instances instances, String source) {
         ArffSaver saver = new ArffSaver();
 
-        if (SwanPipeline.options.isExportArffData() && !SwanPipeline.options.getOutputDir().isEmpty() ) {
+        if (SwanPipeline.options.isExportArffData() && !SwanPipeline.options.getOutputDir().isEmpty()) {
             // Save arff data.
             saver.setInstances(instances);
 
@@ -308,16 +308,15 @@ public static String exportInstancesToArff(Instances instances) {
                 if (instances.relationName().contains(":"))
                     relationName = relationName.substring(0, instances.relationName().indexOf(":"));
 
-                String arffFile = SwanPipeline.options.getOutputDir() + File.separator + "arff-data" + File.separator + relationName + ".arff";
+                String arffFile = SwanPipeline.options.getOutputDir() + File.separator + "arff-data" + File.separator + relationName + "-" + source + ".arff";
                 saver.setFile(new File(arffFile));
                 saver.writeBatch();
             } catch (IOException e) {
                 e.printStackTrace();
             }
             return saver.retrieveFile().getAbsolutePath();
         }
-
-        return null;
+        return null;
     }
 
     /**
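
With the added source parameter, each toolkit path now writes its export to a distinct file under <outputDir>/arff-data, so the MEKA, WEKA, and ML-Plan flows no longer overwrite one another. A usage fragment for illustration only, assuming exporting is enabled, the output directory is "out", the relation name resolves to "swan-srm", and the instance variables are placeholders:

// Illustrative fragment; variable names and resulting paths are assumptions.
Util.exportInstancesToArff(mekaInstances, "meka");     // -> out/arff-data/swan-srm-meka.arff
Util.exportInstancesToArff(wekaInstances, "weka");     // -> out/arff-data/swan-srm-weka.arff
Util.exportInstancesToArff(mlplanInstances, "mlplan"); // -> out/arff-data/swan-srm-mlplan.arff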
