Skip to content

Commit df7a6a8

Browse files
committed
Performance improvement: filter out the id attribute before
classification
1 parent 86d96a1 commit df7a6a8

File tree

7 files changed

+33
-20
lines changed

7 files changed

+33
-20
lines changed

.classpath

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
44
<classpathentry kind="src" path="src"/>
55
<classpathentry kind="lib" path="lib/json-simple-1.1.1.jar"/>
6-
<classpathentry kind="lib" path="lib/soot-infoflow-trunk.jar"/>
7-
<classpathentry kind="lib" path="lib/soot.jar"/>
86
<classpathentry kind="lib" path="lib/weka.jar"/>
7+
<classpathentry kind="lib" path="lib/soot-infoflow-trunk.jar"/>
98
<classpathentry kind="output" path="bin"/>
109
</classpath>
830 Bytes
Binary file not shown.
-287 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

src/de/fraunhofer/iem/mois/Learner.java

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import weka.classifiers.bayes.NaiveBayes;
2323
import weka.classifiers.functions.Logistic;
2424
import weka.classifiers.functions.SMO;
25+
import weka.classifiers.meta.FilteredClassifier;
2526
import weka.classifiers.rules.JRip;
2627
import weka.classifiers.rules.OneR;
2728
import weka.classifiers.trees.DecisionStump;
@@ -32,6 +33,7 @@
3233
import weka.core.Instances;
3334
import weka.core.Range;
3435
import weka.core.converters.ArffSaver;
36+
import weka.filters.unsupervised.attribute.Remove;
3537

3638
/**
3739
* Finds possible sources and sinks in a given set of system methods using a
@@ -46,8 +48,8 @@ public class Learner {
4648
private long startAnalysisTime;
4749
private long analysisTime;
4850

49-
private final boolean CROSS_EVALUATE=true;
50-
private final boolean CLASSIFY=false;
51+
private final boolean CROSS_EVALUATE=false;
52+
private final boolean CLASSIFY=true;
5153

5254
private final int CROSS_EVALUATE_ITERATIONS=1;
5355

@@ -61,6 +63,7 @@ public double classify(Set<Method> trainingSet, Set<Method> testSet,
6163
Map<Category, Set<IFeature>> features, Set<Category> categories,
6264
String outputFile, boolean cweMode) throws IOException {
6365

66+
6467
double fmeasure=0;
6568
startAnalysisTime = System.currentTimeMillis();
6669
Map<Category, Integer> counters = new HashMap<Category, Integer>();
@@ -186,32 +189,36 @@ public double classify(Set<Method> trainingSet, Set<Method> testSet,
186189
}
187190
//System.out.println("Done.");
188191

192+
193+
194+
189195
// Create classifier.
190196
try {
191197
// instances.randomize(new Random(1337));
192-
Classifier classifier = null;
198+
FilteredClassifier classifier = new FilteredClassifier();
193199
// (IBK / kNN) vs. SMO vs. (J48 vs. JRIP) vs. NaiveBayes
194200
// MultiClassClassifier for ClassifierPerformanceEvaluator
195201
if (WEKA_LEARNER_ALL.equals("BayesNet"))
196-
classifier = new BayesNet();
202+
classifier.setClassifier(new BayesNet());
197203
else if (WEKA_LEARNER_ALL.equals("NaiveBayes"))
198-
classifier = new NaiveBayes();
204+
classifier.setClassifier(new NaiveBayes());
199205
else if (WEKA_LEARNER_ALL.equals("J48"))
200-
classifier = new J48();
206+
classifier.setClassifier(new J48());
201207
else if (WEKA_LEARNER_ALL.equals("SMO"))
202-
classifier = new SMO();
208+
classifier.setClassifier(new SMO());
203209
else if (WEKA_LEARNER_ALL.equals("JRip"))
204-
classifier = new JRip();
210+
classifier.setClassifier(new JRip());
205211
else if (WEKA_LEARNER_ALL.equals("DecisionStump"))
206-
classifier = new DecisionStump();
212+
classifier.setClassifier(new DecisionStump());
207213
else if (WEKA_LEARNER_ALL.equals("OneR"))
208-
classifier = new OneR();
214+
classifier.setClassifier(new OneR());
209215
else if (WEKA_LEARNER_ALL.equals("Logistic"))
210-
classifier = new Logistic();
216+
classifier.setClassifier(new Logistic());
211217
else
212218
throw new Exception("Wrong WEKA learner!");
213219
// System.out.println("Classifier created: " + WEKA_LEARNER_ALL);
214220

221+
215222
// Save arff data.
216223
ArffSaver saver = new ArffSaver();
217224
saver.setInstances(trainInstances);
@@ -259,15 +266,23 @@ sb, new Range(attributes.indexOf(idAttr) + 1
259266
// Classification.
260267

261268
if(CLASSIFY) {
269+
270+
Remove rm = new Remove();
271+
rm.setAttributeIndicesArray(new int[]{idAttr.index()});
272+
classifier.setFilter(rm);
273+
262274
//System.out.println("Classification starting.");
263275
classifier.buildClassifier(trainInstances);
264276
if (WEKA_LEARNER_ALL.equals("J48")) {
265-
System.out.println(((J48) (classifier)).graph());
277+
System.out.println(((classifier)).graph());
266278
}
267279
for (int instIdx = 0; instIdx < testInstances.numInstances(); instIdx++) {
268280
Instance inst = testInstances.instance(instIdx);
269281
assert inst.classIsMissing();
270282
Method meth = instanceMethods.get(inst.stringValue(idAttr));
283+
284+
285+
271286
double d = classifier.classifyInstance(inst);
272287
String cName = testInstances.classAttribute().value((int) d);
273288
boolean found = false;
@@ -297,7 +312,7 @@ sb, new Range(attributes.indexOf(idAttr) + 1
297312

298313
Runtime.getRuntime().gc();
299314
analysisTime = System.currentTimeMillis() - startAnalysisTime;
300-
//System.out.println("Time to classify " + categories.toString() + ": "+ analysisTime + " ms");
315+
System.out.println("Time to classify " + categories.toString() + ": "+ analysisTime + " ms");
301316

302317
return fmeasure;
303318
// writer.writeRIFLSpecification(outputFile, methods);

src/de/fraunhofer/iem/mois/Main.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,10 @@ public static void main(String[] args) {
4949
private static final boolean runSources = true;
5050
private static final boolean runSinks = true;
5151
private static final boolean runSanitizers = true;
52-
private static final boolean runAuthentications = true;
52+
private static final boolean runAuthentications = false;
5353
private static final boolean runCwes = true;
5454

55-
private static final boolean runOAT = true; // run one at a time analysis
55+
private static final boolean runOAT = false; // run one at a time analysis
5656

5757
private void run(String[] args) throws IOException {
5858
int iterations = 0;
@@ -144,7 +144,7 @@ private void run(String[] args) throws IOException {
144144
iter++;
145145
}
146146
}
147-
System.out.println("***** F Measure is " + averageF/iter);
147+
//System.out.println("***** F Measure is " + averageF/iter);
148148

149149
//System.out.println("***** Writing final results");
150150
Set<String> tmpFiles = Util.getFiles(args[3]);

src/de/fraunhofer/iem/mois/data/Category.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,9 @@ public enum Category {
1414
Constants.AUTHENTICATION_NOCHANGE, false), SANITIZER(Constants.SANITIZER,
1515
false), NONE(Constants.NONE, false),
1616

17-
CWE089("CWE089", true), CWE306("CWE306", true), CWE078("CWE078",
17+
CWE089("CWE089", true),CWE306("CWE306", true), CWE078("CWE078",
1818
true), CWE862("CWE862", true), CWE863("CWE863",
1919
true), CWE601("CWE601", true), CWETEST("CWEtest", true), CWE079("CWE079",true), CWE_NONE("none", true);
20-
2120
private final String id;
2221
private final boolean cwe;
2322

0 commit comments

Comments
 (0)