Skip to content

Commit d103586

Browse files
committed
added the OAT analysis in mois.
1 parent 845a6d0 commit d103586

File tree

10 files changed

+1388
-979
lines changed

10 files changed

+1388
-979
lines changed
1.97 KB
Binary file not shown.
-958 Bytes
Binary file not shown.
-544 Bytes
Binary file not shown.
50 Bytes
Binary file not shown.
-182 Bytes
Binary file not shown.

src/de/fraunhofer/iem/mois/FeatureHandler.java

Lines changed: 1244 additions & 873 deletions
Large diffs are not rendered by default.

src/de/fraunhofer/iem/mois/Learner.java

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -49,23 +49,24 @@ public class Learner {
4949
private final boolean CROSS_EVALUATE=true;
5050
private final boolean CLASSIFY=false;
5151

52-
private final int CROSS_EVALUATE_ITERATIONS=10;
52+
private final int CROSS_EVALUATE_ITERATIONS=1;
5353

5454
private final Writer writer;
5555

5656
public Learner(Writer writer) {
5757
this.writer = writer;
5858
}
5959

60-
public void classify(Set<Method> trainingSet, Set<Method> testSet,
60+
public double classify(Set<Method> trainingSet, Set<Method> testSet,
6161
Map<Category, Set<IFeature>> features, Set<Category> categories,
6262
String outputFile, boolean cweMode) throws IOException {
6363

64+
double fmeasure=0;
6465
startAnalysisTime = System.currentTimeMillis();
6566
Map<Category, Integer> counters = new HashMap<Category, Integer>();
6667

6768
// Collect the possible values.
68-
System.out.println("Initializing classifier.");
69+
//System.out.println("Initializing classifier.");
6970
FastVector ordinal = new FastVector();
7071
ordinal.addElement("true");
7172
ordinal.addElement("false");
@@ -80,7 +81,7 @@ public void classify(Set<Method> trainingSet, Set<Method> testSet,
8081

8182
// Collect all attributes for the categories we classify into, and create
8283
// the instance set.
83-
System.out.print("Collecting attributes... ");
84+
//System.out.print("Collecting attributes... ");
8485
Map<IFeature, Attribute> featureAttribs =
8586
new HashMap<IFeature, Attribute>();
8687
FastVector attributes = new FastVector();
@@ -110,10 +111,10 @@ public void classify(Set<Method> trainingSet, Set<Method> testSet,
110111
Attribute idAttr = new Attribute("id", methodStrings);
111112
attributes.addElement(idAttr);
112113

113-
System.out.println(attributes.size() + " attributes collected.");
114+
//System.out.println(attributes.size() + " attributes collected.");
114115

115116
// Set attributes to the train and test instances.
116-
System.out.print("Creating instances... ");
117+
//System.out.print("Creating instances... ");
117118
Instances trainInstances = new Instances("trainingmethods", attributes, 0);
118119
Instances testInstances = new Instances("allmethods", attributes, 0);
119120
trainInstances.setClass(classAttr);
@@ -183,7 +184,7 @@ public void classify(Set<Method> trainingSet, Set<Method> testSet,
183184
testInstances.add(inst);
184185
}
185186
}
186-
System.out.println("Done.");
187+
//System.out.println("Done.");
187188

188189
// Create classifier.
189190
try {
@@ -209,7 +210,7 @@ else if (WEKA_LEARNER_ALL.equals("Logistic"))
209210
classifier = new Logistic();
210211
else
211212
throw new Exception("Wrong WEKA learner!");
212-
System.out.println("Classifier created: " + WEKA_LEARNER_ALL);
213+
// System.out.println("Classifier created: " + WEKA_LEARNER_ALL);
213214

214215
// Save arff data.
215216
ArffSaver saver = new ArffSaver();
@@ -221,17 +222,17 @@ else if (WEKA_LEARNER_ALL.equals("Logistic"))
221222
fileName = fileName.replace(", ", "_");
222223
saver.setFile(new File("Train_" + fileName + ".arff"));
223224
saver.writeBatch();
224-
System.out.println(
225-
"Arff data saved at: " + saver.retrieveFile().getCanonicalPath());
225+
//System.out.println( "Arff data saved at: " + saver.retrieveFile().getCanonicalPath());
226226

227227
// Cross evaluation.
228228
if(CROSS_EVALUATE) {
229229

230230
double precision= 0;
231231
double recall = 0;
232+
double f = 0;
232233
for(int i = 0; i< CROSS_EVALUATE_ITERATIONS; i++)
233234
{
234-
System.out.println("Starting cross evaluation (iteration "+i+").");
235+
//System.out.println("Starting cross evaluation (iteration "+i+").");
235236
Evaluation eval = new Evaluation(trainInstances);
236237

237238
StringBuffer sb = new StringBuffer();
@@ -240,22 +241,25 @@ sb, new Range(attributes.indexOf(idAttr) + 1
240241
+ ""/* "1-" + (attributes.size() - 1) */),
241242
true);
242243
//System.out.println(sb.toString());
243-
System.out.println("Class details: " + eval.toClassDetailsString());
244+
//System.out.println("Class details: " + eval.toClassDetailsString());
244245
precision += eval.weightedPrecision();
245246
recall += eval.weightedRecall();
247+
f += eval.weightedFMeasure();
246248

247-
for (Category counter : counters.keySet())
248-
System.out.println("Cross evaluation finished on a training set of "
249-
+ counters.get(counter) + " " + counter + ".");
249+
//for (Category counter : counters.keySet())
250+
//System.out.println("Cross evaluation finished on a training set of " + counters.get(counter) + " " + counter + ".");
250251
}
251-
System.out.println("The precision over "+ CROSS_EVALUATE_ITERATIONS +" iterations is " + (precision/CROSS_EVALUATE_ITERATIONS));
252-
System.out.println("The recall over "+ CROSS_EVALUATE_ITERATIONS +" iterations is " + (recall/CROSS_EVALUATE_ITERATIONS));
252+
//System.out.println("The precision over "+ CROSS_EVALUATE_ITERATIONS +" iterations is " + round((precision/CROSS_EVALUATE_ITERATIONS),4));
253+
//System.out.println("The recall over "+ CROSS_EVALUATE_ITERATIONS +" iterations is " + round((recall/CROSS_EVALUATE_ITERATIONS),4));
254+
fmeasure = f/CROSS_EVALUATE_ITERATIONS;
255+
System.out.println("The F-measure over "+ CROSS_EVALUATE_ITERATIONS +" iterations is " + fmeasure);
256+
253257
}
254258

255259
// Classification.
256260

257261
if(CLASSIFY) {
258-
System.out.println("Classification starting.");
262+
//System.out.println("Classification starting.");
259263
classifier.buildClassifier(trainInstances);
260264
if (WEKA_LEARNER_ALL.equals("J48")) {
261265
System.out.println(((J48) (classifier)).graph());
@@ -277,7 +281,7 @@ sb, new Range(attributes.indexOf(idAttr) + 1
277281
}
278282
if (!found) System.err.println("Unknown class name");
279283
}
280-
System.out.println("Finished classification.");
284+
//System.out.println("Finished classification.");
281285
}
282286

283287
}
@@ -287,15 +291,21 @@ sb, new Range(attributes.indexOf(idAttr) + 1
287291
}
288292

289293

290-
System.out.println("Writing results to files:");
294+
//System.out.println("Writing results to files:");
291295
writer.writeResultsToFiles(outputFile, methods, categories);
292296
//writer.writeResultsToFilesQWEL(outputFile, methods, categories);
293297

294298
Runtime.getRuntime().gc();
295299
analysisTime = System.currentTimeMillis() - startAnalysisTime;
296-
System.out.println("Time to classify " + categories.toString() + ": "
297-
+ analysisTime + " ms");
300+
//System.out.println("Time to classify " + categories.toString() + ": "+ analysisTime + " ms");
298301

302+
return fmeasure;
299303
// writer.writeRIFLSpecification(outputFile, methods);
300304
}
305+
306+
public double round(double val, int decimals) {
307+
val = val*(10 * decimals);
308+
val = Math.round(val);
309+
return val /(10 * decimals);
310+
}
301311
}

src/de/fraunhofer/iem/mois/Loader.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,7 @@ public Type appliesInternal(Method method) {
8484
}
8585

8686
}.applies(new Method("a", "void", "x.y"));
87-
System.out.println("Loaded " + (methods.size() - methodCount)
88-
+ " methods from the test JAR.");
87+
//System.out.println("Loaded " + (methods.size() - methodCount) + " methods from the test JAR.");
8988
}
9089

9190
public void pruneNone() {
@@ -94,8 +93,7 @@ public void pruneNone() {
9493
if (!m.getCategoriesClassified().isEmpty())
9594
newMethods.add(m);
9695
}
97-
System.out.println(
98-
methods.size() + " methods prunned down to " + newMethods.size());
96+
//System.out.println( methods.size() + " methods prunned down to " + newMethods.size());
9997
methods = newMethods;
10098
}
10199
}

src/de/fraunhofer/iem/mois/Main.java

Lines changed: 107 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public static void main(String[] args) {
3131
}
3232
Main main = new Main();
3333
main.run(args);
34-
System.out.println("Done.");
34+
//System.out.println("Done.");
3535
} catch (IOException e) {
3636
e.printStackTrace();
3737
}
@@ -52,88 +52,118 @@ public static void main(String[] args) {
5252
private static final boolean runAuthentications = true;
5353
private static final boolean runCwes = true;
5454

55+
private static final boolean runOAT = true; // run one at a time analysis
56+
5557
private void run(String[] args) throws IOException {
56-
// Cache the list of classes and the CP.
57-
System.out.println("***** Loading CP");
58-
Set<String> testClasses = Util.getAllClassesFromDirectory(args[0]);
59-
String testCp = Util.buildCP(args[0]);
60-
String trainingCp = Util.buildCP(args[1]);
61-
outputPath = args[3];
62-
System.out
63-
.println("Training set cp: " + trainingCp + "\nTest set cp: " + testCp);
64-
65-
// Cache the features.
66-
System.out.println("***** Loading features");
67-
featureHandler = new FeatureHandler(
68-
trainingCp + System.getProperty("path.separator") + testCp);
69-
featureHandler.initializeFeatures();
70-
71-
// Cache the methods from the training set.
72-
System.out.println("***** Loading train data");
73-
parser = new Parser(trainingCp);
74-
parser.loadTrainingSet(Collections.singleton(args[2]));
75-
76-
// Cache the methods from the testing set.
77-
System.out.println("***** Loading test data");
78-
loader = new Loader(testCp);
79-
loader.loadTestSet(testClasses, parser.methods());
80-
81-
// Prepare classifier.
82-
System.out.println("***** Preparing classifier");
83-
writer = new Writer(loader.methods());
84-
learner = new Learner(writer);
85-
86-
// Classify.
87-
if(runSources)
88-
runClassifier(
89-
new HashSet<Category>(Arrays.asList(Category.SOURCE, Category.NONE)),
90-
false);
91-
if(runSinks)
92-
runClassifier(
93-
new HashSet<Category>(Arrays.asList(Category.SINK, Category.NONE)),
94-
false);
95-
if(runSanitizers)
96-
runClassifier(
97-
new HashSet<Category>(Arrays.asList(Category.SANITIZER, Category.NONE)),
98-
false);
99-
if(runAuthentications)
100-
runClassifier(new HashSet<Category>(Arrays.asList(
101-
Category.AUTHENTICATION_TO_HIGH, Category.AUTHENTICATION_TO_LOW,
102-
Category.AUTHENTICATION_NEUTRAL, Category.NONE)), false);
103-
104-
// Save data from last classification.
105-
loader.resetMethods();
106-
107-
// Cache the methods from the second test set.
108-
System.out.println("***** Loading 2nd test set");
109-
loader.pruneNone();
110-
111-
if (runCwes) {
112-
//Run classifications for all cwes in JSON file.
113-
for (String cweId : parser.cwe()) {
114-
runClassifier(
115-
new HashSet<Category>(Arrays
116-
.asList(Category.getCategoryForCWE(cweId), Category.NONE)),
117-
true);
118-
}
119-
}
120-
121-
System.out.println("***** Writing final results");
122-
Set<String> tmpFiles = Util.getFiles(args[3]);
123-
writer.printResultsTXT(loader.methods(), tmpFiles, args[3] + File.separator + "txt" + File.separator + "output.txt");
124-
writer.writeResultsQWEL(loader.methods(), args[3] + File.separator + "qwel" + File.separator + "output.qwel");
125-
writer.writeResultsSoot(loader.methods(), args[3] + File.separator + "soot-qwel" + File.separator + "output.sqwel");
126-
writer.printResultsJSON(loader.methods(), tmpFiles, args[3] + File.separator + "json" + File.separator + "output.json" );
58+
int iterations = 0;
59+
if(runOAT)
60+
iterations = 206; // number of features //TODO: improve code: better borders here.
61+
62+
// for OAT analysis. Each feature is disabled once.
63+
for(int i = 0; i<=iterations; i++)
64+
{
65+
if (i == 0)
66+
System.out.println("***** Running with all features.");
67+
else {
68+
System.out.println("***** Running without " + i + "th feature");
69+
}
70+
// Cache the list of classes and the CP.
71+
//System.out.println("***** Loading CP");
72+
Set<String> testClasses = Util.getAllClassesFromDirectory(args[0]);
73+
String testCp = Util.buildCP(args[0]);
74+
String trainingCp = Util.buildCP(args[1]);
75+
outputPath = args[3];
76+
//System.out.println("Training set cp: " + trainingCp + "\nTest set cp: " + testCp);
77+
78+
79+
// Cache the features.
80+
//System.out.println("***** Loading features");
81+
featureHandler = new FeatureHandler(
82+
trainingCp + System.getProperty("path.separator") + testCp);
83+
featureHandler.initializeFeatures(i); // use 0 for all feature instances
84+
85+
// Cache the methods from the training set.
86+
//System.out.println("***** Loading train data");
87+
parser = new Parser(trainingCp);
88+
parser.loadTrainingSet(Collections.singleton(args[2]));
89+
90+
// Cache the methods from the testing set.
91+
//System.out.println("***** Loading test data");
92+
loader = new Loader(testCp);
93+
loader.loadTestSet(testClasses, parser.methods());
94+
95+
// Prepare classifier.
96+
//System.out.println("***** Preparing classifier");
97+
writer = new Writer(loader.methods());
98+
learner = new Learner(writer);
99+
100+
double averageF=0;
101+
int iter =0;
102+
// Classify.
103+
if(runSources){
104+
averageF+= runClassifier(
105+
new HashSet<Category>(Arrays.asList(Category.SOURCE, Category.NONE)),
106+
false);
107+
iter++;
108+
}
109+
if(runSinks) {
110+
averageF+= runClassifier(
111+
new HashSet<Category>(Arrays.asList(Category.SINK, Category.NONE)),
112+
false);
113+
iter++;
114+
}
115+
116+
if(runSanitizers) {
117+
averageF+= runClassifier(
118+
new HashSet<Category>(Arrays.asList(Category.SANITIZER, Category.NONE)),
119+
false);
120+
iter++;
121+
}
122+
123+
if(runAuthentications)
124+
{
125+
averageF+=runClassifier(new HashSet<Category>(Arrays.asList(
126+
Category.AUTHENTICATION_TO_HIGH, Category.AUTHENTICATION_TO_LOW,
127+
Category.AUTHENTICATION_NEUTRAL, Category.NONE)), false);
128+
iter++;
129+
}
130+
// Save data from last classification.
131+
loader.resetMethods();
132+
133+
// Cache the methods from the second test set.
134+
//System.out.println("***** Loading 2nd test set");
135+
loader.pruneNone();
136+
137+
if (runCwes) {
138+
//Run classifications for all cwes in JSON file.
139+
for (String cweId : parser.cwe()) {
140+
averageF += runClassifier(
141+
new HashSet<Category>(Arrays
142+
.asList(Category.getCategoryForCWE(cweId), Category.NONE)),
143+
true);
144+
iter++;
145+
}
146+
}
147+
System.out.println("***** F Measure is " + averageF/iter);
148+
149+
//System.out.println("***** Writing final results");
150+
Set<String> tmpFiles = Util.getFiles(args[3]);
151+
writer.printResultsTXT(loader.methods(), tmpFiles, args[3] + File.separator + "txt" + File.separator + "output.txt");
152+
writer.writeResultsQWEL(loader.methods(), args[3] + File.separator + "qwel" + File.separator + "output.qwel");
153+
writer.writeResultsSoot(loader.methods(), args[3] + File.separator + "soot-qwel" + File.separator + "output.sqwel");
154+
writer.printResultsJSON(loader.methods(), tmpFiles, args[3] + File.separator + "json" + File.separator + "output.json" );
155+
}
156+
127157
}
128158

129-
private void runClassifier(HashSet<Category> categories, boolean cweMode)
159+
private double runClassifier(HashSet<Category> categories, boolean cweMode)
130160
throws IOException {
131161
parser.resetMethods();
132162
loader.resetMethods();
133-
System.out
134-
.println("***** Starting classification for " + categories.toString());
135-
learner.classify(parser.methods(), loader.methods(),
163+
//System.out.println("***** Starting classification for " + categories.toString());
164+
return learner.classify(parser.methods(), loader.methods(),
136165
featureHandler.features(), categories, outputPath + File.separator + "txt"+ File.separator+"output.txt", cweMode);
137166
}
138167

139-
}
168+
169+
}

0 commit comments

Comments
 (0)