Skip to content

Commit 4284a91

Browse files
committed
Refactor and clean up main class
1 parent 8572053 commit 4284a91

File tree

1 file changed

+159
-88
lines changed
  • swan_core/src/main/java/de/fraunhofer/iem/swan

1 file changed

+159
-88
lines changed
Lines changed: 159 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,23 @@
11
package de.fraunhofer.iem.swan;
22

33
import de.fraunhofer.iem.swan.data.Category;
4+
import de.fraunhofer.iem.swan.doc.features.DocFeatureHandler;
5+
import de.fraunhofer.iem.swan.doc.features.automatic.AutomaticFeatureHandler;
6+
import de.fraunhofer.iem.swan.doc.features.automatic.DocCommentVector;
7+
import de.fraunhofer.iem.swan.doc.features.manual.ManualFeaturesHandler;
8+
import de.fraunhofer.iem.swan.doc.util.Utils;
9+
import de.fraunhofer.iem.swan.features.FeatureHandler;
10+
import de.fraunhofer.iem.swan.io.FileUtility;
11+
import de.fraunhofer.iem.swan.io.Loader;
12+
import de.fraunhofer.iem.swan.io.Parser;
13+
import de.fraunhofer.iem.swan.io.Writer;
14+
import de.fraunhofer.iem.swan.model.InstancesHandler;
15+
import de.fraunhofer.iem.swan.model.Learner;
416
import de.fraunhofer.iem.swan.util.SwanConfig;
17+
import de.fraunhofer.iem.swan.util.Util;
18+
import org.slf4j.Logger;
19+
import org.slf4j.LoggerFactory;
20+
import weka.core.Instances;
521

622
import java.io.File;
723
import java.io.IOException;
@@ -15,6 +31,34 @@
1531

1632
public class Main {
1733

34+
private Learner learner;
35+
private Loader loader;
36+
private Parser parser;
37+
private FeatureHandler featureHandler;
38+
private String outputPath;
39+
private Writer writer;
40+
41+
// Configuration tags for debugging
42+
private static final boolean runSources = true;
43+
private static final boolean runSinks = true;
44+
private static final boolean runSanitizers = true;
45+
private static final boolean runAuthentications = true;
46+
private static final boolean runRelevant = true;
47+
private static final boolean runCwes = true;
48+
49+
private static final boolean runOAT = false; // run one at a time analysis
50+
private static final Logger logger = LoggerFactory.getLogger(Main.class);
51+
52+
private static final InstancesHandler.INSTANCE_SET INSTANCE_TYPE = InstancesHandler.INSTANCE_SET.SWAN_SWANDOC_MANUAL;
53+
private static final Learner.LEARN_MODE LEARNING_MODE = Learner.LEARN_MODE.MANUAL;
54+
55+
public static String INPUT = "/Users/oshando/Projects/thesis/03-code/swandoc/src/main/resources/training-jars";
56+
public static String JAVADOC_OUTPUT = "/Users/oshando/Projects/thesis/03-code/training-docs";
57+
public static String TRAINING_SET = "/Users/oshando/Projects/thesis/03-code/swandoc/src/main/resources/training-set-javadoc.json";
58+
59+
60+
DocFeatureHandler docFeatureHandler;
61+
1862
public static void main(String[] args) {
1963

2064
try {
@@ -44,30 +88,11 @@ public static void main(String[] args) {
4488

4589
Main main = new Main();
4690
main.run(sourceDir, trainSourceCode, trainJson, outputDir);
47-
// System.out.println("Done.");
48-
} catch (IOException e) {
49-
e.printStackTrace();
50-
} catch (InterruptedException e) {
91+
} catch (IOException | InterruptedException e) {
5192
e.printStackTrace();
5293
}
5394
}
5495

55-
private Learner learner;
56-
private Loader loader;
57-
private Parser parser;
58-
private FeatureHandler featureHandler;
59-
private String outputPath;
60-
private Writer writer;
61-
62-
// Configuration tags for debugging
63-
private static final boolean runSources = true;
64-
private static final boolean runSinks = true;
65-
private static final boolean runSanitizers = true;
66-
private static final boolean runAuthentications = false;
67-
private static final boolean runCwes = true;
68-
69-
private static final boolean runOAT = false; // run one at a time analysis
70-
7196
/**
7297
* This method executes the analysis and can also be called from outside by
7398
* clients. It uses the builtin training data.
@@ -102,8 +127,7 @@ public void run(String sourceDir, String trainSourceCode, String trainJson, Stri
102127
throws IOException, InterruptedException {
103128

104129
// This helper object keeps track of created temporary directories and files to
105-
// to be deleted before exiting the
106-
// application.
130+
// be deleted before exiting the application.
107131
FileUtility fileUtility = new FileUtility();
108132

109133
if (trainJson == null) {
@@ -120,124 +144,171 @@ public void run(String sourceDir, String trainSourceCode, String trainJson, Stri
120144
internalRun(sourceDir, trainSourceCode, trainJson, outputDir);
121145

122146
} finally {
123-
124147
// Delete temporary files and folders that have been created.
125148
fileUtility.dispose();
126149
}
127-
128150
}
129151

130152
private void internalRun(String sourceDir, String trainSourceCode, String trainJson, String outputDir)
131153
throws IOException, InterruptedException {
132154

155+
long startAnalysisTime = System.currentTimeMillis();
156+
133157
int iterations = 0;
134158
if (runOAT)
135159
iterations = 206; // number of features //TODO: improve code: better borders here.
136160

137161
// for OAT analysis. Each feature is disabled once.
138162
for (int i = 0; i <= iterations; i++) {
139163
if (i == 0)
140-
System.out.println("***** Running with all features.");
164+
logger.info("Running with all features.");
141165
else {
142-
System.out.println("***** Running without " + i + "th feature");
166+
logger.info("Running without " + i + "th feature");
143167
}
168+
144169
// Cache the list of classes and the CP.
145-
// System.out.println("***** Loading CP");
146170
Set<String> testClasses = Util.getAllClassesFromDirectory(sourceDir);
147171
String testCp = Util.buildCP(sourceDir);
172+
173+
logger.info("Loading train data from {}", trainSourceCode);
148174
String trainingCp = Util.buildCP(trainSourceCode);
149175
outputPath = outputDir;
150-
// System.out.println("Training set cp: " + trainingCp + "\nTest set cp: " +
151-
// testCp);
152-
153-
// Cache the features.
154-
// System.out.println("***** Loading features");
155-
featureHandler = new FeatureHandler(trainingCp + System.getProperty("path.separator") + testCp);
156-
featureHandler.initializeFeatures(i); // use 0 for all feature instances
157176

158177
// Cache the methods from the training set.
159-
// System.out.println("***** Loading train data");
160178
parser = new Parser(trainingCp);
161179
parser.loadTrainingSet(Collections.singleton(trainJson));
180+
logger.info("{} training methods, distribution={}",
181+
parser.methods().size(), Utils.countCategories(parser.methods(), false));
182+
183+
//Remove methods that do not have method doc comments
184+
parser.removeUndocumentedMethods();
185+
logger.info("Remove undocumented training methods. Remaining {}, distribution={}",
186+
parser.methods().size(), Utils.countCategories(parser.methods(), false));
162187

163188
// Cache the methods from the testing set.
164-
// System.out.println("***** Loading test data");
189+
logger.info("Loading test data from {}", sourceDir);
165190
loader = new Loader(testCp);
166191
loader.loadTestSet(testClasses, parser.methods());
167192

193+
// Cache the features.
194+
logger.info("Loading feature instances");
195+
featureHandler = new FeatureHandler(trainingCp + System.getProperty("path.separator") + testCp);
196+
featureHandler.initializeFeatures(i); // use 0 for all feature instances
197+
198+
//Populate SWAN feature attributes
199+
docFeatureHandler = null;
200+
switch (INSTANCE_TYPE) {
201+
case SWANDOC_MANUAL:
202+
case SWAN_SWANDOC_MANUAL:
203+
204+
docFeatureHandler = new DocFeatureHandler(parser.getMethods());
205+
docFeatureHandler.initialiseManualFeatureSet();
206+
docFeatureHandler.evaluateManualFeatureData();
207+
break;
208+
case SWANDOC_AUTOMATIC:
209+
case SWAN_SWANDOC_AUTOMATIC:
210+
211+
docFeatureHandler = new DocFeatureHandler(parser.getMethods());
212+
docFeatureHandler.initialiseAutomaticFeatureSet();
213+
docFeatureHandler.evaluateAutomaticFeatureData();
214+
break;
215+
}
216+
168217
// Prepare classifier.
169-
// System.out.println("***** Preparing classifier");
218+
logger.info("Preparing classifier");
170219
writer = new Writer(loader.methods());
171220
learner = new Learner(writer);
172221

173-
double averageF = 0;
174-
int iter = 0;
175-
// Classify.
176-
if (runSources) {
177-
averageF += runClassifier(new HashSet<Category>(Arrays.asList(Category.SOURCE, Category.NONE)), false);
178-
iter++;
179-
}
180-
if (runSinks) {
181-
averageF += runClassifier(new HashSet<Category>(Arrays.asList(Category.SINK, Category.NONE)), false);
182-
iter++;
183-
}
184-
185-
if (runSanitizers) {
186-
averageF += runClassifier(new HashSet<Category>(Arrays.asList(Category.SANITIZER, Category.NONE)),
187-
false);
188-
iter++;
189-
}
222+
/*
223+
FIRST PHASE - binary classification for each of the categories.
224+
(1) Classify: source, sink, sanitizer,
225+
auth-no-change, auth-unsafe-state, auth-safe-state
226+
(2) Classify: relevant
227+
*/
228+
runClassEvaluation(false);
190229

191-
if (runAuthentications) {
192-
averageF += runClassifier(
193-
new HashSet<Category>(Arrays.asList(Category.AUTHENTICATION_TO_HIGH,
194-
Category.AUTHENTICATION_TO_LOW, Category.AUTHENTICATION_NEUTRAL, Category.NONE)),
195-
false);
196-
iter++;
197-
}
198230
// Save data from last classification.
199231
loader.resetMethods();
200232

201233
// Cache the methods from the second test set.
202-
// System.out.println("***** Loading 2nd test set");
203234
loader.pruneNone();
204235

236+
/*
237+
SECOND PHASE - binary classification for each of the CWE categories.
238+
(1) Classify: cwe78, cwe079, cwe089, cwe306, cwe601, cwe862, cwe863
239+
*/
240+
runClassEvaluation(true);
241+
242+
SwanConfig swanConfig = new SwanConfig();
243+
Properties config = swanConfig.getConfig();
244+
String fileName = config.getProperty("output_file_name");
245+
246+
String outputFile = outputDir + File.separator + fileName + ".json";
247+
logger.info("Writing results to {}", outputFile);
248+
writer.printResultsJSON(loader.methods(), outputFile);
249+
250+
long analysisTime = System.currentTimeMillis() - startAnalysisTime;
251+
logger.info("Total runtime {} mins", analysisTime/60000);
252+
}
253+
}
254+
255+
public void runClassEvaluation(boolean forCwe) throws IOException, InterruptedException {
256+
257+
if (forCwe) {
258+
259+
// Run classifications for all CWEs in JSON file.
205260
if (runCwes) {
206-
// Run classifications for all cwes in JSON file.
207261
for (String cweId : parser.cwe()) {
208-
averageF += runClassifier(
209-
new HashSet<Category>(Arrays.asList(Category.getCategoryForCWE(cweId), Category.NONE)),
210-
true);
211-
iter++;
262+
// if (cweId.toLowerCase().contains("cwe306"))
263+
runClassifier(
264+
new HashSet<>(Arrays.asList(Category.getCategoryForCWE(cweId), Category.NONE)),
265+
Learner.EVAL_MODE.CLASS);
212266
}
213267
}
214-
// System.out.println("***** F Measure is " + averageF/iter);
268+
} else {
215269

216-
SwanConfig swanConfig = new SwanConfig();
217-
Properties config = swanConfig.getConfig();
218-
String fileName = config.getProperty("output_file_name");
270+
if (runSources) {
271+
runClassifier(new HashSet<>(Arrays.asList(Category.SOURCE, Category.NONE)), Learner.EVAL_MODE.CLASS);
272+
}
273+
274+
if (runSinks) {
275+
runClassifier(new HashSet<>(Arrays.asList(Category.SINK, Category.NONE)), Learner.EVAL_MODE.CLASS);
276+
}
277+
278+
if (runSanitizers) {
279+
runClassifier(new HashSet<>(Arrays.asList(Category.SANITIZER, Category.NONE)),
280+
Learner.EVAL_MODE.CLASS);
281+
}
282+
283+
if (runAuthentications) {
284+
runClassifier(
285+
new HashSet<>(Arrays.asList(Category.AUTHENTICATION_TO_HIGH,
286+
Category.AUTHENTICATION_TO_LOW, Category.AUTHENTICATION_NEUTRAL, Category.NONE)),
287+
Learner.EVAL_MODE.CLASS);
288+
}
219289

220-
// System.out.println("***** Writing final results");
221-
// Set<String> tmpFiles = Util.getFiles(outputDir);
222-
writer.printResultsTXT(loader.methods(),
223-
outputDir + File.separator + "txt" + File.separator + fileName + ".txt");
224-
writer.writeResultsQWEL(loader.methods(),
225-
outputDir + File.separator + "qwel" + File.separator + fileName + ".qwel");
226-
writer.writeResultsSoot(loader.methods(),
227-
outputDir + File.separator + "soot-qwel" + File.separator + fileName + ".sqwel");
228-
writer.printResultsJSON(loader.methods(), outputDir + File.separator + fileName + ".json");
229-
writer.writeResultsQwelXML(loader.methods(), outputDir + File.separator + fileName + ".xml");
290+
if (runRelevant) {
291+
runClassifier(new HashSet<>(Arrays.asList(Category.RELEVANT, Category.NONE)), Learner.EVAL_MODE.RELEVANCE);
292+
}
230293
}
231294
}
232295

233-
private double runClassifier(HashSet<Category> categories, boolean cweMode)
234-
throws IOException, InterruptedException {
296+
297+
private double runClassifier(HashSet<Category> categories, Learner.EVAL_MODE eval_mode) {
235298
parser.resetMethods();
236299
loader.resetMethods();
237-
// System.out.println("***** Starting classification for " +
238-
// categories.toString());
239-
return learner.classify(parser.methods(), loader.methods(), featureHandler.features(), categories,
240-
outputPath + File.separator + "txt" + File.separator + "output.txt", cweMode);
241-
}
242300

301+
logger.info("Starting classification for {}", categories.toString());
302+
303+
InstancesHandler instancesHandler = new InstancesHandler();
304+
Instances instances = instancesHandler.createInstances(parser.getMethods(), featureHandler.features(), docFeatureHandler, categories, INSTANCE_TYPE);
305+
long startAnalysisTime = System.currentTimeMillis();
306+
307+
learner.trainModel(instances, LEARNING_MODE);
308+
309+
long analysisTime = System.currentTimeMillis() - startAnalysisTime;
310+
logger.info("Total time for classification {}ms", analysisTime);
311+
312+
return 0.0;
313+
}
243314
}

0 commit comments

Comments
 (0)