11package de .fraunhofer .iem .swan ;
22
33import de .fraunhofer .iem .swan .data .Category ;
4+ import de .fraunhofer .iem .swan .doc .features .DocFeatureHandler ;
5+ import de .fraunhofer .iem .swan .doc .features .automatic .AutomaticFeatureHandler ;
6+ import de .fraunhofer .iem .swan .doc .features .automatic .DocCommentVector ;
7+ import de .fraunhofer .iem .swan .doc .features .manual .ManualFeaturesHandler ;
8+ import de .fraunhofer .iem .swan .doc .util .Utils ;
9+ import de .fraunhofer .iem .swan .features .FeatureHandler ;
10+ import de .fraunhofer .iem .swan .io .FileUtility ;
11+ import de .fraunhofer .iem .swan .io .Loader ;
12+ import de .fraunhofer .iem .swan .io .Parser ;
13+ import de .fraunhofer .iem .swan .io .Writer ;
14+ import de .fraunhofer .iem .swan .model .InstancesHandler ;
15+ import de .fraunhofer .iem .swan .model .Learner ;
416import de .fraunhofer .iem .swan .util .SwanConfig ;
17+ import de .fraunhofer .iem .swan .util .Util ;
18+ import org .slf4j .Logger ;
19+ import org .slf4j .LoggerFactory ;
20+ import weka .core .Instances ;
521
622import java .io .File ;
723import java .io .IOException ;
1531
1632public class Main {
1733
    // Collaborators created per analysis run (see internalRun).
    private Learner learner;
    private Loader loader;
    private Parser parser;
    private FeatureHandler featureHandler;
    private String outputPath;
    private Writer writer;

    // Configuration tags for debugging: each flag enables one binary
    // classification pass in runClassEvaluation.
    private static final boolean runSources = true;
    private static final boolean runSinks = true;
    private static final boolean runSanitizers = true;
    private static final boolean runAuthentications = true;
    private static final boolean runRelevant = true;
    private static final boolean runCwes = true;

    private static final boolean runOAT = false; // run one at a time analysis

    private static final Logger logger = LoggerFactory.getLogger(Main.class);

    // Which feature/instance combination and learning mode the pipeline uses.
    private static final InstancesHandler.INSTANCE_SET INSTANCE_TYPE = InstancesHandler.INSTANCE_SET.SWAN_SWANDOC_MANUAL;
    private static final Learner.LEARN_MODE LEARNING_MODE = Learner.LEARN_MODE.MANUAL;

    // NOTE(review): absolute developer-local paths; these should be supplied
    // via CLI arguments or configuration before release — confirm intended use.
    public static String INPUT = "/Users/oshando/Projects/thesis/03-code/swandoc/src/main/resources/training-jars";
    public static String JAVADOC_OUTPUT = "/Users/oshando/Projects/thesis/03-code/training-docs";
    public static String TRAINING_SET = "/Users/oshando/Projects/thesis/03-code/swandoc/src/main/resources/training-set-javadoc.json";

    // Handler for documentation-derived (SWAN-DOC) features; remains null for
    // instance sets that do not use them (see internalRun's switch).
    DocFeatureHandler docFeatureHandler;
61+
1862 public static void main (String [] args ) {
1963
2064 try {
@@ -44,30 +88,11 @@ public static void main(String[] args) {
4488
4589 Main main = new Main ();
4690 main .run (sourceDir , trainSourceCode , trainJson , outputDir );
47- // System.out.println("Done.");
48- } catch (IOException e ) {
49- e .printStackTrace ();
50- } catch (InterruptedException e ) {
91+ } catch (IOException | InterruptedException e ) {
5192 e .printStackTrace ();
5293 }
5394 }
5495
55- private Learner learner ;
56- private Loader loader ;
57- private Parser parser ;
58- private FeatureHandler featureHandler ;
59- private String outputPath ;
60- private Writer writer ;
61-
62- // Configuration tags for debugging
63- private static final boolean runSources = true ;
64- private static final boolean runSinks = true ;
65- private static final boolean runSanitizers = true ;
66- private static final boolean runAuthentications = false ;
67- private static final boolean runCwes = true ;
68-
69- private static final boolean runOAT = false ; // run one at a time analysis
70-
7196 /**
7297 * This method executes the analysis and can also be called from outside by
7398 * clients. It uses the builtin training data.
@@ -102,8 +127,7 @@ public void run(String sourceDir, String trainSourceCode, String trainJson, Stri
102127 throws IOException , InterruptedException {
103128
104129 // This helper object keeps track of created temporary directories and files to
105- // to be deleted before exiting the
106- // application.
130+ // to be deleted before exiting the application.
107131 FileUtility fileUtility = new FileUtility ();
108132
109133 if (trainJson == null ) {
@@ -120,124 +144,171 @@ public void run(String sourceDir, String trainSourceCode, String trainJson, Stri
120144 internalRun (sourceDir , trainSourceCode , trainJson , outputDir );
121145
122146 } finally {
123-
124147 // Delete temporary files and folders that have been created.
125148 fileUtility .dispose ();
126149 }
127-
128150 }
129151
130152 private void internalRun (String sourceDir , String trainSourceCode , String trainJson , String outputDir )
131153 throws IOException , InterruptedException {
132154
155+ long startAnalysisTime = System .currentTimeMillis ();
156+
133157 int iterations = 0 ;
134158 if (runOAT )
135159 iterations = 206 ; // number of features //TODO: improve code: better borders here.
136160
137161 // for OAT analysis. Each feature is disabled once.
138162 for (int i = 0 ; i <= iterations ; i ++) {
139163 if (i == 0 )
140- System . out . println ( "***** Running with all features." );
164+ logger . info ( " Running with all features." );
141165 else {
142- System . out . println ( "***** Running without " + i + "th feature" );
166+ logger . info ( " Running without " + i + "th feature" );
143167 }
168+
144169 // Cache the list of classes and the CP.
145- // System.out.println("***** Loading CP");
146170 Set <String > testClasses = Util .getAllClassesFromDirectory (sourceDir );
147171 String testCp = Util .buildCP (sourceDir );
172+
173+ logger .info ("Loading train data from {}" , trainSourceCode );
148174 String trainingCp = Util .buildCP (trainSourceCode );
149175 outputPath = outputDir ;
150- // System.out.println("Training set cp: " + trainingCp + "\nTest set cp: " +
151- // testCp);
152-
153- // Cache the features.
154- // System.out.println("***** Loading features");
155- featureHandler = new FeatureHandler (trainingCp + System .getProperty ("path.separator" ) + testCp );
156- featureHandler .initializeFeatures (i ); // use 0 for all feature instances
157176
158177 // Cache the methods from the training set.
159- // System.out.println("***** Loading train data");
160178 parser = new Parser (trainingCp );
161179 parser .loadTrainingSet (Collections .singleton (trainJson ));
180+ logger .info ("{} training methods, distribution={}" ,
181+ parser .methods ().size (), Utils .countCategories (parser .methods (), false ));
182+
183+ //Remove methods that do not have method doc comments
184+ parser .removeUndocumentedMethods ();
185+ logger .info ("Remove undocumented training methods. Remaining {}, distribution={}" ,
186+ parser .methods ().size (), Utils .countCategories (parser .methods (), false ));
162187
163188 // Cache the methods from the testing set.
164- // System.out.println("***** Loading test data" );
189+ logger . info ( " Loading test data from {}" , sourceDir );
165190 loader = new Loader (testCp );
166191 loader .loadTestSet (testClasses , parser .methods ());
167192
193+ // Cache the features.
194+ logger .info ("Loading feature instances" );
195+ featureHandler = new FeatureHandler (trainingCp + System .getProperty ("path.separator" ) + testCp );
196+ featureHandler .initializeFeatures (i ); // use 0 for all feature instances
197+
198+ //Populate SWAN feature attributes
199+ docFeatureHandler = null ;
200+ switch (INSTANCE_TYPE ) {
201+ case SWANDOC_MANUAL :
202+ case SWAN_SWANDOC_MANUAL :
203+
204+ docFeatureHandler = new DocFeatureHandler (parser .getMethods ());
205+ docFeatureHandler .initialiseManualFeatureSet ();
206+ docFeatureHandler .evaluateManualFeatureData ();
207+ break ;
208+ case SWANDOC_AUTOMATIC :
209+ case SWAN_SWANDOC_AUTOMATIC :
210+
211+ docFeatureHandler = new DocFeatureHandler (parser .getMethods ());
212+ docFeatureHandler .initialiseAutomaticFeatureSet ();
213+ docFeatureHandler .evaluateAutomaticFeatureData ();
214+ break ;
215+ }
216+
168217 // Prepare classifier.
169- // System.out.println("***** Preparing classifier");
218+ logger . info ( " Preparing classifier" );
170219 writer = new Writer (loader .methods ());
171220 learner = new Learner (writer );
172221
173- double averageF = 0 ;
174- int iter = 0 ;
175- // Classify.
176- if (runSources ) {
177- averageF += runClassifier (new HashSet <Category >(Arrays .asList (Category .SOURCE , Category .NONE )), false );
178- iter ++;
179- }
180- if (runSinks ) {
181- averageF += runClassifier (new HashSet <Category >(Arrays .asList (Category .SINK , Category .NONE )), false );
182- iter ++;
183- }
184-
185- if (runSanitizers ) {
186- averageF += runClassifier (new HashSet <Category >(Arrays .asList (Category .SANITIZER , Category .NONE )),
187- false );
188- iter ++;
189- }
222+ /*
223+ FIRST PHASE - binary classification for each of the categories.
224+ (1) Classify: source, sink, sanitizer,
225+ auth-no-change, auth-unsafe-state, auth-safe-state
226+ (2) Classify: relevant
227+ */
228+ runClassEvaluation (false );
190229
191- if (runAuthentications ) {
192- averageF += runClassifier (
193- new HashSet <Category >(Arrays .asList (Category .AUTHENTICATION_TO_HIGH ,
194- Category .AUTHENTICATION_TO_LOW , Category .AUTHENTICATION_NEUTRAL , Category .NONE )),
195- false );
196- iter ++;
197- }
198230 // Save data from last classification.
199231 loader .resetMethods ();
200232
201233 // Cache the methods from the second test set.
202- // System.out.println("***** Loading 2nd test set");
203234 loader .pruneNone ();
204235
236+ /*
237+ SECOND PHASE - binary classification for each of the CWE categories.
238+ (1) Classify: cwe78, cwe079, cwe089, cwe306, cwe601, cwe862, cwe863
239+ */
240+ runClassEvaluation (true );
241+
242+ SwanConfig swanConfig = new SwanConfig ();
243+ Properties config = swanConfig .getConfig ();
244+ String fileName = config .getProperty ("output_file_name" );
245+
246+ String outputFile = outputDir + File .separator + fileName + ".json" ;
247+ logger .info ("Writing results to {}" , outputFile );
248+ writer .printResultsJSON (loader .methods (), outputFile );
249+
250+ long analysisTime = System .currentTimeMillis () - startAnalysisTime ;
251+ logger .info ("Total runtime {} mins" , analysisTime /60000 );
252+ }
253+ }
254+
255+ public void runClassEvaluation (boolean forCwe ) throws IOException , InterruptedException {
256+
257+ if (forCwe ) {
258+
259+ // Run classifications for all CWEs in JSON file.
205260 if (runCwes ) {
206- // Run classifications for all cwes in JSON file.
207261 for (String cweId : parser .cwe ()) {
208- averageF += runClassifier (
209- new HashSet < Category >( Arrays . asList ( Category . getCategoryForCWE ( cweId ), Category . NONE )),
210- true );
211- iter ++ ;
262+ // if (cweId.toLowerCase().contains("cwe306"))
263+ runClassifier (
264+ new HashSet <>( Arrays . asList ( Category . getCategoryForCWE ( cweId ), Category . NONE )),
265+ Learner . EVAL_MODE . CLASS ) ;
212266 }
213267 }
214- // System.out.println("***** F Measure is " + averageF/iter);
268+ } else {
215269
216- SwanConfig swanConfig = new SwanConfig ();
217- Properties config = swanConfig .getConfig ();
218- String fileName = config .getProperty ("output_file_name" );
270+ if (runSources ) {
271+ runClassifier (new HashSet <>(Arrays .asList (Category .SOURCE , Category .NONE )), Learner .EVAL_MODE .CLASS );
272+ }
273+
274+ if (runSinks ) {
275+ runClassifier (new HashSet <>(Arrays .asList (Category .SINK , Category .NONE )), Learner .EVAL_MODE .CLASS );
276+ }
277+
278+ if (runSanitizers ) {
279+ runClassifier (new HashSet <>(Arrays .asList (Category .SANITIZER , Category .NONE )),
280+ Learner .EVAL_MODE .CLASS );
281+ }
282+
283+ if (runAuthentications ) {
284+ runClassifier (
285+ new HashSet <>(Arrays .asList (Category .AUTHENTICATION_TO_HIGH ,
286+ Category .AUTHENTICATION_TO_LOW , Category .AUTHENTICATION_NEUTRAL , Category .NONE )),
287+ Learner .EVAL_MODE .CLASS );
288+ }
219289
220- // System.out.println("***** Writing final results");
221- // Set<String> tmpFiles = Util.getFiles(outputDir);
222- writer .printResultsTXT (loader .methods (),
223- outputDir + File .separator + "txt" + File .separator + fileName + ".txt" );
224- writer .writeResultsQWEL (loader .methods (),
225- outputDir + File .separator + "qwel" + File .separator + fileName + ".qwel" );
226- writer .writeResultsSoot (loader .methods (),
227- outputDir + File .separator + "soot-qwel" + File .separator + fileName + ".sqwel" );
228- writer .printResultsJSON (loader .methods (), outputDir + File .separator + fileName + ".json" );
229- writer .writeResultsQwelXML (loader .methods (), outputDir + File .separator + fileName + ".xml" );
290+ if (runRelevant ) {
291+ runClassifier (new HashSet <>(Arrays .asList (Category .RELEVANT , Category .NONE )), Learner .EVAL_MODE .RELEVANCE );
292+ }
230293 }
231294 }
232295
233- private double runClassifier ( HashSet < Category > categories , boolean cweMode )
234- throws IOException , InterruptedException {
296+
297+ private double runClassifier ( HashSet < Category > categories , Learner . EVAL_MODE eval_mode ) {
235298 parser .resetMethods ();
236299 loader .resetMethods ();
237- // System.out.println("***** Starting classification for " +
238- // categories.toString());
239- return learner .classify (parser .methods (), loader .methods (), featureHandler .features (), categories ,
240- outputPath + File .separator + "txt" + File .separator + "output.txt" , cweMode );
241- }
242300
301+ logger .info ("Starting classification for {}" , categories .toString ());
302+
303+ InstancesHandler instancesHandler = new InstancesHandler ();
304+ Instances instances = instancesHandler .createInstances (parser .getMethods (), featureHandler .features (), docFeatureHandler , categories , INSTANCE_TYPE );
305+ long startAnalysisTime = System .currentTimeMillis ();
306+
307+ learner .trainModel (instances , LEARNING_MODE );
308+
309+ long analysisTime = System .currentTimeMillis () - startAnalysisTime ;
310+ logger .info ("Total time for classification {}ms" , analysisTime );
311+
312+ return 0.0 ;
313+ }
243314}
0 commit comments