Skip to content

Commit cbb1008

Browse files
committed
Add Jackson annotations and refactor SRM file loader
1 parent 5d298ed commit cbb1008

File tree

6 files changed

+55
-128
lines changed

6 files changed

+55
-128
lines changed

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/SwanPipeline.java

Lines changed: 4 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
package de.fraunhofer.iem.swan;
22

3+
import com.fasterxml.jackson.databind.ObjectMapper;
34
import de.fraunhofer.iem.swan.cli.SwanOptions;
45
import de.fraunhofer.iem.swan.data.Category;
56
import de.fraunhofer.iem.swan.data.Method;
@@ -54,86 +55,9 @@ public void run() throws IOException, InterruptedException {
5455

5556
long startAnalysisTime = System.currentTimeMillis();
5657

57-
// Cache the list of classes and the CP.
58-
Set<String> testClasses = Util.getAllClassesFromDirectory(options.getTestData());
59-
String testCp = Util.buildCP(options.getTestData());
60-
61-
logger.info("Loading train data from {}", options.getTrainData());
62-
String trainingCp = Util.buildCP(options.getTrainData());
63-
64-
// Cache the methods from the training set.
65-
parser = new Parser(trainingCp);
66-
parser.loadTrainingSet(Collections.singleton(options.getDatasetJson()));
67-
logger.info("{} training methods, distribution={}",
68-
parser.methods().size(), Util.countCategories(parser.methods(), false));
69-
70-
//Remove methods that do not have method doc comments
71-
parser.removeUndocumentedMethods();
72-
logger.info("Remove undocumented training methods. Remaining {}, distribution={}",
73-
parser.methods().size(), Util.countCategories(parser.methods(), true));
74-
75-
// Cache the methods from the testing set.
76-
logger.info("Loading test data from {}", options.getTestData());
77-
loader = new Loader(testCp);
78-
loader.loadTestSet(testClasses, parser.methods());
79-
80-
// Cache the features.
81-
logger.info("Loading feature instances");
82-
featureHandler = new FeatureHandler(trainingCp + System.getProperty("path.separator") + testCp);
83-
featureHandler.initializeFeatures();
84-
85-
Set<Method> methods = new HashSet<>();
86-
87-
for (Method method : parser.methods()) {
88-
List<String> words = Arrays.asList(StringUtils.split(method.getJavadoc().getMethodComment(), " "));
89-
if (method.getJavadoc().getMethodComment().length() > 0 || words.size() > 1
90-
) {
91-
// if (method.getDiscovery().contentEquals("manual")) {
92-
methods.add(method);
93-
//if(method.getCategoriesTrained().contains(Category.AUTHENTICATION_NEUTRAL))
94-
// System.out.println(method.getJavaSignature());
95-
}
96-
}
97-
98-
//Populate SWAN feature attributes
99-
docFeatureHandler = null;
100-
InstancesHandler.FeatureSet feature = InstancesHandler.FeatureSet.getValue(Integer.parseInt(options.getFeatureSet()));
101-
102-
switch (feature) {
103-
case SWANDOC_MANUAL:
104-
case SWAN_SWANDOC_MANUAL:
105-
106-
docFeatureHandler = new DocFeatureHandler(methods);
107-
docFeatureHandler.initialiseManualFeatureSet();
108-
docFeatureHandler.evaluateManualFeatureData();
109-
break;
110-
case SWANDOC_WORD_EMBEDDING:
111-
case SWAN_SWANDOC_WORD_EMBEDDING:
112-
113-
docFeatureHandler = new DocFeatureHandler(methods);
114-
docFeatureHandler.initialiseAutomaticFeatureSet();
115-
docFeatureHandler.evaluateAutomaticFeatureData();
116-
break;
117-
}
118-
119-
// Prepare classifier.
120-
logger.info("Preparing classifier");
121-
writer = new Writer(loader.methods());
122-
learner = new Learner(writer);
123-
124-
Learner.Mode learnerMode = Learner.Mode.valueOf(options.getLearningMode().toUpperCase());
125-
126-
/*
127-
FIRST PHASE - binary classification for each of the categories.
128-
(1) Classify: source, sink, sanitizer,
129-
auth-no-change, auth-unsafe-state, auth-safe-state
130-
(2) Classify: relevant
131-
*/
132-
133-
//Store predictions for each classifier and iteration.
134-
predictions = new HashMap<>();
135-
for (int x = 0; x < 10; x++)
136-
predictions.put(Integer.toString(x), new HashSet<>());
58+
// Load methods in training dataset
59+
SrmList dataset = SrmListUtils.importFile(options.getDatasetJson(), options.getTrainDataDir());
60+
logger.info("Loaded {} training methods, distribution={}", dataset.getMethods().size(), Util.countCategories(dataset.getMethods()));
13761

13862
//Load methods from the test set
13963
logger.info("Loading test JARs in {}", options.getTestDataDir());

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/data/Category.java

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
package de.fraunhofer.iem.swan.data;
22

3+
import com.fasterxml.jackson.annotation.JsonValue;
4+
35
/**
46
* Categories for the learner.
57
*
@@ -38,6 +40,11 @@ public boolean isCwe() {
3840
return cwe;
3941
}
4042

43+
@JsonValue
44+
public String getId() {
45+
return id;
46+
}
47+
4148
@Override
4249
public String toString() {
4350
return id;
@@ -52,8 +59,8 @@ public static Category getCategoryForCWE(String cweName) {
5259

5360
public static Category fromText(String text) {
5461

55-
for(Category cat: Category.values()){
56-
if(cat.name().equalsIgnoreCase(text)){
62+
for (Category cat : Category.values()) {
63+
if (cat.name().equalsIgnoreCase(text)) {
5764
return cat;
5865
}
5966
}

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/data/Javadoc.java

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,18 @@
11

22
package de.fraunhofer.iem.swan.data;
33

4+
import com.fasterxml.jackson.annotation.JsonProperty;
5+
46
/**
57
* Stores the Javadoc comment for a method and the class it belongs to.
68
*
79
* @author Oshando Johnson on 23.07.20
810
*/
911
public class Javadoc {
1012

13+
@JsonProperty("method")
1114
private String methodComment;
15+
@JsonProperty("class")
1216
private String classComment;
1317

1418
public Javadoc() {

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/data/RelevantPart.java

Lines changed: 24 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -4,38 +4,41 @@
44
* POJO for the Relevant parts of a method
55
*
66
* @author Goran Piskachev
7-
*
87
*/
98

9+
import com.fasterxml.jackson.annotation.JsonProperty;
10+
1011
import java.util.ArrayList;
1112
import java.util.List;
1213

1314
public class RelevantPart {
14-
private boolean returnValue = false;
15-
private List<Integer> parameterIndeces = new ArrayList<Integer>();
1615

17-
public RelevantPart(boolean rT, List<Integer> parInd) {
18-
setReturnValue(rT);
19-
setParameterIndeces(parInd);
20-
}
16+
@JsonProperty("return")
17+
private boolean returnValue = false;
18+
private List<Integer> parameters = new ArrayList<Integer>();
19+
20+
public RelevantPart(boolean rT, List<Integer> parInd) {
21+
setReturnValue(rT);
22+
setParameters(parInd);
23+
}
2124

22-
public RelevantPart() {
25+
public RelevantPart() {
2326

24-
}
27+
}
2528

26-
public List<Integer> getParameterIndeces() {
27-
return parameterIndeces;
28-
}
29+
public List<Integer> getParameters() {
30+
return parameters;
31+
}
2932

30-
public void setParameterIndeces(List<Integer> parameterIndeces) {
31-
this.parameterIndeces = parameterIndeces;
32-
}
33+
public void setParameters(List<Integer> parameters) {
34+
this.parameters = parameters;
35+
}
3336

34-
public boolean getReturnValue() {
35-
return returnValue;
36-
}
37+
public boolean getReturnValue() {
38+
return returnValue;
39+
}
3740

38-
public void setReturnValue(boolean returnValue) {
39-
this.returnValue = returnValue;
40-
}
41+
public void setReturnValue(boolean returnValue) {
42+
this.returnValue = returnValue;
43+
}
4144
}

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/doc/nlp/DocCommentVector.java

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,7 @@
11
package de.fraunhofer.iem.swan.features.doc.nlp;
22

33
import de.fraunhofer.iem.swan.data.Method;
4-
import de.fraunhofer.iem.swan.io.dataset.Parser;
5-
4+
import de.fraunhofer.iem.swan.io.dataset.SrmList;
65
import edu.stanford.nlp.util.StringUtils;
76
import org.apache.commons.io.FileUtils;
87
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors;
@@ -15,7 +14,6 @@
1514
import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
1615
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
1716
import org.nd4j.common.io.ClassPathResource;
18-
import org.nd4j.linalg.cpu.nativecpu.NDArray;
1917

2018
import java.io.File;
2119
import java.io.IOException;
@@ -42,15 +40,15 @@ public static void main(String[] args) {
4240

4341
//Export method and class doc comments
4442

45-
Parser parser = new Parser();
46-
parser.parse("/swan-dataset.json");
43+
SrmList srmListUtils = new SrmList();
44+
//parser.parse("/swan-dataset.json");
4745

4846
// for(Method method: parser.getMethods()){
4947
// if(method.getJavadoc().getMethodComment().length()>0)
5048
// System.out.println(NLPUtils.cleanText(method.getJavadoc().getMergedComments()));
5149
// }
5250

53-
for(Method method: parser.getMethods()){
51+
for(Method method: srmListUtils.getMethods()){
5452
List<String> words = StringUtils.split(method.getJavadoc().getMethodComment(), " ");
5553
if(method.getJavadoc().getMethodComment().length()>0 && words.size()>1)
5654
System.out.println(NLPUtils.cleanFirstSentence(method.getJavadoc().getMethodComment())+" "+NLPUtils.cleanFirstSentence(method.getJavadoc().getClassComment()));

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/training/TrainingSetUpdater.java

Lines changed: 10 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
11
package de.fraunhofer.iem.swan.training;
22

3-
import de.fraunhofer.iem.swan.features.code.soot.Loader;
4-
import de.fraunhofer.iem.swan.io.dataset.Parser;
3+
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
4+
import de.fraunhofer.iem.swan.io.dataset.SrmList;
55
import de.fraunhofer.iem.swan.util.Util;
6-
import de.fraunhofer.iem.swan.io.dataset.Writer;
76
import de.fraunhofer.iem.swan.data.Category;
87
import de.fraunhofer.iem.swan.data.Method;
98
import org.apache.commons.io.FileUtils;
@@ -44,23 +43,19 @@ public static void main(String[] args) throws IOException {
4443
uniqueMethod = new HashSet<>();
4544

4645
//Load original training file methods
47-
Parser parser = new Parser();
48-
parser.parse(ORIGINAL_TRAINING_FILE);
46+
SrmList srmListUtils = new SrmList();
47+
//parser.parse(ORIGINAL_TRAINING_FILE);
4948

50-
for (Method m : parser.methods())
51-
uniqueMethod.add(m.getClassName() + "." + m.getMethodName());
49+
for (Method m : srmListUtils.getMethods())
50+
uniqueMethod.add(m.getClassName() + "." + m.getName());
5251

5352
//Add training methods to master list
54-
Set<Method> trainingMethods = new HashSet<>(parser.getMethods());
53+
Set<Method> trainingMethods = new HashSet<>(srmListUtils.getMethods());
5554

5655
//Load methods from text file: extracted from thecodemaster.com and find-sec-bugs plugin
5756
loadClassesAndMethods("/swan/swan_core/src/main/resources/swandoc-new-training-data.txt");
5857

5958
trainingMethods.addAll(extractMethodData(classes, method));
60-
61-
//Export new training file
62-
Writer writer = new Writer();
63-
writer.outputJSONFile(trainingMethods, NEW_TRAINING_FILE);
6459
}
6560

6661
public static void extractManualList() throws IOException {
@@ -73,20 +68,16 @@ public static void extractManualList() throws IOException {
7368
uniqueMethod = new HashSet<>();
7469

7570
//Load original training file methods
76-
Parser parser = new Parser();
77-
parser.parse(NEW_TRAINING_FILE);
71+
SrmList srmListUtils = new SrmList();
72+
//parser.parse(NEW_TRAINING_FILE);
7873

7974
Set<Method> trainingMethods = new HashSet<>();
8075

81-
for (Method m : parser.methods()){
76+
for (Method m : srmListUtils.getMethods()){
8277
if(m.getJavaSignature().contains("org.owasp.esapi.reference.DefaultEncoder") &&
8378
m.getDiscovery().contentEquals("find-sec-bugs"))
8479
trainingMethods.add(m);
8580
}
86-
87-
//Export new training file
88-
Writer writer = new Writer();
89-
writer.outputJSONFile(trainingMethods, MANUAL_TRAINING_FILE);
9081
}
9182

9283
/**

0 commit comments

Comments
 (0)