Skip to content

Commit 33a27a3

Browse files
committed
Merge remote-tracking branch 'origin/master'
2 parents ef93b63 + 48712ee commit 33a27a3

File tree

124 files changed

+3394
-2755
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

124 files changed

+3394
-2755
lines changed

swan-javadoc-coverage/pom.xml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@
104104
<dependency>
105105
<groupId>junit</groupId>
106106
<artifactId>junit</artifactId>
107-
<version>4.13.1</version>
107+
<version>4.13.2</version>
108108
<scope>test</scope>
109109
</dependency>
110110
<!--dependency>
@@ -117,13 +117,14 @@
117117
<dependency>
118118
<groupId>jdk.tools</groupId>
119119
<artifactId>jdk.tools</artifactId>
120+
<version>1.8</version>
120121
<scope>system</scope>
121122
<systemPath>${java.home}/../lib/tools.jar</systemPath>
122123
</dependency>
123124
<dependency>
124125
<groupId>commons-io</groupId>
125126
<artifactId>commons-io</artifactId>
126-
<version>2.7</version>
127+
<version>2.11.0</version>
127128
<scope>compile</scope>
128129
</dependency>
129130
</dependencies>
@@ -133,7 +134,7 @@
133134
<plugin>
134135
<groupId>org.apache.maven.plugins</groupId>
135136
<artifactId>maven-compiler-plugin</artifactId>
136-
<version>3.7.0</version>
137+
<version>3.10.0</version>
137138
<configuration>
138139
<source>1.8</source>
139140
<target>1.8</target>
@@ -155,7 +156,7 @@
155156
<plugin>
156157
<groupId>org.apache.maven.plugins</groupId>
157158
<artifactId>maven-javadoc-plugin</artifactId>
158-
<version>3.0.0</version>
159+
<version>3.3.2</version>
159160
<executions>
160161
<!-- Exports JavaDocs of the JavaDoc Coverage Plugin to regular HTML files -->
161162
<execution>

swan-pipeline/pom.xml

Lines changed: 63 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,16 @@
99
<packaging>jar</packaging>
1010

1111

12-
<name>SWAN Core</name>
13-
<description>SWAN uses fully automated machine-learning approaches to classify Java methods into security-relevant methods (SRM) and software vulnerabilities categories.
14-
The methods are classified into the following security-relevant method categories sources, sinks, sanitizers and authentication. For the software vulnerability classes,
15-
the following Common Weakness Enumeration (CWE) vulnerabilities are supported: OS Command Injection, Cross-site Scripting, SQL Injection, Missing Authentication, Open Redirect, Missing Authorisation, and Incorrect Authorisation.
16-
SWAN detects methods from the provided source code and outputs a list of methods that can be used to configure static analysis tools.
12+
<name>SWAN</name>
13+
<description>SWAN uses fully automated machine-learning approaches to classify Java methods into security-relevant
14+
methods (SRM) and software vulnerability categories.
15+
The methods are classified into the following security-relevant method categories: sources, sinks, sanitizers and
16+
authentication. For the software vulnerability classes,
17+
the following Common Weakness Enumeration (CWE) vulnerabilities are supported: OS Command Injection, Cross-site
18+
Scripting, SQL Injection, Missing Authentication, Open Redirect, Missing Authorisation, and Incorrect
19+
Authorisation.
20+
SWAN detects methods from the provided source code and outputs a list of methods that can be used to configure
21+
static analysis tools.
1722
</description>
1823
<url>https://github.com/secure-software-engineering/swan</url>
1924

@@ -80,7 +85,7 @@
8085
<dependency>
8186
<groupId>nz.ac.waikato.cms.weka</groupId>
8287
<artifactId>weka-stable</artifactId>
83-
<version>3.8.5</version>
88+
<version>3.8.6</version>
8489
</dependency>
8590
<dependency>
8691
<groupId>com.googlecode.json-simple</groupId>
@@ -100,13 +105,13 @@
100105
<dependency>
101106
<groupId>edu.stanford.nlp</groupId>
102107
<artifactId>stanford-corenlp</artifactId>
103-
<version>4.3.0</version>
108+
<version>4.4.0</version>
104109
</dependency>
105110
<dependency>
106111
<groupId>edu.stanford.nlp</groupId>
107112
<artifactId>stanford-corenlp</artifactId>
108-
<version>4.3.0</version>
109-
<classifier>models</classifier>
113+
<version>4.4.0</version>
114+
<classifier>models-english</classifier>
110115
</dependency>
111116
<dependency>
112117
<groupId>org.jsoup</groupId>
@@ -121,9 +126,8 @@
121126
<dependency>
122127
<groupId>dev.jeka</groupId>
123128
<artifactId>jeka-core</artifactId>
124-
<version>0.9.0.M10</version>
129+
<version>0.9.15.RELEASE</version>
125130
</dependency>
126-
<!-- deeplearning4j-core: contains swanPipeline functionality and neural networks -->
127131
<dependency>
128132
<groupId>org.deeplearning4j</groupId>
129133
<artifactId>deeplearning4j-core</artifactId>
@@ -152,7 +156,7 @@
152156
<dependency>
153157
<groupId>ai.libs</groupId>
154158
<artifactId>mlplan-weka</artifactId>
155-
<version>0.2.3</version>
159+
<version>0.2.7</version>
156160
</dependency>
157161
<dependency>
158162
<groupId>org.graphstream</groupId>
@@ -162,24 +166,22 @@
162166
<dependency>
163167
<groupId>ai.libs</groupId>
164168
<artifactId>hasco-core</artifactId>
165-
<version>0.2.5</version>
169+
<version>0.2.7</version>
166170
</dependency>
167-
<!-- https://mvnrepository.com/artifact/org.apache.logging.log4j/log4j-api -->
168171
<dependency>
169172
<groupId>org.slf4j</groupId>
170173
<artifactId>slf4j-api</artifactId>
171-
<version>1.7.32</version>
174+
<version>1.7.36</version>
172175
</dependency>
173-
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-simple -->
174176
<dependency>
175177
<groupId>org.slf4j</groupId>
176178
<artifactId>slf4j-simple</artifactId>
177-
<version>1.7.32</version>
179+
<version>1.7.36</version>
178180
</dependency>
179181
<dependency>
180182
<groupId>info.picocli</groupId>
181183
<artifactId>picocli</artifactId>
182-
<version>4.6.2</version>
184+
<version>4.6.3</version>
183185
</dependency>
184186
<dependency>
185187
<groupId>commons-io</groupId>
@@ -192,7 +194,6 @@
192194
<artifactId>jackson-databind</artifactId>
193195
<version>2.13.1</version>
194196
</dependency>
195-
<!-- https://mvnrepository.com/artifact/net.sf.meka/meka --><!-- https://mvnrepository.com/artifact/net.sf.meka/meka -->
196197
<dependency>
197198
<groupId>net.sf.meka</groupId>
198199
<artifactId>meka</artifactId>
@@ -244,17 +245,6 @@
244245
<target>1.8</target>
245246
</configuration>
246247
</plugin>
247-
<plugin>
248-
<artifactId>maven-clean-plugin</artifactId>
249-
<configuration>
250-
<filesets>
251-
<fileset>
252-
<directory>${basedir}/target</directory>
253-
<followSymlinks>false</followSymlinks>
254-
</fileset>
255-
</filesets>
256-
</configuration>
257-
</plugin>
258248
<plugin>
259249
<groupId>org.apache.maven.plugins</groupId>
260250
<artifactId>maven-source-plugin</artifactId>
@@ -268,28 +258,55 @@
268258
</execution>
269259
</executions>
270260
</plugin>
271-
272-
<!-- This creates a runnable JAR that can be used from the command line. -->
273261
<plugin>
274262
<groupId>org.apache.maven.plugins</groupId>
275-
<artifactId>maven-assembly-plugin</artifactId>
263+
<artifactId>maven-shade-plugin</artifactId>
264+
<version>3.2.4</version>
276265
<executions>
277266
<execution>
278267
<phase>package</phase>
279268
<goals>
280-
<goal>single</goal>
269+
<goal>shade</goal>
281270
</goals>
282271
<configuration>
283-
<archive>
284-
<manifest>
285-
<mainClass>
286-
de.fraunhofer.iem.swan.Main
287-
</mainClass>
288-
</manifest>
289-
</archive>
290-
<descriptorRefs>
291-
<descriptorRef>jar-with-dependencies</descriptorRef>
292-
</descriptorRefs>
272+
<transformers>
273+
<!-- adding Main-Class to manifest file -->
274+
<transformer
275+
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
276+
<mainClass>de.fraunhofer.iem.swan.Main</mainClass>
277+
</transformer>
278+
</transformers>
279+
<minimizeJar>true</minimizeJar>
280+
<filters>
281+
<filter>
282+
<artifact>*:*</artifact>
283+
<excludes>
284+
<exclude>META-INF/*.SF</exclude>
285+
<exclude>META-INF/*.DSA</exclude>
286+
<exclude>META-INF/*.RSA</exclude>
287+
</excludes>
288+
</filter>
289+
<filter>
290+
<artifact>edu.stanford.nlp:stanford-corenlp:models-english</artifact>
291+
<excludes>
292+
<exclude>edu/stanford/nlp/models/srparser/**</exclude>
293+
<exclude>edu/stanford/nlp/models/ner/**</exclude>
294+
<exclude>edu/stanford/nlp/models/sentiment/**</exclude>
295+
</excludes>
296+
</filter>
297+
<filter>
298+
<artifact>org.bytedeco</artifact>
299+
<excludes>
300+
<exclude>**</exclude>
301+
</excludes>
302+
</filter>
303+
<!--filter>
304+
<artifact>org.nd4j</artifact>
305+
<excludes>
306+
<exclude>**</exclude>
307+
</excludes>
308+
</filter-->
309+
</filters>
293310
</configuration>
294311
</execution>
295312
</executions>
@@ -306,7 +323,7 @@
306323
<plugin>
307324
<groupId>org.apache.maven.plugins</groupId>
308325
<artifactId>maven-javadoc-plugin</artifactId>
309-
<version>3.3.1</version>
326+
<version>3.3.2</version>
310327
<executions>
311328
<execution>
312329
<id>attach-javadocs</id>
@@ -332,7 +349,7 @@
332349
<plugin>
333350
<groupId>org.apache.maven.plugins</groupId>
334351
<artifactId>maven-gpg-plugin</artifactId>
335-
<version>1.6</version>
352+
<version>3.0.1</version>
336353
<executions>
337354
<execution>
338355
<id>sign-artifacts</id>

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/SwanPipeline.java

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
11
package de.fraunhofer.iem.swan;
22

33
import de.fraunhofer.iem.swan.cli.SwanOptions;
4+
import de.fraunhofer.iem.swan.data.Method;
45
import de.fraunhofer.iem.swan.features.FeatureSetSelector;
56
import de.fraunhofer.iem.swan.features.IFeatureSet;
6-
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
7+
import de.fraunhofer.iem.swan.io.dataset.Dataset;
78
import de.fraunhofer.iem.swan.io.dataset.SrmList;
89
import de.fraunhofer.iem.swan.io.dataset.SrmListUtils;
910
import de.fraunhofer.iem.swan.model.ModelEvaluator;
11+
import de.fraunhofer.iem.swan.soot.Soot;
1012
import de.fraunhofer.iem.swan.util.Util;
1113
import org.slf4j.Logger;
1214
import org.slf4j.LoggerFactory;
1315

1416
import java.io.IOException;
17+
import java.util.Set;
1518

1619
/**
1720
* Runner for SWAN
@@ -23,6 +26,7 @@ public class SwanPipeline {
2326

2427
private static final Logger logger = LoggerFactory.getLogger(SwanPipeline.class);
2528
public static SwanOptions options;
29+
private ModelEvaluator modelEvaluator;
2630

2731
public SwanPipeline(SwanOptions options) {
2832
SwanPipeline.options = options;
@@ -38,24 +42,37 @@ public void run() throws IOException, InterruptedException {
3842

3943
long startAnalysisTime = System.currentTimeMillis();
4044

45+
//Run Soot
46+
Soot soot = new Soot(options.getTrainDataDir(), options.getTestDataDir());
47+
4148
// Load methods in training dataset
42-
SrmList dataset = SrmListUtils.importFile(options.getDatasetJson(), options.getTrainDataDir());
43-
logger.info("Loaded {} training methods, distribution={}", dataset.getMethods().size(), Util.countCategories(dataset.getMethods()));
49+
Dataset dataset = new Dataset();
50+
dataset.setTrain(SrmListUtils.importFile(options.getDatasetJson()));
51+
52+
if (!options.getTrainDataDir().isEmpty())
53+
soot.cleanupList(dataset.getTrain());
54+
55+
logger.info("Loaded {} training methods, distribution={}", dataset.getTrainMethods().size(), Util.countCategories(dataset.getTrainMethods()));
4456

4557
//Load methods from the test set
46-
logger.info("Loading test JARs in {}", options.getTestDataDir());
47-
SourceFileLoader testDataset = new SourceFileLoader(options.getTestDataDir());
48-
testDataset.load(dataset.getMethods());
58+
dataset.setTest(new SrmList(options.getTestDataDir()));
59+
Set<Method> testMethods = soot.loadMethods(dataset.getTest().getTestClasses());
60+
dataset.getTest().setMethods(testMethods);
61+
logger.info("Loaded {} methods from {}", testMethods.size(), options.getTestDataDir());
4962

5063
//Initialize and populate features
5164
FeatureSetSelector featureSetSelector = new FeatureSetSelector();
52-
IFeatureSet featureSet = featureSetSelector.select(dataset, testDataset, options);
65+
IFeatureSet featureSet = featureSetSelector.select(dataset, options);
5366

5467
//Train and evaluate model for SRM and CWE categories
55-
ModelEvaluator modelEvaluator = new ModelEvaluator(featureSet, options, testDataset.getMethods());
68+
modelEvaluator = new ModelEvaluator(featureSet, options, dataset.getTestMethods());
5669
modelEvaluator.trainModel();
5770

5871
long analysisTime = System.currentTimeMillis() - startAnalysisTime;
5972
logger.info("Total runtime {} minutes", analysisTime / 60000);
6073
}
74+
75+
public ModelEvaluator getModelEvaluator() {
76+
return modelEvaluator;
77+
}
6178
}

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/cli/CliRunner.java

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import picocli.CommandLine;
44

5+
import java.util.ArrayList;
56
import java.util.Collections;
67
import java.util.List;
78
import java.util.concurrent.Callable;
@@ -11,16 +12,19 @@
1112
public class CliRunner implements Callable<Integer> {
1213

1314
@CommandLine.Option(names = {"-test", "--test-data"}, description = {"Path of test JARs or class files"})
14-
private String testDataDir = "/input/test-data";
15+
private String testDataDir = "";
1516

1617
@CommandLine.Option(names = {"-train", "--train-data"}, description = {"Path of training JARs or class files"})
17-
private String trainDataDir = "/input/train-data";
18+
private String trainDataDir = "";
1819

1920
@CommandLine.Option(names = {"-d", "--dataset"}, description = {"Path to JSON dataset file"})
20-
private String datasetJson = "/input/dataset/swan-dataset.json";
21+
private String datasetJson = "/dataset/swan-dataset.json";
22+
23+
@CommandLine.Option(names = {"-in", "--train-instances"}, description = {"Path to ARFF files that contain training instances"})
24+
private List<String> instancesArff = new ArrayList<>();
2125

2226
@CommandLine.Option(names = {"-o", "--output"}, description = {"Directory to save output files"})
23-
private String outputDir = "/swan-output";
27+
private String outputDir = "";
2428

2529
@CommandLine.Option(names = {"-f", "--feature"}, description = {"Select one or more feature sets: all, code, doc-auto or doc-manual"})
2630
private List<String> featureSet = Collections.singletonList("code");
@@ -52,9 +56,7 @@ public class CliRunner implements Callable<Integer> {
5256
@CommandLine.Option(names = {"-pt", "--prediction-threshold"}, description = {"Threshold for predicting categories"})
5357
private double predictionThreshold = 0.5;
5458

55-
56-
@Override
57-
public Integer call() throws Exception {
59+
public SwanOptions initializeOptions(){
5860

5961
SwanOptions options = new SwanOptions(testDataDir,
6062
trainDataDir,
@@ -70,7 +72,14 @@ public Integer call() throws Exception {
7072
split,
7173
phase);
7274
options.setPredictionThreshold(predictionThreshold);
75+
options.setInstances(instancesArff);
76+
77+
return options;
78+
}
79+
80+
@Override
81+
public Integer call() throws Exception {
7382

74-
return new SwanCli().run(options);
83+
return new SwanCli().run(initializeOptions());
7584
}
7685
}

0 commit comments

Comments
 (0)