Skip to content

Commit 43a8eb6

Browse files
authored
Merge pull request #53 from secure-software-engineering/develop
Fix/refactor feature extraction and classification modules
2 parents 018b4a4 + 3ba98d4 commit 43a8eb6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

64 files changed

+17508
-896
lines changed

doc-coverage-doclet/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>de.fraunhofer.iem</groupId>
88
<artifactId>coverage-doclet</artifactId>
9-
<version>1.0-SNAPSHOT</version>
9+
<version>1.1</version>
1010

1111
<properties>
1212
<maven.compiler.source>11</maven.compiler.source>

doc-xml-exporter-doclet/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<groupId>de.fraunhofer.iem.doclet</groupId>
1212
<artifactId>xml-doclet</artifactId>
13-
<version>1.3</version>
13+
<version>1.4</version>
1414

1515
<properties>
1616
<maven.compiler.source>11</maven.compiler.source>

swan-pipeline/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@
192192
<dependency>
193193
<groupId>com.fasterxml.jackson.core</groupId>
194194
<artifactId>jackson-databind</artifactId>
195-
<version>2.13.2.1</version>
195+
<version>2.13.2.2</version>
196196
</dependency>
197197
<dependency>
198198
<groupId>net.sf.meka</groupId>

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/SwanPipeline.java

Lines changed: 4 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,8 @@
44
import de.fraunhofer.iem.swan.features.FeatureSetSelector;
55
import de.fraunhofer.iem.swan.features.IFeatureSet;
66
import de.fraunhofer.iem.swan.io.dataset.Dataset;
7-
import de.fraunhofer.iem.swan.io.dataset.SrmList;
8-
import de.fraunhofer.iem.swan.io.dataset.SrmListUtils;
9-
import de.fraunhofer.iem.swan.io.doc.JavadocProcessor;
7+
import de.fraunhofer.iem.swan.io.dataset.DatasetProcessor;
108
import de.fraunhofer.iem.swan.model.ModelEvaluator;
11-
import de.fraunhofer.iem.swan.soot.Soot;
12-
import de.fraunhofer.iem.swan.util.Util;
139
import org.slf4j.Logger;
1410
import org.slf4j.LoggerFactory;
1511

@@ -41,26 +37,9 @@ public void run() throws IOException, InterruptedException {
4137

4238
long startAnalysisTime = System.currentTimeMillis();
4339

44-
//Run Soot
45-
Soot soot = new Soot(options.getTrainDataDir(), options.getTestDataDir());
46-
47-
// Load methods in training dataset
48-
Dataset dataset = new Dataset();
49-
dataset.setTrain(SrmListUtils.importFile(options.getDatasetJson()));
50-
51-
if (!options.getTrainDataDir().isEmpty())
52-
soot.cleanupList(dataset.getTrain());
53-
54-
logger.info("Loaded {} training methods, distribution={}", dataset.getTrainMethods().size(), Util.countCategories(dataset.getTrainMethods()));
55-
56-
//Load methods from the test set
57-
dataset.setTest(new SrmList(options.getTestDataDir()));
58-
dataset.getTest().setMethods(soot.loadMethods(dataset.getTest().getTestClasses()));
59-
60-
//Extract doc comments and add to test set, if option is selected
61-
JavadocProcessor javadocProcessor = new JavadocProcessor(options.getTestDataSourceDir(), options.getOutputDir());
62-
javadocProcessor.run(dataset.getTestMethods(), options.getFeatureSet());
63-
logger.info("Loaded {} methods from {}", dataset.getTestMethods().size(), options.getTestDataDir());
40+
//Create train and test datasets
41+
DatasetProcessor datasetProcessor = new DatasetProcessor(options);
42+
Dataset dataset = datasetProcessor.run();
6443

6544
//Initialize and populate features
6645
FeatureSetSelector featureSetSelector = new FeatureSetSelector();

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/cli/CliRunner.java

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ public class CliRunner implements Callable<Integer> {
3232
@CommandLine.Option(names = {"-o", "--output"}, description = {"Directory to save output files"})
3333
private String outputDir = "";
3434

35-
@CommandLine.Option(names = {"-f", "--feature"}, description = {"Select one or more feature sets: all, code, doc-auto or doc-manual"})
35+
@CommandLine.Option(names = {"-f", "--feature"}, arity = "1..*", description = {"Select one or more feature sets: all, code, doc-auto or doc-manual"})
3636
private List<String> featureSet = Collections.singletonList("code");
3737

38-
@CommandLine.Option(names = {"-t", "--toolkit"}, description = {"ML toolkit: meka, weka, ml-plan"})
38+
@CommandLine.Option(names = {"-t", "--toolkit"}, description = {"ML toolkit: meka, weka, autoweka, mlplan"})
3939
private String toolkit = "meka";
4040

4141
@CommandLine.Option(names = {"-s", "--srm"}, description = {"SRM: all, source, sink, sanitizer, authentication, relevant"})
@@ -50,6 +50,9 @@ public class CliRunner implements Callable<Integer> {
5050
@CommandLine.Option(names = {"-doc", "--documented"}, description = {"Use only methods with Javadoc"})
5151
private boolean isDocumented = true;
5252

53+
@CommandLine.Option(names = {"-att", "--attribute-selection"}, description = {"Use attribute selection"})
54+
private boolean reduceAttributes = false;
55+
5356
@CommandLine.Option(names = {"-i", "--iterations"}, description = {"Number of iterations for training"})
5457
private int iterations = 10;
5558

@@ -62,6 +65,12 @@ public class CliRunner implements Callable<Integer> {
6265
@CommandLine.Option(names = {"-pt", "--prediction-threshold"}, description = {"Threshold for predicting categories"})
6366
private double predictionThreshold = 0.5;
6467

68+
@CommandLine.Option(names = {"-ds", "--discovery"}, arity = "1..*", description = {"Select discovery for training set SRMs"})
69+
private List<String> discovery = Collections.singletonList("manual");
70+
71+
@CommandLine.Option(names = {"-tl", "--timelimit"}, description = {"Time (minutes) to execute operation "})
72+
private int timeLimit = 1;
73+
6574
public SwanOptions initializeOptions(){
6675

6776
SwanOptions options = new SwanOptions(testDataDir,
@@ -81,6 +90,9 @@ public SwanOptions initializeOptions(){
8190
options.setInstances(arffInstancesFiles);
8291
options.setTrainDataSourceDir(trainDataDirSource);
8392
options.setTestDataSourceDir(testDataSourceDir);
93+
options.setReduceAttributes(false);
94+
options.setDiscovery(discovery);
95+
options.setTimeLimit(timeLimit);
8496

8597
return options;
8698
}

0 commit comments

Comments
 (0)