Skip to content

Commit 48712ee

Browse files
authored
Merge pull request #44 from secure-software-engineering/hotfix/reduce-jar-size
Reduce JAR size and remove training JARs
2 parents 62ee791 + 7bba930 commit 48712ee

File tree

80 files changed

+65
-37
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

80 files changed

+65
-37
lines changed

swan-pipeline/pom.xml

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -258,28 +258,55 @@
258258
</execution>
259259
</executions>
260260
</plugin>
261-
262-
<!-- This creates a runnable JAR that can be used from the command line. -->
263261
<plugin>
264262
<groupId>org.apache.maven.plugins</groupId>
265-
<artifactId>maven-assembly-plugin</artifactId>
263+
<artifactId>maven-shade-plugin</artifactId>
264+
<version>3.2.4</version>
266265
<executions>
267266
<execution>
268267
<phase>package</phase>
269268
<goals>
270-
<goal>single</goal>
269+
<goal>shade</goal>
271270
</goals>
272271
<configuration>
273-
<archive>
274-
<manifest>
275-
<mainClass>
276-
de.fraunhofer.iem.swan.Main
277-
</mainClass>
278-
</manifest>
279-
</archive>
280-
<descriptorRefs>
281-
<descriptorRef>jar-with-dependencies</descriptorRef>
282-
</descriptorRefs>
272+
<transformers>
273+
<!-- adding Main-Class to manifest file -->
274+
<transformer
275+
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
276+
<mainClass>de.fraunhofer.iem.swan.Main</mainClass>
277+
</transformer>
278+
</transformers>
279+
<minimizeJar>true</minimizeJar>
280+
<filters>
281+
<filter>
282+
<artifact>*:*</artifact>
283+
<excludes>
284+
<exclude>META-INF/*.SF</exclude>
285+
<exclude>META-INF/*.DSA</exclude>
286+
<exclude>META-INF/*.RSA</exclude>
287+
</excludes>
288+
</filter>
289+
<filter>
290+
<artifact>edu.stanford.nlp:stanford-corenlp:models-english</artifact>
291+
<excludes>
292+
<exclude>edu/stanford/nlp/models/srparser/**</exclude>
293+
<exclude>edu/stanford/nlp/models/ner/**</exclude>
294+
<exclude>edu/stanford/nlp/models/sentiment/**</exclude>
295+
</excludes>
296+
</filter>
297+
<filter>
298+
<artifact>org.bytedeco</artifact>
299+
<excludes>
300+
<exclude>**</exclude>
301+
</excludes>
302+
</filter>
303+
<!--filter>
304+
<artifact>org.nd4j</artifact>
305+
<excludes>
306+
<exclude>**</exclude>
307+
</excludes>
308+
</filter-->
309+
</filters>
283310
</configuration>
284311
</execution>
285312
</executions>

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/features/doc/nlp/CoreNLPExecutor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public class CoreNLPExecutor {
2828
public CoreNLPExecutor() {
2929
logger.info("Initializing CoreNLP pipeline");
3030
properties = new Properties();
31-
properties.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
31+
properties.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse");
3232
pipeline = new StanfordCoreNLP(properties);
3333
}
3434

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/io/DependencyManager.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package de.fraunhofer.iem.swan.io;
22

33
import dev.jeka.core.api.depmanagement.*;
4+
import dev.jeka.core.api.depmanagement.resolution.JkDependencyResolver;
5+
import dev.jeka.core.api.depmanagement.resolution.JkResolvedDependencyNode;
46
import edu.stanford.nlp.util.StringUtils;
57

68
import java.nio.file.Path;
@@ -29,25 +31,23 @@ public class DependencyManager {
2931
public List<Path> getSourceJar(String module) {
3032

3133
JkDependencySet sources = JkDependencySet.of()
32-
.and(module)
33-
.withDefaultScopes(JkScope.SOURCES);
34+
.and(module);
3435

35-
return resolver.resolve(sources, JkScope.SOURCES).getFiles().getEntries();
36+
return resolver.resolve(sources).getFiles().getEntries();
3637
}
3738

3839
public String getDependencies(String moduleDescription) {
3940
JkDependencySet deps = JkDependencySet.of()
40-
.and(moduleDescription)
41-
.withDefaultScopes(JkScope.COMPILE);
41+
.and(moduleDescription);
4242

4343
resolver = JkDependencyResolver.of().addRepos(JkRepo.ofMavenCentral());
4444

4545
//Get list of dependencies for module
46-
List<JkDependencyNode> nodes = resolver.resolve(deps, JkScope.COMPILE).getDependencyTree().toFlattenList();
46+
List<JkResolvedDependencyNode> nodes = resolver.resolve(deps).getDependencyTree().toFlattenList();
4747

4848
Set<String> dependencies = new HashSet<>();
4949

50-
for (JkDependencyNode node : nodes)
50+
for (JkResolvedDependencyNode node : nodes)
5151
dependencies.add(node.getModuleInfo().getModuleId().toString()
5252
+ ":" + node.getModuleInfo().getResolvedVersion().toString());
5353

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/model/toolkit/MLPlan.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import ai.libs.jaicore.ml.core.dataset.serialization.ArffDatasetAdapter;
66
import ai.libs.jaicore.ml.core.filter.SplitterUtil;
77
import ai.libs.jaicore.ml.weka.classification.learner.IWekaClassifier;
8-
import ai.libs.mlplan.multiclass.wekamlplan.MLPlanWekaBuilder;
8+
import ai.libs.mlplan.weka.MLPlanWekaBuilder;
99
import de.fraunhofer.iem.swan.model.MonteCarloValidator;
1010
import de.fraunhofer.iem.swan.util.Util;
1111
import org.api4.java.ai.ml.core.dataset.schema.attribute.IAttribute;
@@ -50,6 +50,7 @@ public MLPlan() {
5050
public HashMap<String, ArrayList<Double>> evaluateDataset(Instances instances1) {
5151

5252
String arffFilePath = Util.exportInstancesToArff(instances1);
53+
ArffDatasetAdapter arffDatasetAdapter = new ArffDatasetAdapter();
5354

5455
String mClass = Util.getClassName(instances1);
5556

@@ -58,7 +59,7 @@ public HashMap<String, ArrayList<Double>> evaluateDataset(Instances instances1)
5859
//Initialize dataset using ARFF file path
5960
ILabeledDataset<?> dataset = null;
6061
try {
61-
dataset = ArffDatasetAdapter.readDataset(new File(arffFilePath));
62+
dataset = arffDatasetAdapter.readDataset(new File(arffFilePath));
6263
} catch (DatasetDeserializationFailedException e) {
6364
e.printStackTrace();
6465
}
@@ -95,14 +96,14 @@ public HashMap<String, ArrayList<Double>> evaluateDataset(Instances instances1)
9596
//optimizedClassifier.fit(split.get(0));
9697

9798
String trainPath = "swan/swan_core/swan-out/mlplan/train-methods-dataset.arff";
98-
ArffDatasetAdapter.serializeDataset(new File(trainPath), split.get(0));
99+
arffDatasetAdapter.serializeDataset(new File(trainPath), split.get(0));
99100
ArffLoader trainLoader = new ArffLoader();
100101
trainLoader.setFile(new File(trainPath));
101102
Instances trainInstances = trainLoader.getDataSet();
102103
trainInstances.setClassIndex(trainInstances.numAttributes() - 1);
103104

104105
String testPath = "swan/swan_core/swan-out/mlplan/test-methods-dataset.arff";
105-
ArffDatasetAdapter.serializeDataset(new File(testPath), split.get(1));
106+
arffDatasetAdapter.serializeDataset(new File(testPath), split.get(1));
106107
ArffLoader testLoader = new ArffLoader();
107108
testLoader.setFile(new File(testPath));
108109
Instances testInstances = testLoader.getDataSet();
@@ -148,6 +149,7 @@ public void evaluateDataset(Instances instances, int k) {
148149

149150
//arffFilePath = "swan/swan_core/src/main/resources/waveform.arff";
150151
String arffFilePath = Util.exportInstancesToArff(instances);
152+
ArffDatasetAdapter arffDatasetAdapter = new ArffDatasetAdapter();
151153

152154
String mClass = Util.getClassName(instances);
153155

@@ -156,7 +158,7 @@ public void evaluateDataset(Instances instances, int k) {
156158
//Initialize dataset using ARFF file path
157159
ILabeledDataset<?> dataset = null;
158160
try {
159-
dataset = ArffDatasetAdapter.readDataset(new File(arffFilePath));
161+
dataset = arffDatasetAdapter.readDataset(new File(arffFilePath));
160162
} catch (DatasetDeserializationFailedException e) {
161163
e.printStackTrace();
162164
}
@@ -177,7 +179,7 @@ public void evaluateDataset(Instances instances, int k) {
177179

178180
// System.out.println(attribute.getName());
179181
}
180-
ArffDatasetAdapter.serializeDataset(new File("swan/swan_core/swan-out/mlplan/methods-dataset.arff"), split.get(1));
182+
arffDatasetAdapter.serializeDataset(new File("swan/swan_core/swan-out/mlplan/methods-dataset.arff"), split.get(1));
181183

182184

183185
for (int x = 0; x < split.get(1).size(); x++) {

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/training/TrainingSetUpdater.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
package de.fraunhofer.iem.swan.training;
22

3-
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
4-
import de.fraunhofer.iem.swan.io.dataset.SrmList;
5-
import de.fraunhofer.iem.swan.util.Util;
63
import de.fraunhofer.iem.swan.data.Category;
74
import de.fraunhofer.iem.swan.data.Method;
5+
import de.fraunhofer.iem.swan.io.dataset.SrmList;
6+
import de.fraunhofer.iem.swan.util.Util;
87
import org.apache.commons.io.FileUtils;
98

109
import java.io.File;
@@ -115,7 +114,7 @@ public static Set<Method> extractMethodData(HashSet<String> classes, HashSet<Str
115114

116115
System.out.println("Classes: Total/" + classes.size());
117116

118-
String classpath = Util.buildCP(TRAINING_JARS);
117+
String classpath = "Util.buildCP(TRAINING_JARS)";
119118

120119
HashMap<String, String> methodSet = new HashMap<>();
121120

@@ -124,15 +123,15 @@ public static Set<Method> extractMethodData(HashSet<String> classes, HashSet<Str
124123
methodSet.put(t.substring(0, t.indexOf("|")), t.substring(t.indexOf("|") + 1));
125124
}
126125

127-
SourceFileLoader sourceFileLoader = new SourceFileLoader(classpath);
128-
sourceFileLoader.loadMethodsFromTestLib();
126+
// SourceFileLoader sourceFileLoader = new SourceFileLoader(classpath);
127+
//sourceFileLoader.loadMethodsFromTestLib();
129128

130-
Util.createSubclassAnnotations(sourceFileLoader.getMethods(), classpath);
129+
// Util.createSubclassAnnotations(sourceFileLoader.getMethods(), classpath);
131130

132131
HashSet<Method> trainingMethods = new HashSet<>();
133132

134133

135-
for (Method m : sourceFileLoader.getMethods()) {
134+
for (Method m :trainingMethods) {
136135

137136
String sig = m.getClassName() + "." + m.getName();
138137

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/util/Util.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ public static String getClassname(String path) {
130130
className = className.substring(0, className.length() - ".class".length());
131131
if (className.contains("$"))
132132
className = className.substring(0, className.indexOf("$") - 1);
133-
System.out.println(className);
133+
134134
return className;
135135
}
136136

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments (0)