Skip to content

Commit 6e20087

Browse files
committed
Add new doc comments features
1 parent e1a6bb7 commit 6e20087

File tree

9 files changed

+435
-0
lines changed

9 files changed

+435
-0
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
package de.fraunhofer.iem.swan.features.doc.embedding;
2+
3+
/**
 * Empty placeholder class in the {@code features.doc.embedding} package.
 * <p>
 * It currently declares no fields, constructors or methods.
 * NOTE(review): presumably meant to host the handling of automatically derived
 * (embedding-based) doc-comment features - confirm before building on it.
 *
 * @author Oshando Johnson on 25.09.20
 */
public class AutomaticFeatureHandler {

}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package de.fraunhofer.iem.swan.features.doc.manual;
2+
3+
/**
 * Empty placeholder class in the {@code features.doc.manual} package.
 * <p>
 * It currently declares no fields, constructors or methods.
 * NOTE(review): presumably meant to host the handling of the manually defined
 * doc-comment features - confirm before building on it.
 *
 * @author Oshando Johnson on 28.09.20
 */
public class ManualFeaturesHandler {

}
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
package de.fraunhofer.iem.swan.features.doc.manual;
2+
3+
import java.util.Set;
4+
import java.util.stream.Collectors;
5+
import java.util.stream.Stream;
6+
7+
/**
 * Word lists for security-related vocabulary, grouped by role (source, sink,
 * sanitizer, authentication) and by CWE identifier.
 * <p>
 * All entries are lower-case string literals. Every set is unmodifiable: the
 * constants are shared, so an attempt to add or remove a word throws
 * {@link UnsupportedOperationException} instead of silently changing the
 * vocabulary for every other user of this class.
 *
 * @author Oshando Johnson on 04.08.20
 */
public class SecurityVocabulary {

    // ---- General-purpose nouns ----------------------------------------------

    public static final Set<String> GENERAL_NOUNS = vocabulary("adapter", "task", "list", "child",
            "parent", "array", "cache", "buffer", "chunk", "client", "algorithm", "bug", "dataset", "firewall",
            "identifier", "interface", "fragment", "internet", "module", "packet", "garbage", "partition", "mobile",
            "matrix", "keyboard", "mouse", "integer", "hardware", "peer", "sensor", "sibling", "smartphone",
            "vector", "repository", "framework", "platform", "layer", "software", "memory", "disk");
    // NOTE(review): the original carried the remark "extract, package, find" here -
    // presumably candidate words that were not added to any list yet.

    // ---- Source vocabulary: verbs, nouns, prepositions, adjectives ------------

    public static final Set<String> SOURCE_VERBS = vocabulary("browse", "copy", "create", "decode", "download", "fetch",
            "get", "import", "input", "line", "load", "lookup", "read", "request", "retrieve", "return", "search", "text", "unescape");
    public static final Set<String> SOURCE_NOUNS = vocabulary("array", "bytes", "data", "database", "datum", "document",
            "content", "file", "html", "io", "issue", "json", "line", "network", "node", "object", "output", "retrieve",
            "select", "text", "url", "value", "web", "xml");
    public static final Set<String> SOURCE_PREPOSITIONS = vocabulary("at", "from", "within");
    public static final Set<String> SOURCE_ADJECTIVE = vocabulary("incoming", "external", "internal", "local");

    // ---- Sink vocabulary: verbs, nouns, prepositions, adjectives --------------

    public static final Set<String> SINK_VERBS = vocabulary("backup", "commit", "cookie", "copy", "delete", "dump",
            "drop", "email", "e-mail", "establish", "execute", "export", "handle", "hibernate", "insert", "leak",
            "line", "list", "log", "manage", "move", "parse", "persist", "print", "put", "redirect", "render",
            "replace", "request", "response", "run", "save", "scan", "send", "set", "substitute", "take", "update",
            "write", "output", "upload");
    public static final Set<String> SINK_NOUNS = vocabulary("array", "bytes", "connection", "data", "database", "datum",
            "discard", "file", "header", "html", "io", "jdbc", "logger", "message", "network", "post", "print", "output",
            "security", "string", "url", "web");
    public static final Set<String> SINK_PREPOSITIONS = vocabulary("in", "inside", "into", "on", "onto", "to");
    public static final Set<String> SINK_ADJECTIVE = vocabulary("outgoing", "external", "internal", "malicious");

    // ---- Sanitizer vocabulary: verbs, nouns, prepositions ---------------------

    public static final Set<String> SANITIZER_VERBS = vocabulary("apply", "convert", "encode", "encrypt", "decode",
            "decrypt", "escape", "hash", "login", "logout", "match", "page", "replace", "sanitize", "strip",
            "translate", "turn", "validate");
    // NOTE(review): "regular expression" is the only multi-word entry; it can only
    // match if the consumer compares against multi-word tokens - confirm.
    public static final Set<String> SANITIZER_NOUNS = vocabulary("array", "byte", "encoder", "data", "datum", "decoder",
            "digest", "hash", "mask", "message", "pattern", "regex", "regular expression", "salt", "string");
    public static final Set<String> SANITIZER_PREPOSITIONS = vocabulary("for", "to");

    // ---- Authentication vocabulary --------------------------------------------

    public static final Set<String> AUTH_SAFE_VERBS = vocabulary("access", "authenticate", "authorize", "bind",
            "connect", "create", "establish", "login", "open", "put", "token", "trust", "verify");

    public static final Set<String> AUTH_NO_CHANGE_VERBS = vocabulary("access", "check", "get", "has", "validate", "verify");

    public static final Set<String> AUTH_UNSAFE_VERBS = vocabulary("close", "delete", "end", "disconnect", "logout");

    public static final Set<String> AUTHENTICATION_NOUNS = vocabulary("access", "account", "auth", "authentication",
            "authorization", "connection", "credential", "ldap", "oauth", "privilege", "right", "security", "server", "user");
    public static final Set<String> AUTHENTICATION_PREPOSITIONS = vocabulary("for", "from", "to");

    // NOTE(review): "register" is not an adverb; kept as in the original list.
    public static final Set<String> AUTHENTICATION_ADVERBS = vocabulary("register", "maliciously");

    public static final Set<String> AUTHENTICATION_ADJECTIVE = vocabulary("online", "offline", "trust", "verify", "register", "malicious");

    // ---- CWE-78 (command injection): verbs and nouns --------------------------

    // The original listed "sanitize" twice; a set keeps one copy either way.
    public static final Set<String> CWE078_VERBS = vocabulary("encode", "execute", "make", "process", "run", "delete",
            "sanitize", "compile");
    public static final Set<String> CWE078_NOUNS = vocabulary("system", "command", "credential", "runtime", "encoder", "os",
            "operating", "host", "shell");

    // ---- CWE-79 (cross-site scripting): verbs and nouns -----------------------

    public static final Set<String> CWE079_VERBS = vocabulary("render", "input", "hibernate",
            "set", "sanitize");
    public static final Set<String> CWE079_NOUNS = vocabulary("web", "website", "request", "html", "page", "css", "dom",
            "header", "document", "node");

    // ---- CWE-89 (SQL injection): verbs and nouns ------------------------------

    public static final Set<String> CWE089_VERBS = vocabulary("create", "delete", "execute", "insert", "make", "persist", "query",
            "remove", "run", "sanitize", "save", "schema", "script", "transact", "write", "update");
    public static final Set<String> CWE089_NOUNS = vocabulary("data", "datum", "database", "db", "encoder", "jdbc", "query",
            "request", "row", "table", "tuple", "transaction", "value", "sql");

    // ---- CWE-306 (missing authentication): verbs and nouns --------------------

    public static final Set<String> CWE306_VERBS = vocabulary("login", "logout", "authorise", "authenticate", "grant", "access",
            "connect", "disconnect");
    // Fixed: the original spelled the first entry "0auth" (digit zero), which can
    // never match the word "oauth" used in AUTHENTICATION_NOUNS.
    public static final Set<String> CWE306_NOUNS = vocabulary("oauth", "auth", "authentication", "authorisation", "access", "privilege",
            "connection", "disconnection", "user", "account", "profile");

    // ---- CWE-601 (open redirect): verbs and nouns -----------------------------

    // The original listed "respond" twice; a set keeps one copy either way.
    public static final Set<String> CWE601_VERBS = vocabulary("respond", "send", "forward", "route", "request", "serve",
            "redirect");
    public static final Set<String> CWE601_NOUNS = vocabulary("request", "http", "servlet", "response", "redirect", "parameter",
            "web", "url");

    // ---- CWE-862 (missing authorization): verbs and nouns ---------------------

    public static final Set<String> CWE862_VERBS = vocabulary("bind", "connect", "login", "authorize", "authenticate");
    public static final Set<String> CWE862_NOUNS = vocabulary("credential", "user", "encoder", "authorization", "role", "access");

    // ---- CWE-863 (incorrect authorization): verbs and nouns -------------------
    // Same words as the CWE-862 lists, kept as separate sets.

    public static final Set<String> CWE863_VERBS = vocabulary("bind", "connect", "login", "authorize", "authenticate");
    public static final Set<String> CWE863_NOUNS = vocabulary("credential", "user", "encoder", "authorization", "role", "access");

    // ---- Keywords hinting at incomplete or suspicious code --------------------

    public static final Set<String> INCOMPLETE_CODE_KEYWORDS = vocabulary("backdoor", "broken", "bypass", "divert", "fixme",
            "hack", "kludge", "password", "steal", "stolen", "todo", "trick");

    /**
     * Builds one vocabulary constant.
     *
     * @param words the words of the list; repeated words collapse into one entry
     * @return an unmodifiable set holding the given words
     */
    private static Set<String> vocabulary(String... words) {
        // Fully qualified so that no additional import line is required.
        return java.util.Collections.unmodifiableSet(Stream.of(words).collect(Collectors.toSet()));
    }
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package de.fraunhofer.iem.swan.features.doc.manual.annotated;
2+
3+
import de.fraunhofer.iem.swan.data.Category;
4+
import de.fraunhofer.iem.swan.features.doc.manual.FeatureResult;
5+
import de.fraunhofer.iem.swan.features.doc.manual.IDocFeature;
6+
import de.fraunhofer.iem.swan.features.doc.nlp.AnnotatedMethod;
7+
import de.fraunhofer.iem.swan.features.doc.manual.SecurityVocabulary;
8+
9+
/**
10+
* Evaluates if Command Injection words are found in the doc comment.
11+
* * <p>
12+
* * The number of Command Injection verbs and nouns is based on the
13+
* * {@link SecurityVocabulary#CWE078_VERBS}
14+
* * and {@link SecurityVocabulary#CWE078_NOUNS} lists.
15+
*
16+
* @author Oshando Johnson on 30.09.20
17+
*/
18+
public class CommandInjectionCountFeature extends WordCountFeature implements IDocFeature {
19+
20+
21+
public CommandInjectionCountFeature() {
22+
super();
23+
}
24+
25+
@Override
26+
public FeatureResult evaluate(AnnotatedMethod annotatedMethod) {
27+
28+
featureResult.setMethodValue(wordCounter(annotatedMethod.getMethodMap(), Category.CWE078));
29+
featureResult.setClassValue(wordCounter(annotatedMethod.getClassMap(), Category.CWE078));
30+
31+
return featureResult;
32+
}
33+
34+
@Override
35+
public String toString() {
36+
return "CommandInjectionCountFeature [" + featureResult + "]";
37+
}
38+
39+
@Override
40+
public String getName() {
41+
return "CommandInjectionCountFeature";
42+
}
43+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package de.fraunhofer.iem.swan.features.doc.manual.annotated;
2+
3+
import de.fraunhofer.iem.swan.data.Category;
4+
import de.fraunhofer.iem.swan.features.doc.manual.FeatureResult;
5+
import de.fraunhofer.iem.swan.features.doc.manual.IDocFeature;
6+
import de.fraunhofer.iem.swan.features.doc.nlp.AnnotatedMethod;
7+
import de.fraunhofer.iem.swan.features.doc.manual.SecurityVocabulary;
8+
9+
10+
/**
11+
* Evaluates if cross-site scripting words are found in the doc comment.
12+
* * <p>
13+
* * The number of cross-site scripting verbs and nouns is based on the
14+
* * {@link SecurityVocabulary#CWE079_VERBS}
15+
* * and {@link SecurityVocabulary#CWE079_NOUNS} lists.
16+
*
17+
* @author Oshando Johnson on 30.09.20
18+
*/
19+
public class CrossSiteScriptingCountFeature extends WordCountFeature implements IDocFeature {
20+
21+
public CrossSiteScriptingCountFeature() {
22+
super();
23+
}
24+
25+
@Override
26+
public FeatureResult evaluate(AnnotatedMethod annotatedMethod) {
27+
28+
featureResult.setMethodValue(wordCounter(annotatedMethod.getMethodMap(), Category.CWE079));
29+
featureResult.setClassValue(wordCounter(annotatedMethod.getClassMap(), Category.CWE079));
30+
31+
return featureResult;
32+
}
33+
34+
@Override
35+
public String toString() {
36+
return "CrossSiteScriptingCountFeature [" + featureResult + "]";
37+
}
38+
39+
@Override
40+
public String getName() {
41+
return "CrossSiteScriptingCountFeature";
42+
}
43+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package de.fraunhofer.iem.swan.features.doc.manual.annotated;
2+
3+
import de.fraunhofer.iem.swan.data.Category;
4+
import de.fraunhofer.iem.swan.features.doc.manual.FeatureResult;
5+
import de.fraunhofer.iem.swan.features.doc.manual.IDocFeature;
6+
import de.fraunhofer.iem.swan.features.doc.nlp.AnnotatedMethod;
7+
import de.fraunhofer.iem.swan.features.doc.manual.SecurityVocabulary;
8+
9+
10+
/**
11+
* Evaluates if incorrect authorization words are found in the doc comment.
12+
* <p>
13+
* The number of incorrect authorization verbs and nouns is based on the
14+
* {@link SecurityVocabulary#CWE863_VERBS}
15+
* and {@link SecurityVocabulary#CWE863_NOUNS} lists.
16+
*
17+
* @author Oshando Johnson on 30.09.20
18+
*/
19+
public class IncorrectAuthorizationCountFeature extends WordCountFeature implements IDocFeature {
20+
21+
public IncorrectAuthorizationCountFeature() {
22+
super();
23+
}
24+
25+
@Override
26+
public FeatureResult evaluate(AnnotatedMethod annotatedMethod) {
27+
28+
featureResult.setMethodValue(wordCounter(annotatedMethod.getMethodMap(), Category.CWE863));
29+
featureResult.setClassValue(wordCounter(annotatedMethod.getClassMap(), Category.CWE863));
30+
31+
return featureResult;
32+
}
33+
34+
@Override
35+
public String toString() {
36+
return "IncorrectAuthorizationCountFeature [" + featureResult + "]";
37+
}
38+
39+
@Override
40+
public String getName() {
41+
return "IncorrectAuthorizationCountFeature";
42+
}
43+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package de.fraunhofer.iem.swan.features.doc.manual.annotated;
2+
3+
import de.fraunhofer.iem.swan.data.Category;
4+
import de.fraunhofer.iem.swan.features.doc.manual.FeatureResult;
5+
import de.fraunhofer.iem.swan.features.doc.manual.IDocFeature;
6+
import de.fraunhofer.iem.swan.features.doc.nlp.AnnotatedMethod;
7+
import de.fraunhofer.iem.swan.features.doc.manual.SecurityVocabulary;
8+
9+
/**
10+
* Evaluates if missing authentication words are found in the doc comment.
11+
* * <p>
12+
* * The number of missing authentication verbs and nouns is based on the
13+
* * {@link SecurityVocabulary#CWE306_VERBS}
14+
* * and {@link SecurityVocabulary#CWE306_NOUNS} lists.
15+
*
16+
* @author Oshando Johnson on 30.09.20
17+
*/
18+
public class MissingAuthCountFeature extends WordCountFeature implements IDocFeature {
19+
20+
21+
public MissingAuthCountFeature() {
22+
super();
23+
}
24+
25+
@Override
26+
public FeatureResult evaluate(AnnotatedMethod annotatedMethod) {
27+
28+
featureResult.setMethodValue(wordCounter(annotatedMethod.getMethodMap(), Category.CWE306));
29+
featureResult.setClassValue(wordCounter(annotatedMethod.getClassMap(), Category.CWE306));
30+
31+
return featureResult;
32+
}
33+
34+
@Override
35+
public String toString() {
36+
return "MissingAuthCountFeature [" + featureResult + "]";
37+
}
38+
39+
@Override
40+
public String getName() {
41+
return "MissingAuthCountFeature";
42+
}
43+
}

0 commit comments

Comments
 (0)