1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.owasp.dependencycheck.data.nvd.ecosystem;
19
20 import org.apache.commons.lang3.StringUtils;
21 import io.github.jeremylong.openvulnerability.client.nvd.DefCveItem;
22
23 import java.util.HashMap;
24 import java.util.Map;
25 import java.util.Map.Entry;
26 import java.util.TreeMap;
27
28
29
30
31
32
33 public class DescriptionEcosystemMapper {
34
35
36
37
38
39 private static final String[] ECOSYSTEMS;
40
41
42
43 private static final int[] HINT_TO_ECOSYSTEM_LOOKUP;
44
45
46
47 private static final TreeMap<String, EcosystemHint> ECOSYSTEM_MAP;
48
49 static {
50 ECOSYSTEM_MAP = new TreeMap<>();
51
52 for (FileExtensionHint fileExtensionHint : FileExtensionHint.values()) {
53 ECOSYSTEM_MAP.put(fileExtensionHint.getValue(), fileExtensionHint);
54 }
55 for (DescriptionKeywordHint descriptionKeywordHint : DescriptionKeywordHint.values()) {
56 ECOSYSTEM_MAP.put(descriptionKeywordHint.getValue(), descriptionKeywordHint);
57 }
58
59 final Map<String, Integer> ecosystemIndexes = new HashMap<>();
60
61 HINT_TO_ECOSYSTEM_LOOKUP = new int[ECOSYSTEM_MAP.size()];
62
63 int index = 0;
64 for (Entry<String, EcosystemHint> entry : ECOSYSTEM_MAP.entrySet()) {
65 final EcosystemHint ecosystemHint = entry.getValue();
66
67 Integer ecosystemIndex = ecosystemIndexes.get(ecosystemHint.getEcosystem());
68 if (ecosystemIndex == null) {
69 ecosystemIndex = ecosystemIndexes.size();
70
71 ecosystemIndexes.put(ecosystemHint.getEcosystem(), ecosystemIndex);
72 }
73
74 HINT_TO_ECOSYSTEM_LOOKUP[index] = ecosystemIndex;
75
76 index++;
77 }
78
79 ECOSYSTEMS = new String[ecosystemIndexes.size()];
80 ecosystemIndexes.forEach((key, value) -> ECOSYSTEMS[value] = key);
81 }
82
83
84
85
86
87 private final boolean[] keywordPrefixes = getPrefixesFor(" -(\"'");
88
89
90
91 private final boolean[] keywordPostfixes = getPrefixesFor(" -)\"',.:;");
92
93
94
95 private final StringAhoCorasickDoubleArrayTrie<EcosystemHint> ahoCorasickDoubleArrayTrie;
96
97
98
99
100 public DescriptionEcosystemMapper() {
101 ahoCorasickDoubleArrayTrie = toAhoCorasickDoubleArrayTrie();
102 }
103
104 protected static boolean[] getPrefixesFor(String str) {
105 int max = -1;
106 for (int i = 0; i < str.length(); i++) {
107 if (max < str.charAt(i)) {
108 max = str.charAt(i);
109 }
110 }
111
112 final boolean[] delimiters = new boolean[max + 1];
113 for (int i = 0; i < str.length(); i++) {
114 delimiters[str.charAt(i)] = true;
115 }
116 return delimiters;
117 }
118
119 protected static StringAhoCorasickDoubleArrayTrie<EcosystemHint> toAhoCorasickDoubleArrayTrie() {
120 final StringAhoCorasickDoubleArrayTrie<EcosystemHint> exact = new StringAhoCorasickDoubleArrayTrie<>();
121 exact.build(ECOSYSTEM_MAP);
122 return exact;
123 }
124
125 protected static boolean isExtension(String str, int begin, int end) {
126 if (str.length() != end && Character.isLetterOrDigit(str.charAt(end))) {
127 return false;
128 }
129
130 return isLowercaseAscii(str, begin + 1, end);
131 }
132
133 protected static boolean isLowercaseAscii(String multicase, int start, int end) {
134 for (int i = start; i < end; i++) {
135 final char c = multicase.charAt(i);
136
137 if (c < 'a' || c > 'z') {
138 return false;
139 }
140 }
141 return true;
142 }
143
144
145
146
147
148
149
150
151
152 public static boolean isURL(String c, int begin) {
153 int pos = begin - 2;
154
155 while (pos > 2) {
156 pos--;
157
158 if (c.charAt(pos) == ' ') {
159 return false;
160 }
161 if (c.charAt(pos) == ':') {
162 return c.charAt(pos + 1) == '/' && c.charAt(pos + 2) == '/';
163 }
164 }
165
166 return false;
167 }
168
169 protected void increment(int i, int[] ecosystemMap) {
170 ecosystemMap[HINT_TO_ECOSYSTEM_LOOKUP[i]]++;
171 }
172
173
174
175
176
177
178
179
180 public String getEcosystem(DefCveItem cve) {
181 final int[] ecosystemMap = new int[ECOSYSTEMS.length];
182 cve.getCve().getDescriptions().stream()
183 .filter((langString) -> (langString.getLang().equals("en")))
184 .forEachOrdered((langString) -> search(langString.getValue(), ecosystemMap));
185 return getResult(ecosystemMap);
186 }
187
188
189
190
191
192
193
194 public String getEcosystem(String multicase) {
195 final int[] ecosystemMap = new int[ECOSYSTEMS.length];
196 search(multicase, ecosystemMap);
197 return getResult(ecosystemMap);
198 }
199
200 private void search(String multicase, int[] ecosystemMap) {
201 final String c = multicase.toLowerCase();
202 ahoCorasickDoubleArrayTrie.parseText(c, (begin, end, value, index) -> {
203 if (value.getNature() == EcosystemHintNature.FILE_EXTENSION) {
204 if (!isExtension(multicase, begin, end)) {
205 return;
206 }
207
208 final String ecosystem = value.getEcosystem();
209
210 if (Ecosystem.PHP.equals(ecosystem) && c.regionMatches(begin, ".php", 0, 4)) {
211 if (isURL(c, begin)) {
212 return;
213 }
214 } else if (Ecosystem.JAVA.equals(ecosystem) && c.regionMatches(begin, ".jsp", 0, 4)) {
215 if (isURL(c, begin)) {
216 return;
217 }
218 }
219 } else {
220
221
222 if (begin != 0) {
223 final char startChar = c.charAt(begin - 1);
224 if (startChar >= keywordPrefixes.length || !keywordPrefixes[startChar]) {
225 return;
226 }
227 }
228 if (end != c.length()) {
229 final char endChar = c.charAt(end);
230 if (endChar >= keywordPostfixes.length || !keywordPostfixes[endChar]) {
231 return;
232 }
233 }
234
235 final String ecosystem = value.getEcosystem();
236 if (Ecosystem.NATIVE.equals(ecosystem)) {
237 if (StringUtils.contains(c, "android")) {
238 return;
239 }
240 }
241 }
242 increment(index, ecosystemMap);
243 });
244 }
245
246 private String getResult(int[] values) {
247 final int best = getBestScore(values);
248 if (best != -1) {
249 return ECOSYSTEMS[best];
250 }
251 return null;
252 }
253
254 private int getBestScore(int[] values) {
255 int bestIndex = -1;
256 int bestScore = -1;
257 for (int i = 0; i < values.length; i++) {
258 if (values[i] > 0) {
259 if (values[i] > bestScore) {
260 bestIndex = i;
261 bestScore = values[i];
262 }
263 values[i] = 0;
264 }
265 }
266 return bestIndex;
267 }
268 }