View Javadoc
1   /*
2    * This file is part of dependency-check-core.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * Copyright (c) 2020 The OWASP Foundation. All Rights Reserved.
17   */
18  package org.owasp.dependencycheck.data.nvd.ecosystem;
19  
20  import org.apache.commons.lang3.StringUtils;
21  import io.github.jeremylong.openvulnerability.client.nvd.DefCveItem;
22  
23  import java.util.HashMap;
24  import java.util.Map;
25  import java.util.Map.Entry;
26  import java.util.TreeMap;
27  
28  /**
29   * Helper utility for mapping CVEs to their ecosystems based on the description.
30   *
31   * @author skjolber
32   */
33  public class DescriptionEcosystemMapper {
34  
35      // static fields for thread-safe + hardcoded functionality
36      /**
37       * The array of ecosystems.
38       */
39      private static final String[] ECOSYSTEMS;
40      /**
41       * A helper map to retrieve the index of an ecosystem.
42       */
43      private static final int[] HINT_TO_ECOSYSTEM_LOOKUP;
44      /**
45       * Map of strings to ecosystems.
46       */
47      private static final TreeMap<String, EcosystemHint> ECOSYSTEM_MAP; // thread safe for reading
48  
49      static {
50          ECOSYSTEM_MAP = new TreeMap<>();
51  
52          for (FileExtensionHint fileExtensionHint : FileExtensionHint.values()) {
53              ECOSYSTEM_MAP.put(fileExtensionHint.getValue(), fileExtensionHint);
54          }
55          for (DescriptionKeywordHint descriptionKeywordHint : DescriptionKeywordHint.values()) {
56              ECOSYSTEM_MAP.put(descriptionKeywordHint.getValue(), descriptionKeywordHint);
57          }
58  
59          final Map<String, Integer> ecosystemIndexes = new HashMap<>();
60  
61          HINT_TO_ECOSYSTEM_LOOKUP = new int[ECOSYSTEM_MAP.size()];
62  
63          int index = 0;
64          for (Entry<String, EcosystemHint> entry : ECOSYSTEM_MAP.entrySet()) {
65              final EcosystemHint ecosystemHint = entry.getValue();
66  
67              Integer ecosystemIndex = ecosystemIndexes.get(ecosystemHint.getEcosystem());
68              if (ecosystemIndex == null) {
69                  ecosystemIndex = ecosystemIndexes.size();
70  
71                  ecosystemIndexes.put(ecosystemHint.getEcosystem(), ecosystemIndex);
72              }
73  
74              HINT_TO_ECOSYSTEM_LOOKUP[index] = ecosystemIndex;
75  
76              index++;
77          }
78  
79          ECOSYSTEMS = new String[ecosystemIndexes.size()];
80          ecosystemIndexes.forEach((key, value) -> ECOSYSTEMS[value] = key);
81      }
82  
83      // take advantage of chars also being numbers
84      /**
85       * Prefix prefix for matching ecosystems.
86       */
87      private final boolean[] keywordPrefixes = getPrefixesFor(" -(\"'");
88      /**
89       * Postfix prefix for matching ecosystems.
90       */
91      private final boolean[] keywordPostfixes = getPrefixesFor(" -)\"',.:;");
92      /**
93       * Aho Corasick double array trie used for parsing and matching ecosystems.
94       */
95      private final StringAhoCorasickDoubleArrayTrie<EcosystemHint> ahoCorasickDoubleArrayTrie;
96  
97      /**
98       * Constructs a new description ecosystem mapper.
99       */
100     public DescriptionEcosystemMapper() {
101         ahoCorasickDoubleArrayTrie = toAhoCorasickDoubleArrayTrie();
102     }
103 
104     protected static boolean[] getPrefixesFor(String str) {
105         int max = -1;
106         for (int i = 0; i < str.length(); i++) {
107             if (max < str.charAt(i)) {
108                 max = str.charAt(i);
109             }
110         }
111 
112         final boolean[] delimiters = new boolean[max + 1];
113         for (int i = 0; i < str.length(); i++) {
114             delimiters[str.charAt(i)] = true;
115         }
116         return delimiters;
117     }
118 
119     protected static StringAhoCorasickDoubleArrayTrie<EcosystemHint> toAhoCorasickDoubleArrayTrie() {
120         final StringAhoCorasickDoubleArrayTrie<EcosystemHint> exact = new StringAhoCorasickDoubleArrayTrie<>();
121         exact.build(ECOSYSTEM_MAP);
122         return exact;
123     }
124 
125     protected static boolean isExtension(String str, int begin, int end) {
126         if (str.length() != end && Character.isLetterOrDigit(str.charAt(end))) {
127             return false;
128         }
129 
130         return isLowercaseAscii(str, begin + 1, end);
131     }
132 
133     protected static boolean isLowercaseAscii(String multicase, int start, int end) {
134         for (int i = start; i < end; i++) {
135             final char c = multicase.charAt(i);
136 
137             if (c < 'a' || c > 'z') {
138                 return false;
139             }
140         }
141         return true;
142     }
143 
144     /**
145      * Tests if the string is a URL by looking for '://'.
146      *
147      * @param c the text to test.
148      * @param begin the position in the string to begin searching; note the
149      * search is decreasing to 0
150      * @return <code>true</code> if `://` is found; otherwise <code>false</code>
151      */
152     public static boolean isURL(String c, int begin) {
153         int pos = begin - 2;
154 
155         while (pos > 2) {
156             pos--;
157 
158             if (c.charAt(pos) == ' ') {
159                 return false;
160             }
161             if (c.charAt(pos) == ':') {
162                 return c.charAt(pos + 1) == '/' && c.charAt(pos + 2) == '/';
163             }
164         }
165 
166         return false;
167     }
168 
169     protected void increment(int i, int[] ecosystemMap) {
170         ecosystemMap[HINT_TO_ECOSYSTEM_LOOKUP[i]]++;
171     }
172 
173     /**
174      * Returns the ecosystem if identified by English description from the CVE
175      * data.
176      *
177      * @param cve the CVE data
178      * @return the ecosystem if identified
179      */
180     public String getEcosystem(DefCveItem cve) {
181         final int[] ecosystemMap = new int[ECOSYSTEMS.length];
182         cve.getCve().getDescriptions().stream()
183                 .filter((langString) -> (langString.getLang().equals("en")))
184                 .forEachOrdered((langString) -> search(langString.getValue(), ecosystemMap));
185         return getResult(ecosystemMap);
186     }
187 
188     /**
189      * Determines the ecosystem for the given string.
190      *
191      * @param multicase the string to test
192      * @return the ecosystem
193      */
194     public String getEcosystem(String multicase) {
195         final int[] ecosystemMap = new int[ECOSYSTEMS.length];
196         search(multicase, ecosystemMap);
197         return getResult(ecosystemMap);
198     }
199 
200     private void search(String multicase, int[] ecosystemMap) {
201         final String c = multicase.toLowerCase();
202         ahoCorasickDoubleArrayTrie.parseText(c, (begin, end, value, index) -> {
203             if (value.getNature() == EcosystemHintNature.FILE_EXTENSION) {
204                 if (!isExtension(multicase, begin, end)) {
205                     return;
206                 }
207 
208                 final String ecosystem = value.getEcosystem();
209                 // real extension, if not part of url
210                 if (Ecosystem.PHP.equals(ecosystem) && c.regionMatches(begin, ".php", 0, 4)) {
211                     if (isURL(c, begin)) {
212                         return;
213                     }
214                 } else if (Ecosystem.JAVA.equals(ecosystem) && c.regionMatches(begin, ".jsp", 0, 4)) {
215                     if (isURL(c, begin)) {
216                         return;
217                     }
218                 }
219             } else { // keyword
220 
221                 // check if full word, i.e. typically space first and then space or dot after
222                 if (begin != 0) {
223                     final char startChar = c.charAt(begin - 1);
224                     if (startChar >= keywordPrefixes.length || !keywordPrefixes[startChar]) {
225                         return;
226                     }
227                 }
228                 if (end != c.length()) {
229                     final char endChar = c.charAt(end);
230                     if (endChar >= keywordPostfixes.length || !keywordPostfixes[endChar]) {
231                         return;
232                     }
233                 }
234 
235                 final String ecosystem = value.getEcosystem();
236                 if (Ecosystem.NATIVE.equals(ecosystem)) { // TODO could be checked afterwards
237                     if (StringUtils.contains(c, "android")) {
238                         return;
239                     }
240                 }
241             }
242             increment(index, ecosystemMap);
243         });
244     }
245 
246     private String getResult(int[] values) {
247         final int best = getBestScore(values);
248         if (best != -1) {
249             return ECOSYSTEMS[best];
250         }
251         return null;
252     }
253 
254     private int getBestScore(int[] values) {
255         int bestIndex = -1;
256         int bestScore = -1;
257         for (int i = 0; i < values.length; i++) {
258             if (values[i] > 0) {
259                 if (values[i] > bestScore) {
260                     bestIndex = i;
261                     bestScore = values[i];
262                 }
263                 values[i] = 0;
264             }
265         }
266         return bestIndex;
267     }
268 }