DescriptionEcosystemMapper.java
- /*
- * This file is part of dependency-check-core.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Copyright (c) 2020 The OWASP Foundation. All Rights Reserved.
- */
- package org.owasp.dependencycheck.data.nvd.ecosystem;
- import org.apache.commons.lang3.StringUtils;
- import io.github.jeremylong.openvulnerability.client.nvd.DefCveItem;
- import java.util.HashMap;
- import java.util.Map;
- import java.util.Map.Entry;
- import java.util.TreeMap;
- /**
- * Helper utility for mapping CVEs to their ecosystems based on the description.
- *
- * @author skjolber
- */
- public class DescriptionEcosystemMapper {
- // static fields for thread-safe + hardcoded functionality
- /**
- * The array of ecosystems.
- */
- private static final String[] ECOSYSTEMS;
- /**
- * A helper map to retrieve the index of an ecosystem.
- */
- private static final int[] HINT_TO_ECOSYSTEM_LOOKUP;
- /**
- * Map of strings to ecosystems.
- */
- private static final TreeMap<String, EcosystemHint> ECOSYSTEM_MAP; // thread safe for reading
- static {
- ECOSYSTEM_MAP = new TreeMap<>();
- for (FileExtensionHint fileExtensionHint : FileExtensionHint.values()) {
- ECOSYSTEM_MAP.put(fileExtensionHint.getValue(), fileExtensionHint);
- }
- for (DescriptionKeywordHint descriptionKeywordHint : DescriptionKeywordHint.values()) {
- ECOSYSTEM_MAP.put(descriptionKeywordHint.getValue(), descriptionKeywordHint);
- }
- final Map<String, Integer> ecosystemIndexes = new HashMap<>();
- HINT_TO_ECOSYSTEM_LOOKUP = new int[ECOSYSTEM_MAP.size()];
- int index = 0;
- for (Entry<String, EcosystemHint> entry : ECOSYSTEM_MAP.entrySet()) {
- final EcosystemHint ecosystemHint = entry.getValue();
- Integer ecosystemIndex = ecosystemIndexes.get(ecosystemHint.getEcosystem());
- if (ecosystemIndex == null) {
- ecosystemIndex = ecosystemIndexes.size();
- ecosystemIndexes.put(ecosystemHint.getEcosystem(), ecosystemIndex);
- }
- HINT_TO_ECOSYSTEM_LOOKUP[index] = ecosystemIndex;
- index++;
- }
- ECOSYSTEMS = new String[ecosystemIndexes.size()];
- ecosystemIndexes.forEach((key, value) -> ECOSYSTEMS[value] = key);
- }
- // take advantage of chars also being numbers
- /**
- * Prefix prefix for matching ecosystems.
- */
- private final boolean[] keywordPrefixes = getPrefixesFor(" -(\"'");
- /**
- * Postfix prefix for matching ecosystems.
- */
- private final boolean[] keywordPostfixes = getPrefixesFor(" -)\"',.:;");
- /**
- * Aho Corasick double array trie used for parsing and matching ecosystems.
- */
- private final StringAhoCorasickDoubleArrayTrie<EcosystemHint> ahoCorasickDoubleArrayTrie;
- /**
- * Constructs a new description ecosystem mapper.
- */
- public DescriptionEcosystemMapper() {
- ahoCorasickDoubleArrayTrie = toAhoCorasickDoubleArrayTrie();
- }
- protected static boolean[] getPrefixesFor(String str) {
- int max = -1;
- for (int i = 0; i < str.length(); i++) {
- if (max < str.charAt(i)) {
- max = str.charAt(i);
- }
- }
- final boolean[] delimiters = new boolean[max + 1];
- for (int i = 0; i < str.length(); i++) {
- delimiters[str.charAt(i)] = true;
- }
- return delimiters;
- }
- protected static StringAhoCorasickDoubleArrayTrie<EcosystemHint> toAhoCorasickDoubleArrayTrie() {
- final StringAhoCorasickDoubleArrayTrie<EcosystemHint> exact = new StringAhoCorasickDoubleArrayTrie<>();
- exact.build(ECOSYSTEM_MAP);
- return exact;
- }
- protected static boolean isExtension(String str, int begin, int end) {
- if (str.length() != end && Character.isLetterOrDigit(str.charAt(end))) {
- return false;
- }
- return isLowercaseAscii(str, begin + 1, end);
- }
- protected static boolean isLowercaseAscii(String multicase, int start, int end) {
- for (int i = start; i < end; i++) {
- final char c = multicase.charAt(i);
- if (c < 'a' || c > 'z') {
- return false;
- }
- }
- return true;
- }
- /**
- * Tests if the string is a URL by looking for '://'.
- *
- * @param c the text to test.
- * @param begin the position in the string to begin searching; note the
- * search is decreasing to 0
- * @return <code>true</code> if `://` is found; otherwise <code>false</code>
- */
- public static boolean isURL(String c, int begin) {
- int pos = begin - 2;
- while (pos > 2) {
- pos--;
- if (c.charAt(pos) == ' ') {
- return false;
- }
- if (c.charAt(pos) == ':') {
- return c.charAt(pos + 1) == '/' && c.charAt(pos + 2) == '/';
- }
- }
- return false;
- }
- protected void increment(int i, int[] ecosystemMap) {
- ecosystemMap[HINT_TO_ECOSYSTEM_LOOKUP[i]]++;
- }
- /**
- * Returns the ecosystem if identified by English description from the CVE
- * data.
- *
- * @param cve the CVE data
- * @return the ecosystem if identified
- */
- public String getEcosystem(DefCveItem cve) {
- final int[] ecosystemMap = new int[ECOSYSTEMS.length];
- cve.getCve().getDescriptions().stream()
- .filter((langString) -> (langString.getLang().equals("en")))
- .forEachOrdered((langString) -> search(langString.getValue(), ecosystemMap));
- return getResult(ecosystemMap);
- }
- /**
- * Determines the ecosystem for the given string.
- *
- * @param multicase the string to test
- * @return the ecosystem
- */
- public String getEcosystem(String multicase) {
- final int[] ecosystemMap = new int[ECOSYSTEMS.length];
- search(multicase, ecosystemMap);
- return getResult(ecosystemMap);
- }
- private void search(String multicase, int[] ecosystemMap) {
- final String c = multicase.toLowerCase();
- ahoCorasickDoubleArrayTrie.parseText(c, (begin, end, value, index) -> {
- if (value.getNature() == EcosystemHintNature.FILE_EXTENSION) {
- if (!isExtension(multicase, begin, end)) {
- return;
- }
- final String ecosystem = value.getEcosystem();
- // real extension, if not part of url
- if (Ecosystem.PHP.equals(ecosystem) && c.regionMatches(begin, ".php", 0, 4)) {
- if (isURL(c, begin)) {
- return;
- }
- } else if (Ecosystem.JAVA.equals(ecosystem) && c.regionMatches(begin, ".jsp", 0, 4)) {
- if (isURL(c, begin)) {
- return;
- }
- }
- } else { // keyword
- // check if full word, i.e. typically space first and then space or dot after
- if (begin != 0) {
- final char startChar = c.charAt(begin - 1);
- if (startChar >= keywordPrefixes.length || !keywordPrefixes[startChar]) {
- return;
- }
- }
- if (end != c.length()) {
- final char endChar = c.charAt(end);
- if (endChar >= keywordPostfixes.length || !keywordPostfixes[endChar]) {
- return;
- }
- }
- final String ecosystem = value.getEcosystem();
- if (Ecosystem.NATIVE.equals(ecosystem)) { // TODO could be checked afterwards
- if (StringUtils.contains(c, "android")) {
- return;
- }
- }
- }
- increment(index, ecosystemMap);
- });
- }
- private String getResult(int[] values) {
- final int best = getBestScore(values);
- if (best != -1) {
- return ECOSYSTEMS[best];
- }
- return null;
- }
- private int getBestScore(int[] values) {
- int bestIndex = -1;
- int bestScore = -1;
- for (int i = 0; i < values.length; i++) {
- if (values[i] > 0) {
- if (values[i] > bestScore) {
- bestIndex = i;
- bestScore = values[i];
- }
- values[i] = 0;
- }
- }
- return bestIndex;
- }
- }