View Javadoc
1   /*
2    * This file is part of dependency-check-core.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
17   */
18  package org.owasp.dependencycheck.analyzer;
19  
20  import java.io.BufferedReader;
21  import java.io.IOException;
22  import java.io.InputStreamReader;
23  import java.io.UnsupportedEncodingException;
24  import java.net.URLEncoder;
25  import java.nio.charset.StandardCharsets;
26  import java.util.ArrayList;
27  import java.util.Arrays;
28  import java.util.Collections;
29  import java.util.Comparator;
30  import java.util.HashMap;
31  import java.util.HashSet;
32  import java.util.List;
33  import java.util.Map;
34  import java.util.Objects;
35  import java.util.Set;
36  import java.util.concurrent.TimeUnit;
37  import java.util.stream.Collectors;
38  import javax.annotation.concurrent.ThreadSafe;
39  import org.apache.commons.lang3.StringUtils;
40  import org.apache.commons.lang3.builder.CompareToBuilder;
41  import org.apache.commons.lang3.builder.EqualsBuilder;
42  import org.apache.commons.lang3.builder.HashCodeBuilder;
43  import org.apache.commons.lang3.mutable.MutableInt;
44  import org.apache.lucene.analysis.CharArraySet;
45  import org.apache.lucene.document.Document;
46  import org.apache.lucene.index.CorruptIndexException;
47  import org.apache.lucene.queryparser.classic.ParseException;
48  import org.apache.lucene.search.Query;
49  import org.apache.lucene.search.ScoreDoc;
50  import org.apache.lucene.search.TopDocs;
51  import org.jetbrains.annotations.NotNull;
52  import org.jetbrains.annotations.Nullable;
53  import org.owasp.dependencycheck.Engine;
54  import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
55  import org.owasp.dependencycheck.data.cpe.CpeMemoryIndex;
56  import org.owasp.dependencycheck.data.cpe.Fields;
57  import org.owasp.dependencycheck.data.cpe.IndexEntry;
58  import org.owasp.dependencycheck.data.cpe.IndexException;
59  import org.owasp.dependencycheck.data.cpe.MemoryIndex;
60  import org.owasp.dependencycheck.data.lucene.LuceneUtils;
61  import org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer;
62  import org.owasp.dependencycheck.data.nvd.ecosystem.Ecosystem;
63  import org.owasp.dependencycheck.data.nvdcve.CveDB;
64  import org.owasp.dependencycheck.data.nvdcve.DatabaseException;
65  import org.owasp.dependencycheck.data.update.cpe.CpePlus;
66  import org.owasp.dependencycheck.dependency.Confidence;
67  import org.owasp.dependencycheck.dependency.Dependency;
68  import org.owasp.dependencycheck.dependency.Evidence;
69  import org.owasp.dependencycheck.dependency.EvidenceType;
70  import org.owasp.dependencycheck.dependency.naming.CpeIdentifier;
71  import org.owasp.dependencycheck.dependency.naming.Identifier;
72  import org.owasp.dependencycheck.dependency.naming.PurlIdentifier;
73  import org.owasp.dependencycheck.exception.InitializationException;
74  import org.owasp.dependencycheck.utils.DependencyVersion;
75  import org.owasp.dependencycheck.utils.DependencyVersionUtil;
76  import org.owasp.dependencycheck.utils.Settings;
77  import org.slf4j.Logger;
78  import org.slf4j.LoggerFactory;
79  import us.springett.parsers.cpe.Cpe;
80  import us.springett.parsers.cpe.CpeBuilder;
81  import us.springett.parsers.cpe.exceptions.CpeValidationException;
82  import us.springett.parsers.cpe.values.Part;
83  
84  /**
85   * CPEAnalyzer is a utility class that takes a project dependency and attempts
86   * to discern if there is an associated CPE. It uses the evidence contained
87   * within the dependency to search the Lucene index.
88   *
89   * @author Jeremy Long
90   */
91  @ThreadSafe
92  public class CPEAnalyzer extends AbstractAnalyzer {
93  
94      /**
95       * The Logger.
96       */
97      private static final Logger LOGGER = LoggerFactory.getLogger(CPEAnalyzer.class);
98      /**
99       * The weighting boost to give terms when constructing the Lucene query.
100      */
101     private static final int WEIGHTING_BOOST = 1;
102     /**
103      * A string representation of a regular expression defining characters
104      * utilized within the CPE Names. Note, the :/ are included so URLs are
105      * passed into the Lucene query so that the specialized tokenizer can parse
106      * them.
107      */
108     private static final String CLEANSE_CHARACTER_RX = "[^A-Za-z0-9 ._:/-]";
109     /**
110      * A string representation of a regular expression used to remove all but
111      * alpha characters.
112      */
113     private static final String CLEANSE_NONALPHA_RX = "[^A-Za-z]*";
114     /**
115      * UTF-8 character set name.
116      */
117     private static final String UTF8 = StandardCharsets.UTF_8.name();
118     /**
119      * The URL to search the NVD CVE data at NIST. This is used by calling:
120      * <pre>String.format(NVD_SEARCH_URL, vendor, product, version);</pre>
121      */
122     public static final String NVD_SEARCH_URL = "https://nvd.nist.gov/vuln/search/results?form_type=Advanced&"
123             + "results_type=overview&search_type=all&cpe_vendor=cpe%%3A%%2F%%3A%1$s&cpe_product=cpe%%3A%%2F%%3A%1$s%%3A%2$s&"
124             + "cpe_version=cpe%%3A%%2F%%3A%1$s%%3A%2$s%%3A%3$s";
125 
126     /**
127      * The URL to search the NVD CVE data at NIST. This is used by calling:
128      * <pre>String.format(NVD_SEARCH_URL, vendor, product);</pre>
129      */
130     public static final String NVD_SEARCH_BROAD_URL = "https://nvd.nist.gov/vuln/search/results?form_type=Advanced&"
131             + "results_type=overview&search_type=all&cpe_vendor=cpe%%3A%%2F%%3A%1$s&cpe_product=cpe%%3A%%2F%%3A%1$s%%3A%2$s";
132     /**
133      * The CPE in memory index.
134      */
135     private MemoryIndex cpe;
136     /**
137      * The CVE Database.
138      */
139     private CveDB cve;
140     /**
141      * A reference to the ODC engine.
142      */
143     private Engine engine;
144     /**
145      * The list of ecosystems to skip during analysis. These are skipped because
146      * there is generally a more accurate vulnerability analyzer in the
147      * pipeline.
148      */
149     private List<String> skipEcosystems;
150     /**
151      * A reference to the ecosystem object; used to obtain the max query results
152      * for each ecosystem.
153      */
154     private Ecosystem ecosystemTools;
155     /**
156      * A reference to the suppression analyzer; for timing reasons we need to
157      * test for suppressions immediately after identifying the match because a
158      * higher confidence match on a FP can mask a lower confidence, yet valid
159      * match.
160      */
161     private CpeSuppressionAnalyzer suppression;
162 
163     /**
164      * Returns the name of this analyzer.
165      *
166      * @return the name of this analyzer.
167      */
168     @Override
169     public String getName() {
170         return "CPE Analyzer";
171     }
172 
173     /**
174      * Returns the analysis phase that this analyzer should run in.
175      *
176      * @return the analysis phase that this analyzer should run in.
177      */
178     @Override
179     public AnalysisPhase getAnalysisPhase() {
180         return AnalysisPhase.IDENTIFIER_ANALYSIS;
181     }
182 
183     /**
184      * Creates the CPE Lucene Index.
185      *
186      * @param engine a reference to the dependency-check engine
187      * @throws InitializationException is thrown if there is an issue opening
188      * the index.
189      */
190     @Override
191     public void prepareAnalyzer(Engine engine) throws InitializationException {
192         super.prepareAnalyzer(engine);
193         this.engine = engine;
194         try {
195             this.open(engine.getDatabase());
196         } catch (IOException ex) {
197             LOGGER.debug("Exception initializing the Lucene Index", ex);
198             throw new InitializationException("An exception occurred initializing the Lucene Index", ex);
199         } catch (DatabaseException ex) {
200             LOGGER.debug("Exception accessing the database", ex);
201             throw new InitializationException("An exception occurred accessing the database", ex);
202         }
203         final String[] tmp = engine.getSettings().getArray(Settings.KEYS.ECOSYSTEM_SKIP_CPEANALYZER);
204         if (tmp == null) {
205             skipEcosystems = new ArrayList<>();
206         } else {
207             LOGGER.debug("Skipping CPE Analysis for {}", StringUtils.join(tmp, ","));
208             skipEcosystems = Arrays.asList(tmp);
209         }
210         ecosystemTools = new Ecosystem(engine.getSettings());
211         suppression = new CpeSuppressionAnalyzer();
212         suppression.initialize(engine.getSettings());
213         suppression.prepareAnalyzer(engine);
214     }
215 
216     /**
217      * Opens the data source.
218      *
219      * @param cve a reference to the NVD CVE database
220      * @throws IOException when the Lucene directory to be queried does not
221      * exist or is corrupt.
222      * @throws DatabaseException when the database throws an exception. This
223      * usually occurs when the database is in use by another process.
224      */
225     public void open(CveDB cve) throws IOException, DatabaseException {
226         this.cve = cve;
227         this.cpe = CpeMemoryIndex.getInstance();
228         try {
229             final long creationStart = System.currentTimeMillis();
230             cpe.open(cve.getVendorProductList(), this.getSettings());
231             final long creationSeconds = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - creationStart);
232             LOGGER.info("Created CPE Index ({} seconds)", creationSeconds);
233         } catch (IndexException ex) {
234             LOGGER.debug("IndexException", ex);
235             throw new DatabaseException(ex);
236         }
237     }
238 
239     /**
240      * Closes the data sources.
241      */
242     @Override
243     public void closeAnalyzer() {
244         if (cpe != null) {
245             cpe.close();
246             cpe = null;
247         }
248     }
249 
250     /**
251      * Searches the data store of CPE entries, trying to identify the CPE for
252      * the given dependency based on the evidence contained within. The
253      * dependency passed in is updated with any identified CPE values.
254      *
255      * @param dependency the dependency to search for CPE entries on
256      * @throws CorruptIndexException is thrown when the Lucene index is corrupt
257      * @throws IOException is thrown when an IOException occurs
258      * @throws ParseException is thrown when the Lucene query cannot be parsed
259      * @throws AnalysisException thrown if the suppression rules failed
260      */
261     protected void determineCPE(Dependency dependency) throws CorruptIndexException, IOException, ParseException, AnalysisException {
262         boolean identifierAdded;
263 
264         final Set<String> majorVersions = dependency.getSoftwareIdentifiers()
265                 .stream()
266                 .filter(i -> i instanceof PurlIdentifier)
267                 .map(i -> {
268                     final PurlIdentifier p = (PurlIdentifier) i;
269                     final DependencyVersion depVersion = DependencyVersionUtil.parseVersion(p.getVersion(), false);
270                     if (depVersion != null) {
271                         return depVersion.getVersionParts().get(0);
272                     }
273                     return null;
274                 }).collect(Collectors.toSet());
275 
276         final Map<String, MutableInt> vendors = new HashMap<>();
277         final Map<String, MutableInt> products = new HashMap<>();
278         final Set<Integer> previouslyFound = new HashSet<>();
279 
280         for (Confidence confidence : Confidence.values()) {
281             collectTerms(vendors, dependency.getIterator(EvidenceType.VENDOR, confidence));
282             LOGGER.trace("vendor search: {}", vendors);
283             collectTerms(products, dependency.getIterator(EvidenceType.PRODUCT, confidence));
284             addMajorVersionToTerms(majorVersions, products);
285             LOGGER.trace("product search: {}", products);
286             if (!vendors.isEmpty() && !products.isEmpty()) {
287                 final List<IndexEntry> entries = searchCPE(vendors, products,
288                         dependency.getVendorWeightings(), dependency.getProductWeightings(),
289                         dependency.getEcosystem());
290                 if (entries == null) {
291                     continue;
292                 }
293 
294                 identifierAdded = false;
295                 for (IndexEntry e : entries) {
296                     if (previouslyFound.contains(e.getDocumentId()) /*|| (filter > 0 && e.getSearchScore() < filter)*/) {
297                         continue;
298                     }
299                     previouslyFound.add(e.getDocumentId());
300                     if (verifyEntry(e, dependency, majorVersions)) {
301                         final String vendor = e.getVendor();
302                         final String product = e.getProduct();
303                         LOGGER.trace("identified vendor/product: {}/{}", vendor, product);
304                         identifierAdded |= determineIdentifiers(dependency, vendor, product, confidence);
305                     }
306                 }
307                 if (identifierAdded) {
308                     break;
309                 }
310             }
311         }
312     }
313 
314     /**
315      * <p>
316      * Returns the text created by concatenating the text and the values from
317      * the EvidenceCollection (filtered for a specific confidence). This
318      * attempts to prevent duplicate terms from being added.</p>
319      * <p>
320      * Note, if the evidence is longer then 1000 characters it will be
321      * truncated.</p>
322      *
323      * @param terms the collection of terms
324      * @param evidence an iterable set of evidence to concatenate
325      */
326     @SuppressWarnings("null")
327 
328     protected void collectTerms(Map<String, MutableInt> terms, Iterable<Evidence> evidence) {
329         for (Evidence e : evidence) {
330             String value = cleanseText(e.getValue());
331             if (StringUtils.isBlank(value)) {
332                 continue;
333             }
334             if (value.length() > 1000) {
335                 boolean trimmed = false;
336                 int pos = value.lastIndexOf(" ", 1000);
337                 if (pos > 0) {
338                     value = value.substring(0, pos);
339                     trimmed = true;
340                 } else {
341                     pos = value.lastIndexOf(".", 1000);
342                 }
343                 if (!trimmed) {
344                     if (pos > 0) {
345                         value = value.substring(0, pos);
346                         trimmed = true;
347                     } else {
348                         pos = value.lastIndexOf("-", 1000);
349                     }
350                 }
351                 if (!trimmed) {
352                     if (pos > 0) {
353                         value = value.substring(0, pos);
354                         trimmed = true;
355                     } else {
356                         pos = value.lastIndexOf("_", 1000);
357                     }
358                 }
359                 if (!trimmed) {
360                     if (pos > 0) {
361                         value = value.substring(0, pos);
362                         trimmed = true;
363                     } else {
364                         pos = value.lastIndexOf("/", 1000);
365                     }
366                 }
367                 if (!trimmed && pos > 0) {
368                     value = value.substring(0, pos);
369                     trimmed = true;
370                 }
371                 if (!trimmed) {
372                     value = value.substring(0, 1000);
373                 }
374             }
375             addTerm(terms, value);
376         }
377     }
378 
379     private void addMajorVersionToTerms(Set<String> majorVersions, Map<String, MutableInt> products) {
380         final Map<String, MutableInt> temp = new HashMap<>();
381         products.entrySet().stream()
382                 .filter(term -> term.getKey() != null)
383                 .forEach(term -> majorVersions.stream()
384                 .filter(version -> version != null
385                 && (!term.getKey().endsWith(version)
386                 && !Character.isDigit(term.getKey().charAt(term.getKey().length() - 1))
387                 && !products.containsKey(term.getKey() + version)))
388                 .forEach(version -> {
389                     addTerm(temp, term.getKey() + version);
390                 }));
391         products.entrySet().stream()
392                 .filter(term -> term.getKey() != null)
393                 .forEach(term -> majorVersions.stream()
394                 .filter(Objects::nonNull)
395                 .map(version -> "v" + version)
396                 .filter(version -> (!term.getKey().endsWith(version)
397                 && !Character.isDigit(term.getKey().charAt(term.getKey().length() - 1))
398                 && !products.containsKey(term.getKey() + version)))
399                 .forEach(version -> {
400                     addTerm(temp, term.getKey() + version);
401                 }));
402         products.putAll(temp);
403     }
404 
405     /**
406      * Adds a term to the map of terms.
407      *
408      * @param terms the map of terms
409      * @param value the value of the term to add
410      */
411     private void addTerm(Map<String, MutableInt> terms, String value) {
412         final MutableInt count = terms.get(value);
413         if (count == null) {
414             terms.put(value, new MutableInt(1));
415         } else {
416             count.add(1);
417         }
418     }
419 
420     /**
421      * <p>
422      * Searches the Lucene CPE index to identify possible CPE entries associated
423      * with the supplied vendor, product, and version.</p>
424      *
425      * <p>
426      * If either the vendorWeightings or productWeightings lists have been
427      * populated this data is used to add weighting factors to the search.</p>
428      *
429      * @param vendor the text used to search the vendor field
430      * @param product the text used to search the product field
431      * @param vendorWeightings a list of strings to use to add weighting factors
432      * to the vendor field
433      * @param productWeightings Adds a list of strings that will be used to add
434      * weighting factors to the product search
435      * @param ecosystem the dependency's ecosystem
436      * @return a list of possible CPE values
437      */
438     protected List<IndexEntry> searchCPE(Map<String, MutableInt> vendor, Map<String, MutableInt> product,
439             Set<String> vendorWeightings, Set<String> productWeightings, String ecosystem) {
440 
441         final int maxQueryResults = ecosystemTools.getLuceneMaxQueryLimitFor(ecosystem);
442         final List<IndexEntry> ret = new ArrayList<>(maxQueryResults);
443 
444         final String searchString = buildSearch(vendor, product, vendorWeightings, productWeightings);
445         if (searchString == null) {
446             return ret;
447         }
448         try {
449             final Query query = cpe.parseQuery(searchString);
450             final TopDocs docs = cpe.search(query, maxQueryResults);
451 
452             for (ScoreDoc d : docs.scoreDocs) {
453                 //if (d.score >= minLuceneScore) {
454                 final Document doc = cpe.getDocument(d.doc);
455                 final IndexEntry entry = new IndexEntry();
456                 entry.setDocumentId(d.doc);
457                 entry.setVendor(doc.get(Fields.VENDOR));
458                 entry.setProduct(doc.get(Fields.PRODUCT));
459                 entry.setSearchScore(d.score);
460 
461 //                LOGGER.error("Explanation: ---------------------");
462 //                LOGGER.error("Explanation: " + entry.getVendor() + " " + entry.getProduct() + " " + entry.getSearchScore());
463 //                LOGGER.error("Explanation: " + searchString);
464 //                LOGGER.error("Explanation: " + cpe.explain(query, d.doc));
465                 if (!ret.contains(entry)) {
466                     ret.add(entry);
467                 }
468                 //}
469             }
470             return ret;
471         } catch (ParseException ex) {
472             LOGGER.warn("An error occurred querying the CPE data. See the log for more details.");
473             LOGGER.info("Unable to parse: {}", searchString, ex);
474         } catch (IndexException ex) {
475             LOGGER.warn("An error occurred resetting the CPE index searcher. See the log for more details.");
476             LOGGER.info("Unable to reset the search analyzer", ex);
477         } catch (IOException ex) {
478             LOGGER.warn("An error occurred reading CPE data. See the log for more details.");
479             LOGGER.info("IO Error with search string: {}", searchString, ex);
480         }
481         return null;
482     }
483 
484     /**
485      * <p>
486      * Builds a Lucene search string by properly escaping data and constructing
487      * a valid search query.</p>
488      *
489      * <p>
490      * If either the possibleVendor or possibleProducts lists have been
491      * populated this data is used to add weighting factors to the search string
492      * generated.</p>
493      *
494      * @param vendor text to search the vendor field
495      * @param product text to search the product field
496      * @param vendorWeighting a list of strings to apply to the vendor to boost
497      * the terms weight
498      * @param productWeightings a list of strings to apply to the product to
499      * boost the terms weight
500      * @return the Lucene query
501      */
502     protected String buildSearch(Map<String, MutableInt> vendor, Map<String, MutableInt> product,
503             Set<String> vendorWeighting, Set<String> productWeightings) {
504 
505         final StringBuilder sb = new StringBuilder();
506 
507         if (!appendWeightedSearch(sb, Fields.PRODUCT, product, productWeightings)) {
508             return null;
509         }
510         sb.append(" AND ");
511         if (!appendWeightedSearch(sb, Fields.VENDOR, vendor, vendorWeighting)) {
512             return null;
513         }
514         return sb.toString();
515     }
516 
517     /**
518      * This method constructs a Lucene query for a given field. The searchText
519      * is split into separate words and if the word is within the list of
520      * weighted words then an additional weighting is applied to the term as it
521      * is appended into the query.
522      *
523      * @param sb a StringBuilder that the query text will be appended to.
524      * @param field the field within the Lucene index that the query is
525      * searching.
526      * @param terms text used to construct the query.
527      * @param weightedText a list of terms that will be considered higher
528      * importance when searching.
529      * @return if the append was successful.
530      */
531     @SuppressWarnings("StringSplitter")
532     private boolean appendWeightedSearch(StringBuilder sb, String field, Map<String, MutableInt> terms, Set<String> weightedText) {
533         if (terms.isEmpty()) {
534             return false;
535         }
536         sb.append(field).append(":(");
537         boolean addSpace = false;
538         boolean addedTerm = false;
539 
540         for (Map.Entry<String, MutableInt> entry : terms.entrySet()) {
541             final StringBuilder boostedTerms = new StringBuilder();
542             final int weighting = entry.getValue().intValue();
543             final String[] text = entry.getKey().split(" ");
544             for (String word : text) {
545                 if (word.isEmpty()) {
546                     continue;
547                 }
548                 if (addSpace) {
549                     sb.append(" ");
550                 } else {
551                     addSpace = true;
552                 }
553                 addedTerm = true;
554                 if (LuceneUtils.isKeyword(word)) {
555                     sb.append("\"");
556                     LuceneUtils.appendEscapedLuceneQuery(sb, word);
557                     sb.append("\"");
558                 } else {
559                     LuceneUtils.appendEscapedLuceneQuery(sb, word);
560                 }
561                 final String boostTerm = findBoostTerm(word, weightedText);
562 
563                 //The weighting is on a full phrase rather then at a term level for vendor or products
564                 //TODO - should the weighting be at a "word" level as opposed to phrase level? Or combined word and phrase?
565                 //remember the reason we are counting the frequency of "phrases" as opposed to terms is that
566                 //we need to keep the correct sequence of terms from the evidence so the term concatenating analyzer
567                 //works correctly and will causes searches to take spring framework and produce: spring springframework framework
568                 if (boostTerm != null) {
569                     sb.append("^").append(weighting + WEIGHTING_BOOST);
570                     if (!boostTerm.equals(word)) {
571                         boostedTerms.append(" ");
572                         LuceneUtils.appendEscapedLuceneQuery(boostedTerms, boostTerm);
573                         boostedTerms.append("^").append(weighting + WEIGHTING_BOOST);
574                     }
575                 } else if (weighting > 1) {
576                     sb.append("^").append(weighting);
577                 }
578             }
579             if (boostedTerms.length() > 0) {
580                 sb.append(boostedTerms);
581             }
582         }
583         sb.append(")");
584         return addedTerm;
585     }
586 
587     /**
588      * Removes characters from the input text that are not used within the CPE
589      * index.
590      *
591      * @param text is the text to remove the characters from.
592      * @return the text having removed some characters.
593      */
594     private String cleanseText(String text) {
595         return text.replaceAll(CLEANSE_CHARACTER_RX, " ");
596     }
597 
598     /**
599      * Searches the collection of boost terms for the given term. The elements
600      * are case insensitive matched using only the alpha-numeric contents of the
601      * terms; all other characters are removed.
602      *
603      * @param term the term to search for
604      * @param boost the collection of boost terms
605      * @return the value identified
606      */
607     private String findBoostTerm(String term, Set<String> boost) {
608         for (String entry : boost) {
609             if (equalsIgnoreCaseAndNonAlpha(term, entry)) {
610                 return entry;
611             }
612         }
613         return null;
614     }
615 
616     /**
617      * Compares two strings after lower casing them and removing the non-alpha
618      * characters.
619      *
620      * @param l string one to compare.
621      * @param r string two to compare.
622      * @return whether or not the two strings are similar.
623      */
624     private boolean equalsIgnoreCaseAndNonAlpha(String l, String r) {
625         if (l == null || r == null) {
626             return false;
627         }
628 
629         final String left = l.replaceAll(CLEANSE_NONALPHA_RX, "");
630         final String right = r.replaceAll(CLEANSE_NONALPHA_RX, "");
631         return left.equalsIgnoreCase(right);
632     }
633 
634     /**
635      * Ensures that the CPE Identified matches the dependency. This validates
636      * that the product, vendor, and version information for the CPE are
637      * contained within the dependencies evidence.
638      *
639      * @param entry a CPE entry
640      * @param dependency the dependency that the CPE entries could be for
641      * @param majorVersions the major versions detected for the dependency
642      * @return whether or not the entry is valid.
643      */
644     private boolean verifyEntry(final IndexEntry entry, final Dependency dependency,
645             final Set<String> majorVersions) {
646         boolean isValid = false;
647         //TODO - does this nullify some of the fuzzy matching that happens in the lucene search?
648         // for instance CPE some-component and in the evidence we have SomeComponent.
649 
650         //TODO - should this have a package manager only flag instead of just looking for NPM
651         if (Ecosystem.NODEJS.equals(dependency.getEcosystem())) {
652             for (Identifier i : dependency.getSoftwareIdentifiers()) {
653                 if (i instanceof PurlIdentifier) {
654                     final PurlIdentifier p = (PurlIdentifier) i;
655                     if (cleanPackageName(p.getName()).equals(cleanPackageName(entry.getProduct()))) {
656                         isValid = true;
657                     }
658                 }
659             }
660         } else if (collectionContainsString(dependency.getEvidence(EvidenceType.VENDOR), entry.getVendor())) {
661             if (collectionContainsString(dependency.getEvidence(EvidenceType.PRODUCT), entry.getProduct())) {
662                 isValid = true;
663             } else {
664                 isValid = majorVersions.stream().filter(version
665                         -> version != null && entry.getProduct().endsWith("v" + version) && entry.getProduct().length() > version.length() + 1)
666                         .anyMatch(version
667                                 -> collectionContainsString(dependency.getEvidence(EvidenceType.PRODUCT),
668                                 entry.getProduct().substring(0, entry.getProduct().length() - version.length() - 1))
669                         );
670                 isValid |= majorVersions.stream().filter(version
671                         -> version != null && entry.getProduct().endsWith(version) && entry.getProduct().length() > version.length())
672                         .anyMatch(version
673                                 -> collectionContainsString(dependency.getEvidence(EvidenceType.PRODUCT),
674                                 entry.getProduct().substring(0, entry.getProduct().length() - version.length()))
675                         );
676             }
677         }
678         return isValid;
679     }
680 
681     /**
682      * Only returns alpha numeric characters contained in a given package name.
683      *
684      * @param name the package name to cleanse
685      * @return the cleansed packaage name
686      */
687     private String cleanPackageName(String name) {
688         if (name == null) {
689             return "";
690         }
691         return name.replaceAll("[^a-zA-Z0-9]+", "");
692     }
693 
694     /**
695      * Used to determine if the EvidenceCollection contains a specific string.
696      *
697      * @param evidence an of evidence object to check
698      * @param text the text to search for
699      * @return whether or not the EvidenceCollection contains the string
700      */
701     @SuppressWarnings("StringSplitter")
702     private boolean collectionContainsString(Set<Evidence> evidence, String text) {
703         //TODO - likely need to change the split... not sure if this will work for CPE with special chars
704         if (text == null) {
705             return false;
706         }
707         // Check if we have an exact match
708         final String textLC = text.toLowerCase();
709         for (Evidence e : evidence) {
710             if (e.getValue().toLowerCase().equals(textLC)) {
711                 return true;
712             }
713         }
714 
715         final String[] words = text.split("[\\s_-]+");
716         final List<String> list = new ArrayList<>();
717         String tempWord = null;
718         final CharArraySet stopWords = SearchFieldAnalyzer.getStopWords();
719         for (String word : words) {
720             /*
721              single letter words should be concatenated with the next word.
722              so { "m", "core", "sample" } -> { "mcore", "sample" }
723              */
724             if (tempWord != null) {
725                 list.add(tempWord + word);
726                 tempWord = null;
727             } else if (word.length() <= 2) {
728                 tempWord = word;
729             } else {
730                 if (stopWords.contains(word)) {
731                     continue;
732                 }
733                 list.add(word);
734             }
735         }
736         if (tempWord != null) {
737             if (!list.isEmpty()) {
738                 final String tmp = list.get(list.size() - 1) + tempWord;
739                 list.add(tmp);
740             } else {
741                 list.add(tempWord);
742             }
743         }
744         if (list.isEmpty()) {
745             return false;
746         }
747         boolean isValid = true;
748 
749         // Prepare the evidence values, e.g. remove the characters we used for splitting
750         final List<String> evidenceValues = new ArrayList<>(evidence.size());
751         evidence.forEach((e) -> evidenceValues.add(e.getValue().toLowerCase().replaceAll("[\\s_-]+", "")));
752 
753         for (String word : list) {
754             word = word.toLowerCase();
755             boolean found = false;
756             for (String e : evidenceValues) {
757                 if (e.contains(word)) {
758                     if ("http".equals(word) && e.contains("http:")) {
759                         continue;
760                     }
761                     found = true;
762                     break;
763                 }
764             }
765             isValid &= found;
766         }
767         return isValid;
768     }
769 
770     /**
771      * Analyzes a dependency and attempts to determine if there are any CPE
772      * identifiers for this dependency.
773      *
774      * @param dependency The Dependency to analyze.
775      * @param engine The analysis engine
776      * @throws AnalysisException is thrown if there is an issue analyzing the
777      * dependency.
778      */
779     @Override
780     protected void analyzeDependency(Dependency dependency, Engine engine) throws AnalysisException {
781         if (skipEcosystems.contains(dependency.getEcosystem())) {
782             return;
783         }
784         try {
785             determineCPE(dependency);
786         } catch (CorruptIndexException ex) {
787             throw new AnalysisException("CPE Index is corrupt.", ex);
788         } catch (IOException ex) {
789             throw new AnalysisException("Failure opening the CPE Index.", ex);
790         } catch (ParseException ex) {
791             throw new AnalysisException("Unable to parse the generated Lucene query for this dependency.", ex);
792         }
793     }
794 
795     /**
796      * Retrieves a list of CPE values from the CveDB based on the vendor and
797      * product passed in. The list is then validated to find only CPEs that are
798      * valid for the given dependency. It is possible that the CPE identified is
799      * a best effort "guess" based on the vendor, product, and version
800      * information.
801      *
802      * @param dependency the Dependency being analyzed
803      * @param vendor the vendor for the CPE being analyzed
804      * @param product the product for the CPE being analyzed
805      * @param currentConfidence the current confidence being used during
806      * analysis
807      * @return <code>true</code> if an identifier was added to the dependency;
808      * otherwise <code>false</code>
809      * @throws UnsupportedEncodingException is thrown if UTF-8 is not supported
810      * @throws AnalysisException thrown if the suppression rules failed
811      */
812     @SuppressWarnings("StringSplitter")
813     protected boolean determineIdentifiers(Dependency dependency, String vendor, String product,
814             Confidence currentConfidence) throws UnsupportedEncodingException, AnalysisException {
815 
816         final CpeBuilder cpeBuilder = new CpeBuilder();
817 
818         final Set<CpePlus> cpePlusEntries = cve.getCPEs(vendor, product);
819         final Set<Cpe> cpes = filterEcosystem(dependency.getEcosystem(), cpePlusEntries);
820         if (cpes == null || cpes.isEmpty()) {
821             return false;
822         }
823 
824         DependencyVersion bestGuess;
825         if ("Golang".equals(dependency.getEcosystem()) && dependency.getVersion() == null) {
826             bestGuess = new DependencyVersion("*");
827         } else {
828             bestGuess = new DependencyVersion("-");
829         }
830         String bestGuessUpdate = null;
831         Confidence bestGuessConf = null;
832         String bestGuessURL = null;
833         final Set<IdentifierMatch> collected = new HashSet<>();
834 
835         considerDependencyVersion(dependency, vendor, product, currentConfidence, collected);
836 
837         //TODO the following algorithm incorrectly identifies things as a lower version
838         // if there lower confidence evidence when the current (highest) version number
839         // is newer then anything in the NVD.
840         for (Confidence conf : Confidence.values()) {
841             for (Evidence evidence : dependency.getIterator(EvidenceType.VERSION, conf)) {
842                 final DependencyVersion evVer = DependencyVersionUtil.parseVersion(evidence.getValue(), true);
843                 if (evVer == null) {
844                     continue;
845                 }
846                 DependencyVersion evBaseVer = null;
847                 String evBaseVerUpdate = null;
848                 final int idx = evVer.getVersionParts().size() - 1;
849                 if (evVer.getVersionParts().get(idx)
850                         .matches("^(v|release|final|snapshot|beta|alpha|u|rc|m|20\\d\\d).*$")) {
851                     //store the update version
852                     final String checkUpdate = evVer.getVersionParts().get(idx);
853                     if (checkUpdate.matches("^(v|release|final|snapshot|beta|alpha|u|rc|m|20\\d\\d).*$")) {
854                         evBaseVerUpdate = checkUpdate;
855                         evBaseVer = new DependencyVersion();
856                         evBaseVer.setVersionParts(evVer.getVersionParts().subList(0, idx));
857                     }
858                 }
859                 //TODO - review and update for new JSON data
860                 for (Cpe vs : cpes) {
861                     final DependencyVersion dbVer = DependencyVersionUtil.parseVersion(vs.getVersion());
862                     DependencyVersion dbVerUpdate = dbVer;
863                     if (vs.getUpdate() != null && !vs.getUpdate().isEmpty() && !vs.getUpdate().startsWith("*") && !vs.getUpdate().startsWith("-")) {
864                         dbVerUpdate = DependencyVersionUtil.parseVersion(vs.getVersion() + '.' + vs.getUpdate(), true);
865                     }
866                     if (dbVer == null) { //special case, no version specified - everything is vulnerable
867                         final String url = String.format(NVD_SEARCH_BROAD_URL, URLEncoder.encode(vs.getVendor(), UTF8),
868                                 URLEncoder.encode(vs.getProduct(), UTF8));
869                         final IdentifierMatch match = new IdentifierMatch(vs, url, IdentifierConfidence.BROAD_MATCH, conf);
870                         collected.add(match);
871                     } else if (evVer.equals(dbVer)) {
872                         addExactMatch(vs, evBaseVerUpdate, conf, collected);
873                     } else if (evBaseVer != null && evBaseVer.equals(dbVer)
874                             && (bestGuessConf == null || bestGuessConf.compareTo(conf) > 0)) {
875                         bestGuessConf = conf;
876                         bestGuess = dbVer;
877                         bestGuessUpdate = evBaseVerUpdate;
878                         bestGuessURL = String.format(NVD_SEARCH_URL, URLEncoder.encode(vs.getVendor(), UTF8),
879                                 URLEncoder.encode(vs.getProduct(), UTF8), URLEncoder.encode(vs.getVersion(), UTF8));
880                     } else if (dbVerUpdate != null && evVer.getVersionParts().size() <= dbVerUpdate.getVersionParts().size()
881                             && evVer.matchesAtLeastThreeLevels(dbVerUpdate)) {
882                         if (bestGuessConf == null || bestGuessConf.compareTo(conf) > 0) {
883                             if (bestGuess.getVersionParts().size() < dbVer.getVersionParts().size()) {
884                                 bestGuess = dbVer;
885                                 bestGuessUpdate = evBaseVerUpdate;
886                                 bestGuessConf = conf;
887                             }
888                         }
889                     }
890                 }
891                 if ((bestGuessConf == null || bestGuessConf.compareTo(conf) > 0)
892                         && bestGuess.getVersionParts().size() < evVer.getVersionParts().size()) {
893                     bestGuess = evVer;
894                     bestGuessUpdate = evBaseVerUpdate;
895                     bestGuessConf = conf;
896                 }
897             }
898         }
899 
900         cpeBuilder.part(Part.APPLICATION).vendor(vendor).product(product);
901         final int idx = bestGuess.getVersionParts().size() - 1;
902         if (bestGuess.getVersionParts().get(idx)
903                 .matches("^(v|release|final|snapshot|beta|alpha|u|rc|m|20\\d\\d).*$")) {
904             cpeBuilder.version(StringUtils.join(bestGuess.getVersionParts().subList(0, idx), "."));
905             //when written - no update versions in the NVD start with v### - they all strip the v off
906             if (bestGuess.getVersionParts().get(idx).matches("^v\\d.*$")) {
907                 cpeBuilder.update(bestGuess.getVersionParts().get(idx).substring(1));
908             } else {
909                 cpeBuilder.update(bestGuess.getVersionParts().get(idx));
910             }
911         } else {
912             cpeBuilder.version(bestGuess.toString());
913             if (bestGuessUpdate != null) {
914                 cpeBuilder.update(bestGuessUpdate);
915             }
916         }
917         final Cpe guessCpe;
918 
919         try {
920             guessCpe = cpeBuilder.build();
921         } catch (CpeValidationException ex) {
922             throw new AnalysisException(String.format("Unable to create a CPE for %s:%s:%s", vendor, product, bestGuess));
923         }
924         if (!"-".equals(guessCpe.getVersion())) {
925             String url = null;
926             if (bestGuessURL != null) {
927                 url = bestGuessURL;
928             }
929             if (bestGuessConf == null) {
930                 bestGuessConf = Confidence.LOW;
931             }
932             final IdentifierMatch match = new IdentifierMatch(guessCpe, url, IdentifierConfidence.BEST_GUESS, bestGuessConf);
933 
934             collected.add(match);
935         }
936         boolean identifierAdded = false;
937         if (!collected.isEmpty()) {
938             final List<IdentifierMatch> items = new ArrayList<>(collected);
939 
940             Collections.sort(items);
941             final IdentifierConfidence bestIdentifierQuality = items.get(0).getIdentifierConfidence();
942             final Confidence bestEvidenceQuality = items.get(0).getEvidenceConfidence();
943             boolean addedNonGuess = false;
944             final Confidence prevAddedConfidence = dependency.getVulnerableSoftwareIdentifiers().stream().map(Identifier::getConfidence)
945                     .min(Comparator.comparing(Confidence::ordinal))
946                     .orElse(Confidence.LOW);
947 
948             for (IdentifierMatch m : items) {
949                 if (bestIdentifierQuality.equals(m.getIdentifierConfidence())
950                         && bestEvidenceQuality.equals(m.getEvidenceConfidence())) {
951                     final CpeIdentifier i = m.getIdentifier();
952                     if (bestIdentifierQuality == IdentifierConfidence.BEST_GUESS) {
953                         if (addedNonGuess) {
954                             continue;
955                         }
956                         i.setConfidence(Confidence.LOW);
957                     } else {
958                         i.setConfidence(bestEvidenceQuality);
959                     }
960                     if (prevAddedConfidence.compareTo(i.getConfidence()) < 0) {
961                         continue;
962                     }
963 
964                     //TODO - while this gets the job down it is slow; consider refactoring
965                     dependency.addVulnerableSoftwareIdentifier(i);
966                     suppression.analyze(dependency, engine);
967                     if (dependency.getVulnerableSoftwareIdentifiers().contains(i)) {
968                         identifierAdded = true;
969                         if (!addedNonGuess && bestIdentifierQuality != IdentifierConfidence.BEST_GUESS) {
970                             addedNonGuess = true;
971                         }
972                     }
973                 }
974             }
975         }
976         return identifierAdded;
977     }
978 
979     /**
980      * Adds a new CPE to the identifier match collection.
981      *
982      * @param vs a reference to the vulnerable software
983      * @param updateVersion the update version
984      * @param conf the current confidence
985      * @param collected a reference to the collected identifiers
986      * @throws UnsupportedEncodingException thrown if UTF-8 is not supported
987      */
988     private void addExactMatch(Cpe vs, String updateVersion, Confidence conf,
989             final Set<IdentifierMatch> collected) throws UnsupportedEncodingException {
990 
991         final CpeBuilder cpeBuilder = new CpeBuilder();
992         final String url = String.format(NVD_SEARCH_URL, URLEncoder.encode(vs.getVendor(), UTF8),
993                 URLEncoder.encode(vs.getProduct(), UTF8), URLEncoder.encode(vs.getVersion(), UTF8));
994         Cpe useCpe;
995         if (updateVersion != null && "*".equals(vs.getUpdate())) {
996             try {
997                 useCpe = cpeBuilder.part(vs.getPart()).wfVendor(vs.getWellFormedVendor())
998                         .wfProduct(vs.getWellFormedProduct()).wfVersion(vs.getWellFormedVersion())
999                         .wfEdition(vs.getWellFormedEdition()).wfLanguage(vs.getWellFormedLanguage())
1000                         .wfOther(vs.getWellFormedOther()).wfSwEdition(vs.getWellFormedSwEdition())
1001                         .update(updateVersion).build();
1002             } catch (CpeValidationException ex) {
1003                 LOGGER.debug("Error building cpe with update:" + updateVersion, ex);
1004                 useCpe = vs;
1005             }
1006         } else {
1007             useCpe = vs;
1008         }
1009         final IdentifierMatch match = new IdentifierMatch(useCpe, url, IdentifierConfidence.EXACT_MATCH, conf);
1010         collected.add(match);
1011     }
1012 
1013     /**
1014      * Evaluates whether or not to use the `version` of the dependency instead
1015      * of the version evidence. The dependency should not always be used as it
1016      * can cause FP.
1017      *
1018      * @param dependency the dependency being analyzed
1019      * @param product the product name
1020      * @param vendor the vendor name
1021      * @param confidence the current confidence level
1022      * @param collected a reference to the identifiers matched
1023      * @throws AnalysisException thrown if aliens attacked and valid input could
1024      * not be used to construct a CPE
1025      * @throws UnsupportedEncodingException thrown if run on a system that
1026      * doesn't support UTF-8
1027      */
1028     private void considerDependencyVersion(Dependency dependency,
1029             String vendor, String product, Confidence confidence,
1030             final Set<IdentifierMatch> collected)
1031             throws AnalysisException, UnsupportedEncodingException {
1032 
1033         if (dependency.getVersion() != null && !dependency.getVersion().isEmpty()) {
1034             final CpeBuilder cpeBuilder = new CpeBuilder();
1035             boolean useDependencyVersion = true;
1036             final CharArraySet stopWords = SearchFieldAnalyzer.getStopWords();
1037             if (dependency.getName() != null && !dependency.getName().isEmpty()) {
1038                 final String name = dependency.getName();
1039                 for (String word : product.split("[^a-zA-Z0-9]")) {
1040                     useDependencyVersion &= name.contains(word) || stopWords.contains(word)
1041                             || wordMatchesEcosystem(dependency.getEcosystem(), word);
1042                 }
1043             }
1044 
1045             if (useDependencyVersion) {
1046                 //TODO - we need to filter this so that we only use this if something in the
1047                 //dependency.getName() matches the vendor/product in some way
1048                 final DependencyVersion depVersion = new DependencyVersion(dependency.getVersion());
1049                 if (depVersion.getVersionParts().size() > 0) {
1050                     cpeBuilder.part(Part.APPLICATION).vendor(vendor).product(product);
1051                     addVersionAndUpdate(depVersion, cpeBuilder);
1052                     try {
1053                         final Cpe depCpe = cpeBuilder.build();
1054                         final String url = String.format(NVD_SEARCH_URL, URLEncoder.encode(vendor, UTF8),
1055                                 URLEncoder.encode(product, UTF8), URLEncoder.encode(depCpe.getVersion(), UTF8));
1056                         final IdentifierMatch match = new IdentifierMatch(depCpe, url, IdentifierConfidence.EXACT_MATCH, confidence);
1057                         collected.add(match);
1058                     } catch (CpeValidationException ex) {
1059                         throw new AnalysisException(String.format("Unable to create a CPE for %s:%s:%s", vendor, product, depVersion));
1060                     }
1061                 }
1062             }
1063         }
1064     }
1065 
1066     /**
1067      * If a CPE product word represents the ecosystem of a dependency it is not required
1068      * to appear in the dependencyName to still consider the CPE product a match.
1069      *
1070      * @param ecosystem The ecosystem of the dependency
1071      * @param word       The word from the CPE product to check
1072      * @return {@code true} when the CPE product word is known to match the ecosystem of the dependency
1073      * @implNote This method is not intended to cover every possible case where the ecosystem is represented by the word. It is a
1074      * best-effort attempt to prevent {@link #considerDependencyVersion(Dependency, String, String, Confidence, Set)}
1075      * from not taking an exact-match versioned CPE into account because the ecosystem-related word does not appear in
1076      * the dependencyName. It helps prevent false-positive cases like https://github.com/jeremylong/DependencyCheck/issues/5545
1077      * @see #considerDependencyVersion(Dependency, String, String, Confidence, Set)
1078      */
1079     private boolean wordMatchesEcosystem(@Nullable String ecosystem, String word) {
1080         if (Ecosystem.JAVA.equalsIgnoreCase(word)) {
1081             return Ecosystem.JAVA.equals(ecosystem);
1082         }
1083         return false;
1084     }
1085 
1086     /**
1087      * <p>
1088      * Returns the setting key to determine if the analyzer is enabled.</p>
1089      *
1090      * @return the key for the analyzer's enabled property
1091      */
1092     @Override
1093     protected String getAnalyzerEnabledSettingKey() {
1094         return Settings.KEYS.ANALYZER_CPE_ENABLED;
1095     }
1096 
1097     /**
1098      * Filters the given list of CPE Entries (plus ecosystem) for the given
1099      * dependencies ecosystem.
1100      *
1101      * @param ecosystem the dependencies ecosystem
1102      * @param entries the CPE Entries (plus ecosystem)
1103      * @return the filtered list of CPE entries
1104      */
1105     private Set<Cpe> filterEcosystem(String ecosystem, Set<CpePlus> entries) {
1106         if (entries == null || entries.isEmpty()) {
1107             return null;
1108         }
1109         if (ecosystem != null) {
1110             return entries.stream().filter(c
1111                     -> c.getEcosystem() == null
1112                     || c.getEcosystem().equals(ecosystem)
1113                     //some ios CVE/CPEs are listed under native
1114                     || (Ecosystem.IOS.equals(ecosystem) && Ecosystem.NATIVE.equals(c.getEcosystem())))
1115                     .map(CpePlus::getCpe)
1116                     .collect(Collectors.toSet());
1117         }
1118         return entries.stream()
1119                 .map(CpePlus::getCpe)
1120                 .collect(Collectors.toSet());
1121     }
1122 
1123     /**
1124      * Add the given version to the CpeBuilder - this method attempts to parse
1125      * out the update from the version and correctly set the value in the CPE.
1126      *
1127      * @param depVersion the version to add
1128      * @param cpeBuilder a reference to the CPE Builder
1129      */
1130     private void addVersionAndUpdate(DependencyVersion depVersion, final CpeBuilder cpeBuilder) {
1131         final int idx = depVersion.getVersionParts().size() - 1;
1132         if (idx > 0 && depVersion.getVersionParts().get(idx)
1133                 .matches("^(v|final|release|snapshot|r|b|beta|a|alpha|u|rc|sp|dev|revision|service|build|pre|p|patch|update|m|20\\d\\d).*$")) {
1134             cpeBuilder.version(StringUtils.join(depVersion.getVersionParts().subList(0, idx), "."));
1135             //when written - no update versions in the NVD start with v### - they all strip the v off
1136             if (depVersion.getVersionParts().get(idx).matches("^v\\d.*$")) {
1137                 cpeBuilder.update(depVersion.getVersionParts().get(idx).substring(1));
1138             } else {
1139                 cpeBuilder.update(depVersion.getVersionParts().get(idx));
1140             }
1141         } else {
1142             cpeBuilder.version(depVersion.toString());
1143         }
1144     }
1145 
1146     /**
1147      * The confidence whether the identifier is an exact match, or a best guess.
1148      */
1149     private enum IdentifierConfidence {
1150 
1151         /**
1152          * An exact match for the CPE.
1153          */
1154         EXACT_MATCH,
1155         /**
1156          * A best guess for the CPE.
1157          */
1158         BEST_GUESS,
1159         /**
1160          * The entire vendor/product group must be added (without a guess at
1161          * version) because there is a CVE with a VS that only specifies
1162          * vendor/product.
1163          */
1164         BROAD_MATCH
1165     }
1166 
1167     /**
1168      * A simple object to hold an identifier and carry information about the
1169      * confidence in the identifier.
1170      */
1171     private static class IdentifierMatch implements Comparable<IdentifierMatch> {
1172 
1173         /**
1174          * The confidence whether this is an exact match, or a best guess.
1175          */
1176         private IdentifierConfidence identifierConfidence;
1177         /**
1178          * The CPE identifier.
1179          */
1180         private CpeIdentifier identifier;
1181 
1182         /**
1183          * Constructs an IdentifierMatch.
1184          *
1185          * @param cpe the CPE value for the match
1186          * @param url the URL of the identifier
1187          * @param identifierConfidence the confidence in the identifier: best
1188          * guess or exact match
1189          * @param evidenceConfidence the confidence of the evidence used to find
1190          * the identifier
1191          */
1192         IdentifierMatch(Cpe cpe, String url, IdentifierConfidence identifierConfidence, Confidence evidenceConfidence) {
1193             this.identifier = new CpeIdentifier(cpe, url, evidenceConfidence);
1194             this.identifierConfidence = identifierConfidence;
1195         }
1196 
1197         //<editor-fold defaultstate="collapsed" desc="Property implementations: evidenceConfidence, confidence, identifier">
1198         /**
1199          * Get the value of evidenceConfidence
1200          *
1201          * @return the value of evidenceConfidence
1202          */
1203         public Confidence getEvidenceConfidence() {
1204             return this.identifier.getConfidence();
1205         }
1206 
1207         /**
1208          * Set the value of evidenceConfidence
1209          *
1210          * @param evidenceConfidence new value of evidenceConfidence
1211          */
1212         public void setEvidenceConfidence(Confidence evidenceConfidence) {
1213             this.identifier.setConfidence(evidenceConfidence);
1214         }
1215 
1216         /**
1217          * Get the value of confidence.
1218          *
1219          * @return the value of confidence
1220          */
1221         public IdentifierConfidence getIdentifierConfidence() {
1222             return identifierConfidence;
1223         }
1224 
1225         /**
1226          * Set the value of confidence.
1227          *
1228          * @param confidence new value of confidence
1229          */
1230         public void setIdentifierConfidence(IdentifierConfidence confidence) {
1231             this.identifierConfidence = confidence;
1232         }
1233 
1234         /**
1235          * Get the value of identifier.
1236          *
1237          * @return the value of identifier
1238          */
1239         public CpeIdentifier getIdentifier() {
1240             return identifier;
1241         }
1242 
1243         /**
1244          * Set the value of identifier.
1245          *
1246          * @param identifier new value of identifier
1247          */
1248         public void setIdentifier(CpeIdentifier identifier) {
1249             this.identifier = identifier;
1250         }
1251         //</editor-fold>
1252         //<editor-fold defaultstate="collapsed" desc="Standard implementations of toString, hashCode, and equals">
1253 
1254         /**
1255          * Standard toString() implementation.
1256          *
1257          * @return the string representation of the object
1258          */
1259         @Override
1260         public String toString() {
1261             return "IdentifierMatch{ IdentifierConfidence=" + identifierConfidence + ", identifier=" + identifier + '}';
1262         }
1263 
1264         /**
1265          * Standard hashCode() implementation.
1266          *
1267          * @return the hashCode
1268          */
1269         @Override
1270         public int hashCode() {
1271             return new HashCodeBuilder(115, 303)
1272                     .append(identifierConfidence)
1273                     .append(identifier)
1274                     .toHashCode();
1275         }
1276 
1277         /**
1278          * Standard equals implementation.
1279          *
1280          * @param obj the object to compare
1281          * @return true if the objects are equal, otherwise false
1282          */
1283         @Override
1284         public boolean equals(Object obj) {
1285             if (obj == null || !(obj instanceof IdentifierMatch)) {
1286                 return false;
1287             }
1288             if (this == obj) {
1289                 return true;
1290             }
1291             final IdentifierMatch other = (IdentifierMatch) obj;
1292             return new EqualsBuilder()
1293                     .append(identifierConfidence, other.identifierConfidence)
1294                     .append(identifier, other.identifier)
1295                     .build();
1296         }
1297         //</editor-fold>
1298 
1299         /**
1300          * Standard implementation of compareTo that compares identifier
1301          * confidence, evidence confidence, and then the identifier.
1302          *
1303          * @param o the IdentifierMatch to compare to
1304          * @return the natural ordering of IdentifierMatch
1305          */
1306         @Override
1307         public int compareTo(@NotNull IdentifierMatch o) {
1308             return new CompareToBuilder()
1309                     .append(identifierConfidence, o.identifierConfidence)
1310                     .append(identifier, o.identifier)
1311                     .toComparison();
1312         }
1313     }
1314 
1315     /**
1316      * Command line tool for querying the Lucene CPE Index.
1317      *
1318      * @param args not used
1319      */
1320     @SuppressWarnings("InfiniteLoopStatement")
1321     public static void main(String[] args) {
1322         final Settings props = new Settings();
1323         try (Engine en = new Engine(Engine.Mode.EVIDENCE_PROCESSING, props)) {
1324             en.openDatabase(false, false);
1325             final CPEAnalyzer analyzer = new CPEAnalyzer();
1326             analyzer.initialize(props);
1327             analyzer.prepareAnalyzer(en);
1328             LOGGER.error("test");
1329             System.out.println("Memory index query for ODC");
1330             try (BufferedReader br = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8))) {
1331                 while (true) {
1332 
1333                     final Map<String, MutableInt> vendor = new HashMap<>();
1334                     final Map<String, MutableInt> product = new HashMap<>();
1335                     System.out.print("Vendor: ");
1336                     String[] parts = br.readLine().split(" ");
1337                     for (String term : parts) {
1338                         final MutableInt count = vendor.get(term);
1339                         if (count == null) {
1340                             vendor.put(term, new MutableInt(0));
1341                         } else {
1342                             count.add(1);
1343                         }
1344                     }
1345                     System.out.print("Product: ");
1346                     parts = br.readLine().split(" ");
1347                     for (String term : parts) {
1348                         final MutableInt count = product.get(term);
1349                         if (count == null) {
1350                             product.put(term, new MutableInt(0));
1351                         } else {
1352                             count.add(1);
1353                         }
1354                     }
1355                     final List<IndexEntry> list = analyzer.searchCPE(vendor, product, new HashSet<>(), new HashSet<>(), "default");
1356                     if (list == null || list.isEmpty()) {
1357                         System.out.println("No results found");
1358                     } else {
1359                         list.forEach((e) -> System.out.printf("%s:%s (%f)%n", e.getVendor(), e.getProduct(),
1360                                 e.getSearchScore()));
1361                     }
1362                     System.out.println();
1363                     System.out.println();
1364                 }
1365             }
1366         } catch (InitializationException | IOException ex) {
1367             System.err.println("Lucene ODC search tool failed:");
1368             System.err.println(ex.getMessage());
1369         }
1370     }
1371 
1372     /**
1373      * Sets the reference to the CveDB.
1374      *
1375      * @param cveDb the CveDB
1376      */
1377     protected void setCveDB(CveDB cveDb) {
1378         this.cve = cveDb;
1379     }
1380 
1381     /**
1382      * returns a reference to the CveDB.
1383      *
1384      * @return a reference to the CveDB
1385      */
1386     protected CveDB getCveDB() {
1387         return this.cve;
1388     }
1389 
1390     /**
1391      * Sets the MemoryIndex.
1392      *
1393      * @param idx the memory index
1394      */
1395     protected void setMemoryIndex(MemoryIndex idx) {
1396         cpe = idx;
1397     }
1398 
1399     /**
1400      * Returns the memory index.
1401      *
1402      * @return the memory index
1403      */
1404     protected MemoryIndex getMemoryIndex() {
1405         return cpe;
1406     }
1407 
1408     /**
1409      * Sets the CPE Suppression Analyzer.
1410      *
1411      * @param suppression the CPE Suppression Analyzer
1412      */
1413     protected void setCpeSuppressionAnalyzer(CpeSuppressionAnalyzer suppression) {
1414         this.suppression = suppression;
1415     }
1416 }