AbstractMemoryIndex.java
/*
* This file is part of dependency-check-core.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright (c) 2013 Jeremy Long. All Rights Reserved.
*/
package org.owasp.dependencycheck.data.cpe;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import javax.annotation.concurrent.ThreadSafe;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.MMapDirectory;
import org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer;
import org.owasp.dependencycheck.data.nvdcve.DatabaseException;
import org.owasp.dependencycheck.utils.Pair;
import org.owasp.dependencycheck.utils.Settings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
* An in memory Lucene index that contains the vendor/product combinations from
* the CPE (application) identifiers within the NVD CVE data.</p>
*
* This is the last remaining singleton in dependency-check-core; The use of
* this singleton - while it may not technically be thread-safe (one database
* used to build this index may not have the same entries as another) the risk
* of this is currently believed to be small. As this memory index consumes a
* large amount of memory we will remain using the singleton pattern for now.
*
* @author Jeremy Long
*/
@ThreadSafe
public abstract class AbstractMemoryIndex implements MemoryIndex {
/**
* The logger.
*/
private static final Logger LOGGER = LoggerFactory.getLogger(AbstractMemoryIndex.class);
/**
* The in memory Lucene index.
*/
private MMapDirectory index;
/**
* The Lucene IndexReader.
*/
private IndexReader indexReader;
/**
* The Lucene IndexSearcher.
*/
private IndexSearcher indexSearcher;
/**
* The Lucene Analyzer used for Searching.
*/
private Analyzer searchingAnalyzer;
/**
* The Lucene QueryParser used for Searching.
*/
private QueryParser queryParser;
/**
* The product field analyzer.
*/
private SearchFieldAnalyzer productFieldAnalyzer;
/**
* The vendor field analyzer.
*/
private SearchFieldAnalyzer vendorFieldAnalyzer;
/**
* Track the number of current users of the Lucene index; used to track it
* it is okay to actually close the index.
*/
private final AtomicInteger usageCount = new AtomicInteger(0);
/**
* Returns a reference to the instance.
*
* @return a reference to the instance
*/
protected abstract AbstractMemoryIndex instance();
/**
* Creates and loads data into an in memory index.
*
* @param data the CPE data
* @param settings a reference to the dependency-check settings
* @throws IndexException thrown if there is an error creating the index
*/
@Override
public synchronized void open(Set<Pair<String, String>> data, Settings settings) throws IndexException {
if (instance().usageCount.addAndGet(1) == 1) {
try {
final File temp = settings.getTempDirectory();
index = new MMapDirectory(temp.toPath());
buildIndex(data);
indexReader = DirectoryReader.open(index);
} catch (IOException ex) {
throw new IndexException(ex);
}
indexSearcher = new IndexSearcher(indexReader);
searchingAnalyzer = createSearchingAnalyzer();
queryParser = new QueryParser(Fields.DOCUMENT_KEY, searchingAnalyzer);
}
}
/**
* returns whether or not the index is open.
*
* @return whether or not the index is open
*/
public synchronized boolean isOpen() {
return instance().usageCount.get() > 0;
}
/**
* Creates an Analyzer for searching the CPE Index.
*
* @return the CPE Analyzer.
*/
private Analyzer createSearchingAnalyzer() {
final Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
fieldAnalyzers.put(Fields.DOCUMENT_KEY, new KeywordAnalyzer());
productFieldAnalyzer = new SearchFieldAnalyzer();
vendorFieldAnalyzer = new SearchFieldAnalyzer();
fieldAnalyzers.put(Fields.PRODUCT, productFieldAnalyzer);
fieldAnalyzers.put(Fields.VENDOR, vendorFieldAnalyzer);
return new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), fieldAnalyzers);
}
/**
* Closes the CPE Index.
*/
@Override
public synchronized void close() {
if (instance().usageCount.addAndGet(-1) == 0) {
if (searchingAnalyzer != null) {
searchingAnalyzer.close();
searchingAnalyzer = null;
}
if (indexReader != null) {
try {
indexReader.close();
} catch (IOException ex) {
LOGGER.trace("", ex);
}
indexReader = null;
}
queryParser = null;
indexSearcher = null;
if (index != null) {
try {
index.close();
} catch (IOException ex) {
LOGGER.trace("", ex);
}
index = null;
}
}
}
/**
* Builds the CPE Lucene Index based off of the data within the CveDB.
*
* @param data the CPE data
* @throws IndexException thrown if there is an issue creating the index
*/
private void buildIndex(Set<Pair<String, String>> data) throws IndexException {
try (Analyzer analyzer = createSearchingAnalyzer();
IndexWriter indexWriter = new IndexWriter(index,
new IndexWriterConfig(analyzer))) {
final FieldType ft = new FieldType(TextField.TYPE_STORED);
//ignore term frequency
ft.setIndexOptions(IndexOptions.DOCS);
//ignore field length normalization
ft.setOmitNorms(true);
// Tip: reuse the Document and Fields for performance...
// See "Re-use Document and Field instances" from
// http://wiki.apache.org/lucene-java/ImproveIndexingSpeed
final Document doc = new Document();
final Field v = new Field(Fields.VENDOR, Fields.VENDOR, ft);
final Field p = new Field(Fields.PRODUCT, Fields.PRODUCT, ft);
doc.add(v);
doc.add(p);
for (Pair<String, String> pair : data) {
if (pair.getLeft() != null && pair.getRight() != null) {
v.setStringValue(pair.getLeft());
p.setStringValue(pair.getRight());
indexWriter.addDocument(doc);
resetAnalyzers();
}
}
indexWriter.commit();
} catch (DatabaseException ex) {
LOGGER.debug("", ex);
throw new IndexException("Error reading CPE data", ex);
} catch (IOException ex) {
throw new IndexException("Unable to close an in-memory index", ex);
}
}
/**
* Searches the index using the given search string.
*
* @param searchString the query text
* @param maxQueryResults the maximum number of documents to return
* @return the TopDocs found by the search
* @throws ParseException thrown when the searchString is invalid
* @throws IndexException thrown when there is an internal error resetting
* the search analyzer
* @throws IOException is thrown if there is an issue with the underlying
* Index
*/
@Override
public synchronized TopDocs search(String searchString, int maxQueryResults) throws ParseException, IndexException, IOException {
final Query query = parseQuery(searchString);
return search(query, maxQueryResults);
}
/**
* Parses the given string into a Lucene Query.
*
* @param searchString the search text
* @return the Query object
* @throws ParseException thrown if the search text cannot be parsed
* @throws IndexException thrown if there is an error resetting the
* analyzers
*/
@Override
public synchronized Query parseQuery(String searchString) throws ParseException, IndexException {
if (searchString == null || searchString.trim().isEmpty()
|| "product:() AND vendor:()".equals(searchString)) {
throw new ParseException("Query is null or empty");
}
LOGGER.debug(searchString);
Query query;
try {
query = queryParser.parse(searchString);
} catch (BooleanQuery.TooManyClauses ex) {
BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
query = queryParser.parse(searchString);
} catch (ParseException ex) {
if (ex.getMessage() != null && ex.getMessage().contains("too many boolean clauses")) {
BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
query = queryParser.parse(searchString);
} else {
LOGGER.debug("Parse Excepction", ex);
throw ex;
}
}
try {
resetAnalyzers();
} catch (IOException ex) {
throw new IndexException("Unable to reset the analyzer after parsing", ex);
}
return query;
}
/**
* Searches the index using the given query.
*
* @param query the query used to search the index
* @param maxQueryResults the max number of results to return
* @return the TopDocs found be the query
* @throws CorruptIndexException thrown if the Index is corrupt
* @throws IOException thrown if there is an IOException
*/
@Override
public synchronized TopDocs search(Query query, int maxQueryResults) throws CorruptIndexException, IOException {
return indexSearcher.search(query, maxQueryResults);
}
/**
* Retrieves a document from the Index.
*
* @param documentId the id of the document to retrieve
* @return the Document
* @throws IOException thrown if there is an IOException
*/
@Override
public synchronized Document getDocument(int documentId) throws IOException {
return indexSearcher.doc(documentId);
}
/**
* Returns the number of CPE entries stored in the index.
*
* @return the number of CPE entries stored in the index
*/
public synchronized int numDocs() {
if (indexReader == null) {
return -1;
}
return indexReader.numDocs();
}
/**
* Method to explain queries matches.
*
* @param query the query used
* @param doc the document matched
* @return the explanation
* @throws IOException thrown if there is an index error
*/
public synchronized String explain(Query query, int doc) throws IOException {
return indexSearcher.explain(query, doc).toString();
}
/**
* Common method to reset the searching analyzers.
*
* @throws IOException thrown if there is an index error
*/
protected synchronized void resetAnalyzers() throws IOException {
productFieldAnalyzer.reset();
vendorFieldAnalyzer.reset();
}
}