CentralSearch.java
/*
* This file is part of dependency-check-core.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright (c) 2014 Jeremy Long. All Rights Reserved.
*/
package org.owasp.dependencycheck.data.central;
import org.apache.hc.client5.http.impl.classic.AbstractHttpClientResponseHandler;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.message.BasicHeader;
import org.owasp.dependencycheck.utils.DownloadFailedException;
import org.owasp.dependencycheck.utils.Downloader;
import org.owasp.dependencycheck.utils.ResourceNotFoundException;
import org.owasp.dependencycheck.utils.TooManyRequestsException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.concurrent.ThreadSafe;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.jcs3.access.exception.CacheException;
import org.owasp.dependencycheck.data.cache.DataCache;
import org.owasp.dependencycheck.data.cache.DataCacheFactory;
import org.owasp.dependencycheck.data.nexus.MavenArtifact;
import org.owasp.dependencycheck.utils.Settings;
import org.owasp.dependencycheck.utils.XmlUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Searches Maven Central, by SHA-1 hash, for the Maven coordinates (GAV) of
 * an artifact.
 * <p>
 * The search URL is built from the configured root URL and query template.
 * When the central cache is enabled, every completed lookup is stored in it,
 * including lookups that found nothing (stored as an empty list), so that
 * repeated searches for the same hash do not hit the network again.
 * <p>
 * All instance state is assigned once in the constructor and held in final
 * fields.
 *
 * @author colezlaw
 */
@ThreadSafe
public class CentralSearch {

    /**
     * Used for logging.
     */
    private static final Logger LOGGER = LoggerFactory.getLogger(CentralSearch.class);
    /**
     * Message of the {@link FileNotFoundException} raised when a search
     * yields no artifacts.
     */
    private static final String NOT_FOUND_MESSAGE = "Artifact not found in Central";
    /**
     * The URL for the Central service.
     */
    private final String rootURL;
    /**
     * The Central Search Query; a format string taking the root URL and the
     * SHA-1 hash, in that order.
     */
    private final String query;
    /**
     * Whether to use the Proxy when making requests.
     */
    private final boolean useProxy;
    /**
     * The configured settings.
     */
    private final Settings settings;
    /**
     * Cache of Central search results keyed by SHA-1 hash; <code>null</code>
     * when caching is disabled or the cache could not be created.
     */
    private final DataCache<List<MavenArtifact>> cache;

    /**
     * Creates a CentralSearch using the Central URL and query configured in
     * the given settings.
     *
     * @param settings the configured settings
     * @throws MalformedURLException thrown if the configured URL is invalid,
     * or if the configured query is missing or does not contain the two
     * required <code>%s</code> placeholders
     */
    public CentralSearch(Settings settings) throws MalformedURLException {
        this.settings = settings;

        final String searchUrl = settings.getString(Settings.KEYS.ANALYZER_CENTRAL_URL);
        LOGGER.debug("Central Search URL: {}", searchUrl);
        if (isInvalidURL(searchUrl)) {
            throw new MalformedURLException(String.format("The configured central analyzer URL is invalid: %s", searchUrl));
        }
        this.rootURL = searchUrl;

        final String queryStr = settings.getString(Settings.KEYS.ANALYZER_CENTRAL_QUERY);
        LOGGER.debug("Central Search Query: {}", queryStr);
        // a missing query setting is reported the same way as a malformed one
        // rather than surfacing as a NullPointerException
        if (queryStr == null || !queryStr.matches("^%s.*%s.*$")) {
            final String msg = String.format("The configured central analyzer query parameter is invalid (it must have two %%s): %s", queryStr);
            throw new MalformedURLException(msg);
        }
        this.query = queryStr;
        LOGGER.debug("Central Search Full URL: {}", String.format(query, rootURL, "[SHA1]"));

        this.useProxy = null != settings.getString(Settings.KEYS.PROXY_SERVER)
                || null != System.getProperty("https.proxyHost");
        if (useProxy) {
            LOGGER.debug("Using proxy");
        } else {
            LOGGER.debug("Not using proxy");
        }

        // build the cache into a local first so the field can be final
        DataCache<List<MavenArtifact>> centralCache = null;
        if (settings.getBoolean(Settings.KEYS.ANALYZER_CENTRAL_USE_CACHE, true)) {
            try {
                final DataCacheFactory factory = new DataCacheFactory(settings);
                centralCache = factory.getCentralCache();
            } catch (CacheException ex) {
                settings.setBoolean(Settings.KEYS.ANALYZER_CENTRAL_USE_CACHE, false);
                LOGGER.debug("Error creating cache, disabling caching", ex);
            }
        }
        this.cache = centralCache;
    }

    /**
     * Searches the configured Central URL for the given SHA1 hash. If the
     * artifact is found, a <code>MavenArtifact</code> is populated with the
     * GAV.
     * <p>
     * When caching is enabled the cache is consulted first, and the outcome
     * of a completed remote search (found or not found) is stored in it.
     *
     * @param sha1 the SHA-1 hash string for which to search
     * @return the populated Maven GAV.
     * @throws IllegalArgumentException if the hash is <code>null</code> or is
     * not 40 hexadecimal characters
     * @throws FileNotFoundException if the specified artifact is not found
     * @throws IOException if it's unable to connect to the specified repository
     * @throws TooManyRequestsException if Central has received too many
     * requests.
     */
    public List<MavenArtifact> searchSha1(String sha1) throws IOException, TooManyRequestsException {
        if (null == sha1 || !sha1.matches("^[0-9A-Fa-f]{40}$")) {
            throw new IllegalArgumentException("Invalid SHA1 format");
        }
        if (cache != null) {
            final List<MavenArtifact> cached = cache.get(sha1);
            if (cached != null) {
                LOGGER.debug("cache hit for Central: {}", sha1);
                // an empty cached list records an earlier "not found" result
                if (cached.isEmpty()) {
                    throw new FileNotFoundException(NOT_FOUND_MESSAGE);
                }
                return cached;
            }
        }

        final List<MavenArtifact> result = new ArrayList<>();
        final URL url = new URL(String.format(query, rootURL, sha1));
        LOGGER.trace("Searching Central url {}", url);

        // JSON would be more elegant, but there's not currently a dependency
        // on JSON, so don't want to add one just for this
        final BasicHeader acceptHeader = new BasicHeader("Accept", "application/xml");
        final AbstractHttpClientResponseHandler<Document> handler = new AbstractHttpClientResponseHandler<>() {
            @Override
            public Document handleEntity(HttpEntity entity) throws IOException {
                try (InputStream in = entity.getContent()) {
                    final DocumentBuilder builder = XmlUtils.buildSecureDocumentBuilder();
                    return builder.parse(in);
                } catch (ParserConfigurationException | SAXException | IOException e) {
                    // Anything else is jacked up XML stuff that we really can't recover from well
                    final String errorMessage = "Failed to parse MavenCentral XML Response: " + e.getMessage();
                    throw new IOException(errorMessage, e);
                }
            }
        };

        try {
            final Document doc = Downloader.getInstance().fetchAndHandle(url, handler, List.of(acceptHeader), useProxy);
            final boolean missing = addMavenArtifacts(doc, result);
            // cache the outcome either way; an empty list marks "not found"
            if (cache != null) {
                cache.put(sha1, result);
            }
            if (missing) {
                throw new FileNotFoundException(NOT_FOUND_MESSAGE);
            }
        } catch (XPathExpressionException e) {
            final String errorMessage = "Failed to parse MavenCentral XML Response: " + e.getMessage();
            throw new IOException(errorMessage, e);
        } catch (TooManyRequestsException e) {
            final String errorMessage = "Too many requests sent to MavenCentral; additional requests are being rejected.";
            throw new TooManyRequestsException(errorMessage, e);
        } catch (ResourceNotFoundException | DownloadFailedException e) {
            final String errorMessage = "Could not connect to MavenCentral " + e.getMessage();
            throw new IOException(errorMessage, e);
        }
        return result;
    }

    /**
     * Collect the artifacts from a MavenCentral search result and add them to
     * the list.
     * <p>
     * A document is reported as holding no results when its
     * <code>numFound</code> attribute is <code>0</code> or when it contains no
     * <code>/response/result/doc</code> entries. The second case keeps a
     * first-time search consistent with a cached one: an empty list is always
     * reported to the caller as "not found".
     *
     * @param doc The Document received in response to the SHA1 search-request
     * @param result The list of MavenArtifacts to which found artifacts will
     * be added
     * @return Whether the given document holds no search results
     * @throws XPathExpressionException if an XPath expression cannot be
     * evaluated against the document
     */
    private boolean addMavenArtifacts(Document doc, List<MavenArtifact> result) throws XPathExpressionException {
        final XPath xpath = XPathFactory.newInstance().newXPath();
        final String numFound = xpath.evaluate("/response/result/@numFound", doc);
        if ("0".equals(numFound)) {
            return true;
        }
        final NodeList docs = (NodeList) xpath.evaluate("/response/result/doc", doc, XPathConstants.NODESET);
        final int count = docs.getLength();
        // the base content URL does not vary per artifact; read it once
        final String centralContentUrl = settings.getString(Settings.KEYS.CENTRAL_CONTENT_URL);
        for (int i = 0; i < count; i++) {
            final String g = xpath.evaluate("./str[@name='g']", docs.item(i));
            LOGGER.trace("GroupId: {}", g);
            final String a = xpath.evaluate("./str[@name='a']", docs.item(i));
            LOGGER.trace("ArtifactId: {}", a);
            final String v = xpath.evaluate("./str[@name='v']", docs.item(i));

            // the 'ec' array lists the file extensions available for the artifact
            final NodeList attributes = (NodeList) xpath.evaluate("./arr[@name='ec']/str", docs.item(i), XPathConstants.NODESET);
            boolean pomAvailable = false;
            boolean jarAvailable = false;
            for (int x = 0; x < attributes.getLength(); x++) {
                final String tmp = xpath.evaluate(".", attributes.item(x));
                if (".pom".equals(tmp)) {
                    pomAvailable = true;
                } else if (".jar".equals(tmp)) {
                    jarAvailable = true;
                }
            }

            final String artifactUrl = jarAvailable ? buildContentUrl(centralContentUrl, g, a, v, ".jar") : null;
            final String pomUrl = pomAvailable ? buildContentUrl(centralContentUrl, g, a, v, ".pom") : null;
            result.add(new MavenArtifact(g, a, v, artifactUrl, pomUrl));
        }
        return count == 0;
    }

    /**
     * Builds the content URL of one file of an artifact, e.g.
     * <code>org/springframework/spring-core/3.2.0.RELEASE/spring-core-3.2.0.RELEASE.pom</code>
     * appended to the base URL.
     *
     * @param baseUrl the base content URL
     * @param g the group id
     * @param a the artifact id
     * @param v the version
     * @param extension the file extension including the leading dot
     * @return the URL of the requested file
     */
    private static String buildContentUrl(String baseUrl, String g, String a, String v, String extension) {
        return baseUrl + g.replace('.', '/') + '/' + a + '/'
                + v + '/' + a + '-' + v + extension;
    }

    /**
     * Tests to determine if the given URL is <b>invalid</b>.
     *
     * @param url the URL to evaluate
     * @return true if the URL is malformed; otherwise false
     */
    private boolean isInvalidURL(String url) {
        try {
            final URL u = new URL(url);
            u.toURI();
        } catch (MalformedURLException | URISyntaxException e) {
            LOGGER.trace("URL is invalid: {}", url);
            return true;
        }
        return false;
    }
}