PythonDistributionAnalyzer.java
/*
* This file is part of dependency-check-core.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright (c) 2015 Institute for Defense Analyses. All Rights Reserved.
*/
package org.owasp.dependencycheck.analyzer;
import com.github.packageurl.MalformedPackageURLException;
import com.github.packageurl.PackageURL;
import com.github.packageurl.PackageURLBuilder;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import org.apache.commons.io.filefilter.NameFileFilter;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.apache.commons.lang3.StringUtils;
import org.owasp.dependencycheck.Engine;
import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
import org.owasp.dependencycheck.dependency.Confidence;
import org.owasp.dependencycheck.dependency.Dependency;
import org.owasp.dependencycheck.utils.PyPACoreMetadataParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.owasp.dependencycheck.exception.InitializationException;
import org.owasp.dependencycheck.utils.ExtractionException;
import org.owasp.dependencycheck.utils.ExtractionUtil;
import org.owasp.dependencycheck.utils.FileFilterBuilder;
import org.owasp.dependencycheck.utils.FileUtils;
import org.owasp.dependencycheck.utils.Settings;
import org.owasp.dependencycheck.utils.UrlStringUtils;
import java.util.concurrent.atomic.AtomicInteger;
import javax.annotation.concurrent.ThreadSafe;
import org.owasp.dependencycheck.data.nvd.ecosystem.Ecosystem;
import org.owasp.dependencycheck.dependency.EvidenceType;
import org.owasp.dependencycheck.dependency.naming.GenericIdentifier;
import org.owasp.dependencycheck.dependency.naming.PurlIdentifier;
/**
* Used to analyze a Wheel or egg distribution files, or their contents in
* unzipped form, and collect information that can be used to determine the
* associated CPE.
*
* @author Dale Visser
*/
@Experimental
@ThreadSafe
public class PythonDistributionAnalyzer extends AbstractFileTypeAnalyzer {
/**
* A descriptor for the type of dependencies processed or added by this
* analyzer.
*/
public static final String DEPENDENCY_ECOSYSTEM = Ecosystem.PYTHON;
/**
* Name of egg metadata files to analyze.
*/
private static final String PKG_INFO = "PKG-INFO";
/**
* Name of wheel metadata files to analyze.
*/
private static final String METADATA = "METADATA";
/**
* The logger.
*/
private static final Logger LOGGER = LoggerFactory.getLogger(PythonDistributionAnalyzer.class);
/**
* The count of directories created during analysis. This is used for
* creating temporary directories.
*/
private static final AtomicInteger DIR_COUNT = new AtomicInteger(0);
/**
* The name of the analyzer.
*/
private static final String ANALYZER_NAME = "Python Distribution Analyzer";
/**
* The phase that this analyzer is intended to run in.
*/
private static final AnalysisPhase ANALYSIS_PHASE = AnalysisPhase.INFORMATION_COLLECTION;
/**
* The set of file extensions supported by this analyzer.
*/
private static final String[] EXTENSIONS = {"whl", "egg", "zip"};
/**
* Used to match on egg archive candidate extensions.
*/
private static final FileFilter EGG_OR_ZIP = FileFilterBuilder.newInstance().addExtensions("egg", "zip").build();
/**
* Used to detect files with a .whl extension.
*/
private static final FileFilter WHL_FILTER = FileFilterBuilder.newInstance().addExtensions("whl").build();
/**
* The parent directory for the individual directories per archive.
*/
private File tempFileLocation;
/**
* Filter that detects *.dist-info files (but doesn't verify they are
* directories.
*/
private static final FilenameFilter DIST_INFO_FILTER = new SuffixFileFilter(".dist-info");
/**
* Filter that detects files named "METADATA".
*/
private static final FilenameFilter EGG_INFO_FILTER = new NameFileFilter("EGG-INFO");
/**
* Filter that detects files named "METADATA".
*/
private static final NameFileFilter METADATA_FILTER = new NameFileFilter(METADATA);
/**
* Filter that detects files named "PKG-INFO".
*/
private static final NameFileFilter PKG_INFO_FILTER = new NameFileFilter(PKG_INFO);
/**
* The file filter used to determine which files this analyzer supports.
*/
private static final FileFilter FILTER = FileFilterBuilder.newInstance().addFileFilters(
METADATA_FILTER, PKG_INFO_FILTER).addExtensions(EXTENSIONS).build();
/**
* Returns the FileFilter
*
* @return the FileFilter
*/
@Override
protected FileFilter getFileFilter() {
return FILTER;
}
/**
* Returns the name of the analyzer.
*
* @return the name of the analyzer.
*/
@Override
public String getName() {
return ANALYZER_NAME;
}
/**
* Returns the phase that the analyzer is intended to run in.
*
* @return the phase that the analyzer is intended to run in.
*/
@Override
public AnalysisPhase getAnalysisPhase() {
return ANALYSIS_PHASE;
}
/**
* Returns the key used in the properties file to reference the analyzer's
* enabled property.
*
* @return the analyzer's enabled property setting key
*/
@Override
protected String getAnalyzerEnabledSettingKey() {
return Settings.KEYS.ANALYZER_PYTHON_DISTRIBUTION_ENABLED;
}
@Override
protected void analyzeDependency(Dependency dependency, Engine engine)
throws AnalysisException {
dependency.setEcosystem(DEPENDENCY_ECOSYSTEM);
final File actualFile = dependency.getActualFile();
if (WHL_FILTER.accept(actualFile)) {
collectMetadataFromArchiveFormat(dependency, DIST_INFO_FILTER,
METADATA_FILTER);
} else if (EGG_OR_ZIP.accept(actualFile)) {
collectMetadataFromArchiveFormat(dependency, EGG_INFO_FILTER,
PKG_INFO_FILTER);
} else {
final String name = actualFile.getName();
final boolean metadata = METADATA.equals(name);
if (metadata || PKG_INFO.equals(name)) {
final File parent = actualFile.getParentFile();
final String parentName = parent.getName();
if (parent.isDirectory()
&& ((metadata && parentName.endsWith(".dist-info"))
|| parentName.endsWith(".egg-info") || "EGG-INFO"
.equals(parentName))) {
collectWheelMetadata(dependency, actualFile);
}
}
}
}
/**
* Collects the meta data from an archive.
*
* @param dependency the archive being scanned
* @param folderFilter the filter to apply to the folder
* @param metadataFilter the filter to apply to the meta data
* @throws AnalysisException thrown when there is a problem analyzing the
* dependency
*/
private void collectMetadataFromArchiveFormat(Dependency dependency,
FilenameFilter folderFilter, FilenameFilter metadataFilter)
throws AnalysisException {
final File temp = getNextTempDirectory();
LOGGER.debug("{} exists? {}", temp, temp.exists());
try {
ExtractionUtil.extractFilesUsingFilter(
new File(dependency.getActualFilePath()), temp,
metadataFilter);
} catch (ExtractionException ex) {
throw new AnalysisException(ex);
}
File matchingFile = getMatchingFile(temp, folderFilter);
if (matchingFile != null) {
matchingFile = getMatchingFile(matchingFile, metadataFilter);
if (matchingFile != null) {
collectWheelMetadata(dependency, matchingFile);
}
}
}
/**
* Makes sure a usable temporary directory is available.
*
* @param engine a reference to the dependency-check engine
* @throws InitializationException an AnalyzeException is thrown when the
* temp directory cannot be created
*/
@Override
protected void prepareFileTypeAnalyzer(Engine engine) throws InitializationException {
try {
final File baseDir = getSettings().getTempDirectory();
tempFileLocation = File.createTempFile("check", "tmp", baseDir);
if (!tempFileLocation.delete()) {
setEnabled(false);
final String msg = String.format(
"Unable to delete temporary file '%s'.",
tempFileLocation.getAbsolutePath());
throw new InitializationException(msg);
}
if (!tempFileLocation.mkdirs()) {
setEnabled(false);
final String msg = String.format(
"Unable to create directory '%s'.",
tempFileLocation.getAbsolutePath());
throw new InitializationException(msg);
}
} catch (IOException ex) {
setEnabled(false);
throw new InitializationException("Unable to create a temporary file", ex);
}
}
/**
* Deletes any files extracted from the Wheel during analysis.
*/
@Override
public void closeAnalyzer() {
if (tempFileLocation != null && tempFileLocation.exists()) {
LOGGER.debug("Attempting to delete temporary files");
final boolean success = FileUtils.delete(tempFileLocation);
if (!success && tempFileLocation.exists()) {
final String[] l = tempFileLocation.list();
if (l != null && l.length > 0) {
LOGGER.warn("Failed to delete some temporary files, see the log for more details");
}
}
}
}
/**
* Gathers evidence from the METADATA file.
*
* @param dependency the dependency being analyzed
* @param file a reference to the manifest/properties file
*/
private static void collectWheelMetadata(Dependency dependency, File file) throws AnalysisException {
final Properties headers = PyPACoreMetadataParser.getProperties(file);
final String version = addPropertyToEvidence(dependency, EvidenceType.VERSION, Confidence.HIGHEST, headers, "Version");
final String name = addPropertyToEvidence(dependency, EvidenceType.VENDOR, Confidence.HIGHEST, headers, "Name");
addPropertyToEvidence(dependency, EvidenceType.PRODUCT, Confidence.HIGHEST, headers, "Name");
final String packagePath = String.format("%s:%s", name, version);
dependency.setName(name);
dependency.setVersion(version);
dependency.setPackagePath(packagePath);
dependency.setDisplayFileName(packagePath);
final String url = headers.getProperty("Home-page", null);
if (StringUtils.isNotBlank(url)) {
if (UrlStringUtils.isUrl(url)) {
dependency.addEvidence(EvidenceType.VENDOR, METADATA, "vendor", url, Confidence.MEDIUM);
}
}
addPropertyToEvidence(dependency, EvidenceType.VENDOR, Confidence.LOW, headers, "Author");
final String summary = headers.getProperty("Summary", null);
if (StringUtils.isNotBlank(summary)) {
JarAnalyzer.addDescription(dependency, summary, METADATA, "summary");
}
try {
final PackageURL purl = PackageURLBuilder.aPackageURL().withType("pypi")
.withName(name).withVersion(version).build();
dependency.addSoftwareIdentifier(new PurlIdentifier(purl, Confidence.HIGHEST));
} catch (MalformedPackageURLException ex) {
LOGGER.debug("Unable to build package url for python", ex);
final GenericIdentifier id = new GenericIdentifier("generic:" + name + "@" + version, Confidence.HIGHEST);
dependency.addSoftwareIdentifier(id);
}
}
/**
* Adds a value to the evidence collection.
*
* @param dependency the dependency being analyzed
* @param type the type of evidence to add
* @param confidence the confidence in the evidence being added
* @param headers the properties collection
* @param property the property name
* @return returns the value of the property if found; otherwise
* <code>null</code>
*/
private static String addPropertyToEvidence(Dependency dependency, EvidenceType type, Confidence confidence,
Properties headers, String property) {
final String value = headers.getProperty(property, null);
LOGGER.debug("Property: {}, Value: {}", property, value);
if (StringUtils.isNotBlank(value)) {
dependency.addEvidence(type, METADATA, property, value, confidence);
}
return value;
}
/**
* Returns a list of files that match the given filter, this does not
* recursively scan the directory.
*
* @param folder the folder to filter
* @param filter the filter to apply to the files in the directory
* @return the list of Files in the directory that match the provided filter
*/
private static File getMatchingFile(File folder, FilenameFilter filter) {
File result = null;
final File[] matches = folder.listFiles(filter);
if (null != matches && 1 == matches.length) {
result = matches[0];
}
return result;
}
/**
* Reads the manifest entries from the provided file.
*
* @param manifest the manifest
* @return the manifest entries
*/
private static Properties getManifestProperties(File manifest) {
final Properties prop = new Properties();
if (null == manifest) {
LOGGER.debug("Manifest file not found.");
} else {
try (InputStream in = new BufferedInputStream(new FileInputStream(manifest))) {
prop.load(in);
} catch (IOException e) {
LOGGER.warn(e.getMessage(), e);
}
}
return prop;
}
/**
* Retrieves the next temporary destination directory for extracting an
* archive.
*
* @return a directory
* @throws AnalysisException thrown if unable to create temporary directory
*/
private File getNextTempDirectory() throws AnalysisException {
File directory;
// getting an exception for some directories not being able to be
// created; might be because the directory already exists?
do {
final int dirCount = DIR_COUNT.incrementAndGet();
directory = new File(tempFileLocation, String.valueOf(dirCount));
} while (directory.exists());
if (!directory.mkdirs()) {
throw new AnalysisException(String.format(
"Unable to create temp directory '%s'.",
directory.getAbsolutePath()));
}
return directory;
}
}