View Javadoc
1   /*
2    * This file is part of dependency-check-core.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * Copyright (c) 2015 Institute for Defense Analyses. All Rights Reserved.
17   */
18  package org.owasp.dependencycheck.analyzer;
19  
20  import com.github.packageurl.MalformedPackageURLException;
21  import com.github.packageurl.PackageURL;
22  import com.github.packageurl.PackageURLBuilder;
23  import org.apache.commons.io.filefilter.NameFileFilter;
24  import org.apache.commons.io.filefilter.SuffixFileFilter;
25  import org.owasp.dependencycheck.Engine;
26  import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
27  import org.owasp.dependencycheck.data.nvd.ecosystem.Ecosystem;
28  import org.owasp.dependencycheck.dependency.Confidence;
29  import org.owasp.dependencycheck.dependency.Dependency;
30  import org.owasp.dependencycheck.dependency.EvidenceType;
31  import org.owasp.dependencycheck.dependency.naming.GenericIdentifier;
32  import org.owasp.dependencycheck.dependency.naming.PurlIdentifier;
33  import org.owasp.dependencycheck.exception.InitializationException;
34  import org.owasp.dependencycheck.utils.FileFilterBuilder;
35  import org.owasp.dependencycheck.utils.Settings;
36  import org.owasp.dependencycheck.utils.UrlStringUtils;
37  import org.slf4j.Logger;
38  import org.slf4j.LoggerFactory;
39  
40  import javax.annotation.concurrent.ThreadSafe;
41  import java.io.File;
42  import java.io.FileFilter;
43  import java.io.IOException;
44  import java.nio.charset.StandardCharsets;
45  import java.nio.file.Files;
46  import java.util.regex.Matcher;
47  import java.util.regex.Pattern;
48  
49  /**
50   * Used to analyze a Python package, and collect information that can be used to
51   * determine the associated CPE.
52   *
53   * @author Dale Visser
54   */
55  @Experimental
56  @ThreadSafe
57  public class PythonPackageAnalyzer extends AbstractFileTypeAnalyzer {
58  
59      /**
60       * The logger.
61       */
62      private static final Logger LOGGER = LoggerFactory.getLogger(PythonPackageAnalyzer.class);
63  
64      /**
65       * A descriptor for the type of dependencies processed or added by this
66       * analyzer.
67       */
68      public static final String DEPENDENCY_ECOSYSTEM = Ecosystem.PYTHON;
69  
70      /**
71       * Used when compiling file scanning regex patterns.
72       */
73      private static final int REGEX_OPTIONS = Pattern.DOTALL | Pattern.CASE_INSENSITIVE;
74  
75      /**
76       * Filename extensions for files to be analyzed.
77       */
78      private static final String EXTENSIONS = "py";
79  
80      /**
81       * Pattern for matching the module doc string in a source file.
82       */
83      private static final Pattern MODULE_DOCSTRING = Pattern.compile("^(['\\\"]{3})(.*?)\\1", REGEX_OPTIONS);
84  
85      /**
86       * Matches assignments to version variables in Python source code.
87       */
88      private static final Pattern VERSION_PATTERN = Pattern.compile("\\b(__)?version(__)? *= *(['\"]+)(\\d+\\.\\d+.*?)\\3",
89              REGEX_OPTIONS);
90  
91      /**
92       * Matches assignments to title variables in Python source code.
93       */
94      private static final Pattern TITLE_PATTERN = compileAssignPattern("title");
95  
96      /**
97       * Matches assignments to summary variables in Python source code.
98       */
99      private static final Pattern SUMMARY_PATTERN = compileAssignPattern("summary");
100 
101     /**
102      * Matches assignments to URL/URL variables in Python source code.
103      */
104     private static final Pattern URI_PATTERN = compileAssignPattern("ur[il]");
105 
106     /**
107      * Matches assignments to home page variables in Python source code.
108      */
109     private static final Pattern HOMEPAGE_PATTERN = compileAssignPattern("home_?page");
110 
111     /**
112      * Matches assignments to author variables in Python source code.
113      */
114     private static final Pattern AUTHOR_PATTERN = compileAssignPattern("author");
115 
116     /**
117      * Filter that detects files named "__init__.py".
118      */
119     private static final FileFilter INIT_PY_FILTER = new NameFileFilter("__init__.py");
120 
121     /**
122      * The file filter for python files.
123      */
124     private static final FileFilter PY_FILTER = new SuffixFileFilter(".py");
125 
126     /**
127      * The file filter used to determine which files this analyzer supports.
128      */
129     private static final FileFilter FILTER = FileFilterBuilder.newInstance().addExtensions(EXTENSIONS).build();
130 
131     /**
132      * Returns the name of the Python Package Analyzer.
133      *
134      * @return the name of the analyzer
135      */
136     @Override
137     public String getName() {
138         return "Python Package Analyzer";
139     }
140 
141     /**
142      * Tell that we are used for information collection.
143      *
144      * @return INFORMATION_COLLECTION
145      */
146     @Override
147     public AnalysisPhase getAnalysisPhase() {
148         return AnalysisPhase.INFORMATION_COLLECTION;
149     }
150 
151     /**
152      * Returns the key name for the analyzers enabled setting.
153      *
154      * @return the key name for the analyzers enabled setting
155      */
156     @Override
157     protected String getAnalyzerEnabledSettingKey() {
158         return Settings.KEYS.ANALYZER_PYTHON_PACKAGE_ENABLED;
159     }
160 
161     /**
162      * Returns the FileFilter
163      *
164      * @return the FileFilter
165      */
166     @Override
167     protected FileFilter getFileFilter() {
168         return FILTER;
169     }
170 
171     /**
172      * No-op initializer implementation.
173      *
174      * @param engine a reference to the dependency-check engine
175      * @throws InitializationException never thrown
176      */
177     @Override
178     protected void prepareFileTypeAnalyzer(Engine engine) throws InitializationException {
179         // Nothing to do here.
180     }
181 
182     /**
183      * Utility function to create a regex pattern matcher.
184      *
185      * @param name the value to use when constructing the assignment pattern
186      * @return the compiled Pattern
187      */
188     private static Pattern compileAssignPattern(String name) {
189         return Pattern.compile(
190                 String.format("\\b(__)?%s(__)?\\b *= *(['\"]+)(.*?)\\3", name),
191                 REGEX_OPTIONS);
192     }
193 
194     /**
195      * Analyzes python packages and adds evidence to the dependency.
196      *
197      * @param dependency the dependency being analyzed
198      * @param engine the engine being used to perform the scan
199      * @throws AnalysisException thrown if there is an unrecoverable error
200      * analyzing the dependency
201      */
202     @Override
203     protected void analyzeDependency(Dependency dependency, Engine engine)
204             throws AnalysisException {
205         dependency.setEcosystem(DEPENDENCY_ECOSYSTEM);
206         final File file = dependency.getActualFile();
207         final File parent = file.getParentFile();
208         final String parentName = parent.getName();
209         if (INIT_PY_FILTER.accept(file)) {
210             //by definition, the containing folder of __init__.py is considered the package, even the file is empty:
211             //"The __init__.py files are required to make Python treat the directories as containing packages"
212             //see section "6.4 Packages" from https://docs.python.org/2/tutorial/modules.html;
213             dependency.addEvidence(EvidenceType.PRODUCT, file.getName(), "PackageName", parentName, Confidence.HIGHEST);
214             dependency.setName(parentName);
215 
216             final File[] fileList = parent.listFiles(PY_FILTER);
217             if (fileList != null) {
218                 for (final File sourceFile : fileList) {
219                     analyzeFileContents(dependency, sourceFile);
220                 }
221             }
222         } else {
223             engine.removeDependency(dependency);
224         }
225     }
226 
227     /**
228      * This should gather information from leading docstrings, file comments,
229      * and assignments to __version__, __title__, __summary__, __uri__, __url__,
230      * __home*page__, __author__, and their all caps equivalents.
231      *
232      * @param dependency the dependency being analyzed
233      * @param file the file name to analyze
234      * @throws AnalysisException thrown if there is an unrecoverable error
235      */
236     private void analyzeFileContents(Dependency dependency, File file)
237             throws AnalysisException {
238         final String contents;
239         try {
240             contents = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8).trim();
241         } catch (IOException e) {
242             throw new AnalysisException("Problem occurred while reading dependency file.", e);
243         }
244         if (!contents.isEmpty()) {
245             final String source = file.getName();
246             gatherEvidence(dependency, EvidenceType.VERSION, VERSION_PATTERN, contents,
247                     source, "SourceVersion", Confidence.MEDIUM);
248             addSummaryInfo(dependency, SUMMARY_PATTERN, 4, contents,
249                     source, "summary");
250             if (INIT_PY_FILTER.accept(file)) {
251                 addSummaryInfo(dependency, MODULE_DOCSTRING, 2,
252                         contents, source, "docstring");
253             }
254             gatherEvidence(dependency, EvidenceType.PRODUCT, TITLE_PATTERN, contents,
255                     source, "SourceTitle", Confidence.LOW);
256 
257             gatherEvidence(dependency, EvidenceType.VENDOR, AUTHOR_PATTERN, contents,
258                     source, "SourceAuthor", Confidence.MEDIUM);
259             gatherHomePageEvidence(dependency, EvidenceType.VENDOR, URI_PATTERN,
260                     source, "URL", contents);
261             gatherHomePageEvidence(dependency, EvidenceType.VENDOR, HOMEPAGE_PATTERN,
262                     source, "HomePage", contents);
263 
264             try {
265                 final PackageURLBuilder builder = PackageURLBuilder.aPackageURL().withType("pypi").withName(dependency.getName());
266                 if (dependency.getVersion() != null) {
267                     builder.withVersion(dependency.getVersion());
268                 }
269                 final PackageURL purl = builder.build();
270                 dependency.addSoftwareIdentifier(new PurlIdentifier(purl, Confidence.HIGHEST));
271             } catch (MalformedPackageURLException ex) {
272                 LOGGER.debug("Unable to build package url for python", ex);
273                 final GenericIdentifier id;
274                 if (dependency.getVersion() != null) {
275                     id = new GenericIdentifier("generic:" + dependency.getName() + "@" + dependency.getVersion(), Confidence.HIGHEST);
276                 } else {
277                     id = new GenericIdentifier("generic:" + dependency.getName(), Confidence.HIGHEST);
278                 }
279                 dependency.addSoftwareIdentifier(id);
280             }
281         }
282     }
283 
284     /**
285      * Adds summary information to the dependency
286      *
287      * @param dependency the dependency being analyzed
288      * @param pattern the pattern used to perform analysis
289      * @param group the group from the pattern that indicates the data to use
290      * @param contents the data being analyzed
291      * @param source the source name to use when recording the evidence
292      * @param key the key name to use when recording the evidence
293      */
294     private void addSummaryInfo(Dependency dependency, Pattern pattern,
295                                 int group, String contents, String source, String key) {
296         final Matcher matcher = pattern.matcher(contents);
297         final boolean found = matcher.find();
298         if (found) {
299             JarAnalyzer.addDescription(dependency, matcher.group(group),
300                     source, key);
301         }
302     }
303 
304     /**
305      * Collects evidence from the home page URL.
306      *
307      * @param dependency the dependency that is being analyzed
308      * @param type the type of evidence
309      * @param pattern the pattern to match
310      * @param source the source of the evidence
311      * @param name the name of the evidence
312      * @param contents the home page URL
313      */
314     private void gatherHomePageEvidence(Dependency dependency, EvidenceType type, Pattern pattern,
315                                         String source, String name, String contents) {
316         final Matcher matcher = pattern.matcher(contents);
317         if (matcher.find()) {
318             final String url = matcher.group(4);
319             if (UrlStringUtils.isUrl(url)) {
320                 dependency.addEvidence(type, source, name, url, Confidence.MEDIUM);
321             }
322         }
323     }
324 
325     /**
326      * Gather evidence from a Python source file using the given string
327      * assignment regex pattern.
328      *
329      * @param dependency the dependency that is being analyzed
330      * @param type the type of evidence
331      * @param pattern to scan contents with
332      * @param contents of Python source file
333      * @param source for storing evidence
334      * @param name of evidence
335      * @param confidence in evidence
336      */
337     private void gatherEvidence(Dependency dependency, EvidenceType type, Pattern pattern, String contents,
338                                 String source, String name, Confidence confidence) {
339         final Matcher matcher = pattern.matcher(contents);
340         final boolean found = matcher.find();
341         if (found) {
342             dependency.addEvidence(type, source, name, matcher.group(4), confidence);
343             if (type == EvidenceType.VERSION) {
344                 //TODO - this seems broken as we are cycling over py files and could be grabbing versions from multiple?
345                 dependency.setVersion(matcher.group(4));
346                 final String dispName = String.format("%s:%s", dependency.getName(), dependency.getVersion());
347                 dependency.setDisplayFileName(dispName);
348             }
349         }
350     }
351 }