From a9c12b9eb656103671528e9e9f007996623ea787 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Sat, 12 Feb 2022 05:43:29 +1100 Subject: [PATCH] _cli, resolvelib: Support for custom indices (#238) * _cli, resolvelib: Initial support for multiple indices * pypi_provider: Fix docstrings * _cli, resolvelib: Create constant for PyPI url * resolvelib: Document `index_urls` param * test: Add unit test for multiple indices * test: Comments * CHANGELOG: Update changelog * _cli: Check for improper usage of new index flags * README: Update help text * README: Adjust help text * test: Add a test for when a package only exists on one index --- CHANGELOG.md | 8 ++ README.md | 10 ++ pip_audit/_cli.py | 31 +++++- pip_audit/_dependency_source/__init__.py | 3 +- .../_dependency_source/resolvelib/__init__.py | 3 +- .../resolvelib/pypi_provider.py | 37 +++++-- .../resolvelib/resolvelib.py | 5 +- test/dependency_source/test_resolvelib.py | 100 +++++++++++++++++- 8 files changed, 182 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 320b0840..6ee4120a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,14 @@ All versions prior to 0.0.9 are untracked. conjunction with `-r` to check that all requirements in the file have an associated hash ([#229](https://github.com/trailofbits/pip-audit/pull/229)) +* CLI: The `--index-url` flag has been added, allowing users to use custom + package indices when running with the `-r` flag + ([#238](https://github.com/trailofbits/pip-audit/pull/238)) + +* CLI: The `--extra-index-url` flag has been added, allowing users to use + multiple package indices when running with the `-r` flag + ([#238](https://github.com/trailofbits/pip-audit/pull/238)) + ### Changed * `pip-audit`'s minimum Python version is now 3.7. diff --git a/README.md b/README.md index cd0f73ea..16ec7937 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,7 @@ usage: pip-audit [-h] [-V] [-l] [-r REQUIREMENTS] [-f FORMAT] [-s SERVICE] [-d] [-S] [--desc [{on,off,auto}]] [--cache-dir CACHE_DIR] [--progress-spinner {on,off}] [--timeout TIMEOUT] [--path PATHS] [-v] [--fix] [--require-hashes] + [--index-url INDEX_URL] [--extra-index-url EXTRA_INDEX_URLS] audit the Python environment for dependencies with known vulnerabilities @@ -119,6 +120,15 @@ optional arguments: repeatable audits; this option is implied when any package in a requirements file has a `--hash` option. (default: False) + --index-url INDEX_URL + base URL of the Python Package Index; this should + point to a repository compliant with PEP 503 (the + simple repository API) (default: + https://pypi.org/simple) + --extra-index-url EXTRA_INDEX_URLS + extra URLs of package indexes to use in addition to + `--index-url`; should follow the same rules as + `--index-url` (default: []) ``` diff --git a/pip_audit/_cli.py b/pip_audit/_cli.py index 4891ae4a..1d64594c 100644 --- a/pip_audit/_cli.py +++ b/pip_audit/_cli.py @@ -14,6 +14,7 @@ from pip_audit import __version__ from pip_audit._audit import AuditOptions, Auditor from pip_audit._dependency_source import ( + PYPI_URL, DependencySource, PipSource, RequirementSource, @@ -245,6 +246,22 @@ def _parser() -> argparse.ArgumentParser: help="require a hash to check each requirement against, for repeatable audits; this option " "is implied when any package in a requirements file has a `--hash` option.", ) + parser.add_argument( + "--index-url", + type=str, + help="base URL of the Python Package Index; this should point to a repository compliant " + "with PEP 503 (the simple repository API)", + default=PYPI_URL, + ) + parser.add_argument( + "--extra-index-url", + type=str, + action="append", + dest="extra_index_urls", + default=[], + help="extra URLs of package indexes to use in addition to `--index-url`; should follow the " + "same rules as `--index-url`", + ) return parser @@ -268,9 +285,14 @@ def audit() -> None: output_desc = args.desc.to_bool(args.format) formatter = args.format.to_format(output_desc) - # The `--require-hashes` flag is only valid with requirements files - if args.require_hashes and args.requirements is None: - parser.error("The --require-hashes flag can only be used with --requirement (-r)") + # Check for flags that are only valid with requirements files + if args.requirements is None: + if args.require_hashes: + parser.error("The --require-hashes flag can only be used with --requirement (-r)") + elif args.index_url != PYPI_URL: + parser.error("The --index-url flag can only be used with --requirement (-r)") + elif args.extra_index_urls: + parser.error("The --extra-index-url flag can only be used with --requirement (-r)") with ExitStack() as stack: actors = [] @@ -280,10 +302,11 @@ def audit() -> None: source: DependencySource if args.requirements is not None: + index_urls = [args.index_url] + args.extra_index_urls req_files: List[Path] = [Path(req.name) for req in args.requirements] source = RequirementSource( req_files, - ResolveLibResolver(args.timeout, args.cache_dir, state), + ResolveLibResolver(index_urls, args.timeout, args.cache_dir, state), args.require_hashes, state, ) diff --git a/pip_audit/_dependency_source/__init__.py b/pip_audit/_dependency_source/__init__.py index ab19146d..cacb099e 100644 --- a/pip_audit/_dependency_source/__init__.py +++ b/pip_audit/_dependency_source/__init__.py @@ -11,9 +11,10 @@ ) from .pip import PipSource, PipSourceError from .requirement import RequirementSource -from .resolvelib import ResolveLibResolver +from .resolvelib import PYPI_URL, ResolveLibResolver __all__ = [ + "PYPI_URL", "DependencyFixError", "DependencyResolver", "DependencyResolverError", diff --git a/pip_audit/_dependency_source/resolvelib/__init__.py b/pip_audit/_dependency_source/resolvelib/__init__.py index f4f4bf84..600bc420 100644 --- a/pip_audit/_dependency_source/resolvelib/__init__.py +++ b/pip_audit/_dependency_source/resolvelib/__init__.py @@ -2,9 +2,10 @@ `resolvelib` interactions for `pip-audit`. """ -from .resolvelib import ResolveLibResolver, ResolveLibResolverError +from .resolvelib import PYPI_URL, ResolveLibResolver, ResolveLibResolverError __all__ = [ + "PYPI_URL", "ResolveLibResolver", "ResolveLibResolverError", ] diff --git a/pip_audit/_dependency_source/resolvelib/pypi_provider.py b/pip_audit/_dependency_source/resolvelib/pypi_provider.py index 14963751..e10ecfe5 100644 --- a/pip_audit/_dependency_source/resolvelib/pypi_provider.py +++ b/pip_audit/_dependency_source/resolvelib/pypi_provider.py @@ -172,14 +172,33 @@ def _get_metadata_for_sdist(self): return metadata -def get_project_from_pypi( - session, project, extras, timeout: Optional[int], state: AuditState +def get_project_from_indexes( + index_urls: List[str], session, project, extras, timeout: Optional[int], state: AuditState ) -> Iterator[Candidate]: - """Return candidates created from the project name and extras.""" - url = "https://pypi.org/simple/{}".format(project) + """Return candidates from all indexes created from the project name and extras.""" + project_found = False + for index_url in index_urls: + # Not all indexes are guaranteed to have the project so this isn't an error + # We should only return an error if it can't be found on ANY of the supplied index URLs + try: + yield from get_project_from_index(index_url, session, project, extras, timeout, state) + project_found = True + except PyPINotFoundError: + pass + if not project_found: + raise PyPINotFoundError( + f'Could not find project "{project}" on any of the supplied index URLs: {index_urls}' + ) + + +def get_project_from_index( + index_url: str, session, project, extras, timeout: Optional[int], state: AuditState +) -> Iterator[Candidate]: + """Return candidates from an index created from the project name and extras.""" + url = index_url + "/" + project response: requests.Response = session.get(url, timeout=timeout) if response.status_code == 404: - raise PyPINotFoundError(f'Could not find project "{project}" on PyPI') + raise PyPINotFoundError response.raise_for_status() data = response.content doc = html5lib.parse(data, namespaceHTMLElements=False) @@ -231,6 +250,7 @@ class PyPIProvider(AbstractProvider): def __init__( self, + index_urls: List[str], timeout: Optional[int] = None, cache_dir: Optional[Path] = None, state: AuditState = AuditState(), @@ -238,6 +258,8 @@ def __init__( """ Create a new `PyPIProvider`. + `index_urls` is a list of package index URLs. + `timeout` is an optional argument to control how many seconds the component should wait for responses to network requests. @@ -245,6 +267,7 @@ def __init__( `state` is an `AuditState` to use for state callbacks. """ + self.index_urls = index_urls self.timeout = timeout self.session = caching_session(cache_dir, use_pip=True) self._state = state @@ -282,8 +305,8 @@ def find_matches(self, identifier, requirements, incompatibilities): candidates = sorted( [ candidate - for candidate in get_project_from_pypi( - self.session, identifier, extras, self.timeout, self._state + for candidate in get_project_from_indexes( + self.index_urls, self.session, identifier, extras, self.timeout, self._state ) if candidate.version not in bad_versions and all(candidate.version in r.specifier for r in requirements) diff --git a/pip_audit/_dependency_source/resolvelib/resolvelib.py b/pip_audit/_dependency_source/resolvelib/resolvelib.py index 9279e02d..ee41ff10 100644 --- a/pip_audit/_dependency_source/resolvelib/resolvelib.py +++ b/pip_audit/_dependency_source/resolvelib/resolvelib.py @@ -19,6 +19,8 @@ logger = logging.getLogger(__name__) +PYPI_URL = "https://pypi.org/simple" + class ResolveLibResolver(DependencyResolver): """ @@ -28,6 +30,7 @@ class ResolveLibResolver(DependencyResolver): def __init__( self, + index_urls: List[str] = [PYPI_URL], timeout: Optional[int] = None, cache_dir: Optional[Path] = None, state: AuditState = AuditState(), @@ -40,7 +43,7 @@ def __init__( `state` is an `AuditState` to use for state callbacks. """ - self.provider = PyPIProvider(timeout, cache_dir, state) + self.provider = PyPIProvider(index_urls, timeout, cache_dir, state) self.reporter = BaseReporter() self.resolver: Resolver = Resolver(self.provider, self.reporter) diff --git a/test/dependency_source/test_resolvelib.py b/test/dependency_source/test_resolvelib.py index ada7b607..d1124a54 100644 --- a/test/dependency_source/test_resolvelib.py +++ b/test/dependency_source/test_resolvelib.py @@ -295,7 +295,105 @@ def __init__(self): resolved_deps = dict(resolver.resolve_all(iter([req]))) assert len(resolved_deps) == 1 expected_deps = [ - SkippedDependency(name="flask", skip_reason='Could not find project "flask" on PyPI') + SkippedDependency( + name="flask", + skip_reason='Could not find project "flask" on any of the supplied index URLs: ' + "['https://pypi.org/simple']", + ) ] assert req in resolved_deps assert resolved_deps[req] == expected_deps + + +def test_resolvelib_multiple_indexes(monkeypatch): + url1 = "https://index1" + url2 = "https://index2" + package_url1 = f"{url1}/flask" + package_url2 = f"{url2}/flask" + data1 = ( + 'Flask-0.5.tar.gz' + "
" + ) + data2 = ( + 'Flask-0.6.tar.gz' + "
" + ) + + monkeypatch.setattr( + pypi_provider.Candidate, "_get_metadata_for_sdist", lambda _: get_metadata_mock() + ) + + def get_multiple_index_package_mock(url): + if url == package_url1: + return get_package_mock(data1) + else: + assert url == package_url2 + return get_package_mock(data2) + + resolver = resolvelib.ResolveLibResolver([url1, url2]) + monkeypatch.setattr( + resolver.provider.session, "get", lambda url, **kwargs: get_multiple_index_package_mock(url) + ) + + # We want to check that dependency resolution is considering packages found on both indexes + # + # Test with a requirement that will resolve to a package on the first index + req = Requirement("flask<=0.5") + resolved_deps = dict(resolver.resolve_all(iter([req]))) + assert req in resolved_deps + assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.5"))] + + # Now test with a requirement that will resolve to a package on the second index + req = Requirement("flask<=0.6") + resolved_deps = dict(resolver.resolve_all(iter([req]))) + assert req in resolved_deps + assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.6"))] + + +def test_resolvelib_package_missing_on_one_index(monkeypatch): + url1 = "https://index1" + url2 = "https://index2" + package_url1 = f"{url1}/flask" + package_url2 = f"{url2}/flask" + data1 = ( + 'Flask-0.5.tar.gz' + "
" + ) + + monkeypatch.setattr( + pypi_provider.Candidate, "_get_metadata_for_sdist", lambda _: get_metadata_mock() + ) + + # Simulate the package not existing on the second index + def get_multiple_index_package_mock(url): + if url == package_url1: + return get_package_mock(data1) + else: + assert url == package_url2 + pkg = get_package_mock(str()) + pkg.status_code = 404 + return pkg + + resolver = resolvelib.ResolveLibResolver([url1, url2]) + monkeypatch.setattr( + resolver.provider.session, "get", lambda url, **kwargs: get_multiple_index_package_mock(url) + ) + + # If a package doesn't exist on one index, we shouldn't expect an error. We should just skip it + # and only use the other index for finding candidates. + req = Requirement("flask<=0.5") + resolved_deps = dict(resolver.resolve_all(iter([req]))) + assert req in resolved_deps + assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.5"))] + + # Now test with a requirement that will resolve to a package on the second index + req = Requirement("flask<=0.6") + resolved_deps = dict(resolver.resolve_all(iter([req]))) + assert req in resolved_deps + assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.5"))]