Skip to content

Commit

Permalink
_cli, resolvelib: Support for custom indices (#238)
Browse files Browse the repository at this point in the history
* _cli, resolvelib: Initial support for multiple indices

* pypi_provider: Fix docstrings

* _cli, resolvelib: Create constant for PyPI url

* resolvelib: Document `index_urls` param

* test: Add unit test for multiple indices

* test: Comments

* CHANGELOG: Update changelog

* _cli: Check for improper usage of new index flags

* README: Update help text

* README: Adjust help text

* test: Add a test for when a package only exists on one index
  • Loading branch information
tetsuo-cpp authored Feb 11, 2022
1 parent 40574e4 commit a9c12b9
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 15 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ All versions prior to 0.0.9 are untracked.
conjunction with `-r` to check that all requirements in the file have an
associated hash ([#229](https://github.com/trailofbits/pip-audit/pull/229))

* CLI: The `--index-url` flag has been added, allowing users to use custom
package indices when running with the `-r` flag
([#238](https://github.com/trailofbits/pip-audit/pull/238))

* CLI: The `--extra-index-url` flag has been added, allowing users to use
multiple package indices when running with the `-r` flag
([#238](https://github.com/trailofbits/pip-audit/pull/238))

### Changed

* `pip-audit`'s minimum Python version is now 3.7.
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ usage: pip-audit [-h] [-V] [-l] [-r REQUIREMENTS] [-f FORMAT] [-s SERVICE]
[-d] [-S] [--desc [{on,off,auto}]] [--cache-dir CACHE_DIR]
[--progress-spinner {on,off}] [--timeout TIMEOUT]
[--path PATHS] [-v] [--fix] [--require-hashes]
[--index-url INDEX_URL] [--extra-index-url EXTRA_INDEX_URLS]
audit the Python environment for dependencies with known vulnerabilities
Expand Down Expand Up @@ -119,6 +120,15 @@ optional arguments:
repeatable audits; this option is implied when any
package in a requirements file has a `--hash` option.
(default: False)
--index-url INDEX_URL
base URL of the Python Package Index; this should
point to a repository compliant with PEP 503 (the
simple repository API) (default:
https://pypi.org/simple)
--extra-index-url EXTRA_INDEX_URLS
extra URLs of package indexes to use in addition to
`--index-url`; should follow the same rules as
`--index-url` (default: [])
```
<!-- @end-pip-audit-help@ -->

Expand Down
31 changes: 27 additions & 4 deletions pip_audit/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pip_audit import __version__
from pip_audit._audit import AuditOptions, Auditor
from pip_audit._dependency_source import (
PYPI_URL,
DependencySource,
PipSource,
RequirementSource,
Expand Down Expand Up @@ -245,6 +246,22 @@ def _parser() -> argparse.ArgumentParser:
help="require a hash to check each requirement against, for repeatable audits; this option "
"is implied when any package in a requirements file has a `--hash` option.",
)
parser.add_argument(
"--index-url",
type=str,
help="base URL of the Python Package Index; this should point to a repository compliant "
"with PEP 503 (the simple repository API)",
default=PYPI_URL,
)
parser.add_argument(
"--extra-index-url",
type=str,
action="append",
dest="extra_index_urls",
default=[],
help="extra URLs of package indexes to use in addition to `--index-url`; should follow the "
"same rules as `--index-url`",
)
return parser


Expand All @@ -268,9 +285,14 @@ def audit() -> None:
output_desc = args.desc.to_bool(args.format)
formatter = args.format.to_format(output_desc)

# The `--require-hashes` flag is only valid with requirements files
if args.require_hashes and args.requirements is None:
parser.error("The --require-hashes flag can only be used with --requirement (-r)")
# Check for flags that are only valid with requirements files
if args.requirements is None:
if args.require_hashes:
parser.error("The --require-hashes flag can only be used with --requirement (-r)")
elif args.index_url != PYPI_URL:
parser.error("The --index-url flag can only be used with --requirement (-r)")
elif args.extra_index_urls:
parser.error("The --extra-index-url flag can only be used with --requirement (-r)")

with ExitStack() as stack:
actors = []
Expand All @@ -280,10 +302,11 @@ def audit() -> None:

source: DependencySource
if args.requirements is not None:
index_urls = [args.index_url] + args.extra_index_urls
req_files: List[Path] = [Path(req.name) for req in args.requirements]
source = RequirementSource(
req_files,
ResolveLibResolver(args.timeout, args.cache_dir, state),
ResolveLibResolver(index_urls, args.timeout, args.cache_dir, state),
args.require_hashes,
state,
)
Expand Down
3 changes: 2 additions & 1 deletion pip_audit/_dependency_source/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
)
from .pip import PipSource, PipSourceError
from .requirement import RequirementSource
from .resolvelib import ResolveLibResolver
from .resolvelib import PYPI_URL, ResolveLibResolver

__all__ = [
"PYPI_URL",
"DependencyFixError",
"DependencyResolver",
"DependencyResolverError",
Expand Down
3 changes: 2 additions & 1 deletion pip_audit/_dependency_source/resolvelib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
`resolvelib` interactions for `pip-audit`.
"""

from .resolvelib import ResolveLibResolver, ResolveLibResolverError
from .resolvelib import PYPI_URL, ResolveLibResolver, ResolveLibResolverError

__all__ = [
"PYPI_URL",
"ResolveLibResolver",
"ResolveLibResolverError",
]
37 changes: 30 additions & 7 deletions pip_audit/_dependency_source/resolvelib/pypi_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,14 +172,33 @@ def _get_metadata_for_sdist(self):
return metadata


def get_project_from_pypi(
session, project, extras, timeout: Optional[int], state: AuditState
def get_project_from_indexes(
    index_urls: List[str], session, project, extras, timeout: Optional[int], state: AuditState
) -> Iterator[Candidate]:
    """
    Yield candidates for `project` (with `extras`) from every index in `index_urls`.

    The indexes are queried in the order given. An index that raises `PyPINotFoundError` for
    the project is skipped; the error is only propagated (with a message listing all of the
    supplied index URLs) once every index has been tried and none of them had the project.
    """
    found_on_any_index = False
    for base_url in index_urls:
        try:
            yield from get_project_from_index(base_url, session, project, extras, timeout, state)
        except PyPINotFoundError:
            # A project being absent from one index is expected when several are configured,
            # so move on to the next index instead of failing.
            continue
        else:
            found_on_any_index = True

    if found_on_any_index:
        return
    raise PyPINotFoundError(
        f'Could not find project "{project}" on any of the supplied index URLs: {index_urls}'
    )


def get_project_from_index(
index_url: str, session, project, extras, timeout: Optional[int], state: AuditState
) -> Iterator[Candidate]:
"""Return candidates from an index created from the project name and extras."""
url = index_url + "/" + project
response: requests.Response = session.get(url, timeout=timeout)
if response.status_code == 404:
raise PyPINotFoundError(f'Could not find project "{project}" on PyPI')
raise PyPINotFoundError
response.raise_for_status()
data = response.content
doc = html5lib.parse(data, namespaceHTMLElements=False)
Expand Down Expand Up @@ -231,20 +250,24 @@ class PyPIProvider(AbstractProvider):

def __init__(
self,
index_urls: List[str],
timeout: Optional[int] = None,
cache_dir: Optional[Path] = None,
state: AuditState = AuditState(),
):
"""
Create a new `PyPIProvider`.
`index_urls` is a list of package index URLs.
`timeout` is an optional argument to control how many seconds the component should wait for
responses to network requests.
`cache_dir` is an optional argument to override the default HTTP caching directory.
`state` is an `AuditState` to use for state callbacks.
"""
self.index_urls = index_urls
self.timeout = timeout
self.session = caching_session(cache_dir, use_pip=True)
self._state = state
Expand Down Expand Up @@ -282,8 +305,8 @@ def find_matches(self, identifier, requirements, incompatibilities):
candidates = sorted(
[
candidate
for candidate in get_project_from_pypi(
self.session, identifier, extras, self.timeout, self._state
for candidate in get_project_from_indexes(
self.index_urls, self.session, identifier, extras, self.timeout, self._state
)
if candidate.version not in bad_versions
and all(candidate.version in r.specifier for r in requirements)
Expand Down
5 changes: 4 additions & 1 deletion pip_audit/_dependency_source/resolvelib/resolvelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

logger = logging.getLogger(__name__)

PYPI_URL = "https://pypi.org/simple"


class ResolveLibResolver(DependencyResolver):
"""
Expand All @@ -28,6 +30,7 @@ class ResolveLibResolver(DependencyResolver):

def __init__(
self,
index_urls: List[str] = [PYPI_URL],
timeout: Optional[int] = None,
cache_dir: Optional[Path] = None,
state: AuditState = AuditState(),
Expand All @@ -40,7 +43,7 @@ def __init__(
`state` is an `AuditState` to use for state callbacks.
"""
self.provider = PyPIProvider(timeout, cache_dir, state)
self.provider = PyPIProvider(index_urls, timeout, cache_dir, state)
self.reporter = BaseReporter()
self.resolver: Resolver = Resolver(self.provider, self.reporter)

Expand Down
100 changes: 99 additions & 1 deletion test/dependency_source/test_resolvelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,105 @@ def __init__(self):
resolved_deps = dict(resolver.resolve_all(iter([req])))
assert len(resolved_deps) == 1
expected_deps = [
SkippedDependency(name="flask", skip_reason='Could not find project "flask" on PyPI')
SkippedDependency(
name="flask",
skip_reason='Could not find project "flask" on any of the supplied index URLs: '
"['https://pypi.org/simple']",
)
]
assert req in resolved_deps
assert resolved_deps[req] == expected_deps


def test_resolvelib_multiple_indexes(monkeypatch):
    """Candidates served by different indexes must all take part in resolution."""
    url1 = "https://index1"
    url2 = "https://index2"

    # Simple-API page served by the first index: only Flask 0.5
    data1 = (
        '<a href="https://files.pythonhosted.org/packages/d4/6a/'
        "93500f2a7089b4e993fb095215979890b6204a5ba3f6b0f63dc6c3c6c827/Flask-0.5.tar.gz#"
        'sha256=20e176b1db0e2bfe92d869f7b5d0ee3e5d6cb60e793755aaf2284bd78a6202ea">Flask-0.5.tar.gz'
        "</a><br/>"
    )
    # Simple-API page served by the second index: only Flask 0.6
    data2 = (
        '<a href="https://files.pythonhosted.org/packages/44/86/'
        "481371798994529e105633a50b2332638105a1e191053bc0f4bbc9b91791/Flask-0.6.tar.gz#"
        'sha256=9dc18a7c673bf0a6fada51e011fc411285a8301f6dfc1c000ebfa272b5e609e4">Flask-0.6.tar.gz'
        "</a><br/>"
    )
    package_url1 = f"{url1}/flask"
    package_url2 = f"{url2}/flask"

    monkeypatch.setattr(
        pypi_provider.Candidate, "_get_metadata_for_sdist", lambda _: get_metadata_mock()
    )

    def fake_session_get(url, **kwargs):
        # Route each project page request to the index it was addressed to
        if url == package_url1:
            return get_package_mock(data1)
        assert url == package_url2
        return get_package_mock(data2)

    resolver = resolvelib.ResolveLibResolver([url1, url2])
    monkeypatch.setattr(resolver.provider.session, "get", fake_session_get)

    # The first specifier can only be satisfied by the first index (0.5); the second one
    # admits the newer release that only exists on the second index (0.6).
    for specifier, expected_version in (("flask<=0.5", "0.5"), ("flask<=0.6", "0.6")):
        req = Requirement(specifier)
        resolved_deps = dict(resolver.resolve_all(iter([req])))
        assert req in resolved_deps
        assert resolved_deps[req] == [ResolvedDependency("flask", Version(expected_version))]


def test_resolvelib_package_missing_on_one_index(monkeypatch):
    """A 404 from one index must not stop resolution from using the other index."""
    url1 = "https://index1"
    url2 = "https://index2"
    package_url1 = f"{url1}/flask"
    package_url2 = f"{url2}/flask"
    # Only the first index serves flask, and it only has version 0.5
    data1 = (
        '<a href="https://files.pythonhosted.org/packages/d4/6a/'
        "93500f2a7089b4e993fb095215979890b6204a5ba3f6b0f63dc6c3c6c827/Flask-0.5.tar.gz#"
        'sha256=20e176b1db0e2bfe92d869f7b5d0ee3e5d6cb60e793755aaf2284bd78a6202ea">Flask-0.5.tar.gz'
        "</a><br/>"
    )

    monkeypatch.setattr(
        pypi_provider.Candidate, "_get_metadata_for_sdist", lambda _: get_metadata_mock()
    )

    # Simulate the package not existing on the second index
    def get_multiple_index_package_mock(url):
        if url == package_url1:
            return get_package_mock(data1)
        else:
            assert url == package_url2
            pkg = get_package_mock(str())
            pkg.status_code = 404
            return pkg

    resolver = resolvelib.ResolveLibResolver([url1, url2])
    monkeypatch.setattr(
        resolver.provider.session, "get", lambda url, **kwargs: get_multiple_index_package_mock(url)
    )

    # If a package doesn't exist on one index, we shouldn't expect an error. We should just skip it
    # and only use the other index for finding candidates.
    req = Requirement("flask<=0.5")
    resolved_deps = dict(resolver.resolve_all(iter([req])))
    assert req in resolved_deps
    assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.5"))]

    # A looser specifier must still resolve to 0.5: the second index answers 404 for flask, so
    # the 0.5 release on the first index remains the only candidate.
    req = Requirement("flask<=0.6")
    resolved_deps = dict(resolver.resolve_all(iter([req])))
    assert req in resolved_deps
    assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.5"))]

0 comments on commit a9c12b9

Please sign in to comment.