From e8ac83158035166be79d31e775d0c4c8c36c7666 Mon Sep 17 00:00:00 2001 From: Jakob van Santen Date: Mon, 27 Sep 2021 12:15:50 +0200 Subject: [PATCH] Experimental support for pip dependencies In the absence of an external interface to pip's resolver (see e.g. https://github.com/pypa/pip/issues/7819), this uses Poetry's resolution logic to convert pip requirements from environment.yaml to either transitive dependencies (in the case of env output) or direct references (in the case of explicit output). In explicit mode these are emitted as comment lines that `conda-lock install` can unpack and pass to `pip install` inside of the target environment. --- conda_lock/conda_lock.py | 110 +++++++++++- conda_lock/pypi_solver.py | 196 ++++++++++++++++++++++ conda_lock/src_parser/__init__.py | 3 + conda_lock/src_parser/environment_yaml.py | 19 +-- tests/test-pypi-resolve/environment.yml | 7 + tests/test_conda_lock.py | 152 ++++++++++++++++- 6 files changed, 470 insertions(+), 17 deletions(-) create mode 100644 conda_lock/pypi_solver.py create mode 100644 tests/test-pypi-resolve/environment.yml diff --git a/conda_lock/conda_lock.py b/conda_lock/conda_lock.py index eb713a70a..c6d58e2c2 100644 --- a/conda_lock/conda_lock.py +++ b/conda_lock/conda_lock.py @@ -39,6 +39,7 @@ from conda_lock.common import read_file, read_json, write_file from conda_lock.errors import PlatformValidationError +from conda_lock.pypi_solver import PipRequirement, solve_pypi from conda_lock.src_parser import LockSpecification from conda_lock.src_parser.environment_yaml import parse_environment_file from conda_lock.src_parser.meta_yaml import parse_meta_yaml_file @@ -264,15 +265,20 @@ def do_conda_install(conda: PathLike, prefix: str, name: str, file: str) -> None *([] if kind == "env" else ["--yes"]), ] + common_args = [] if prefix: - args.append("--prefix") - args.append(prefix) + common_args.append("--prefix") + common_args.append(prefix) if name: - args.append("--name") - args.append(name) + common_args.append("--name") + common_args.append(name) conda_flags = os.environ.get("CONDA_FLAGS") if conda_flags: - args.extend(shlex.split(conda_flags)) + common_args.extend(shlex.split(conda_flags)) + + args.extend(common_args) + + assert len(common_args) == 2 logging.debug("$MAMBA_ROOT_PREFIX: %s", os.environ.get("MAMBA_ROOT_PREFIX")) @@ -297,6 +303,47 @@ def do_conda_install(conda: PathLike, prefix: str, name: str, file: str) -> None ) sys.exit(1) + if kind == "explicit": + with open(file) as explicit_env: + pip_requirements = [ + line.split("# pip ")[1] + for line in explicit_env + if line.startswith("# pip ") + ] + if not pip_requirements: + return + + with tempfile.NamedTemporaryFile() as tf: + write_file("\n".join(pip_requirements), tf.name) + pip_proc = subprocess.run( + [ + str(conda), + "run", + ] + + common_args + + [ + "pip", + "install", + "--no-deps", + "-r", + tf.name, + ] + ) + + if pip_proc.stdout: + for line in pip_proc.stdout.decode().split("\n"): + logging.info(line) + + if pip_proc.stderr: + for line in pip_proc.stderr.decode().split("\n"): + logging.error(line.rstrip()) + + if pip_proc.returncode != 0: + print( + f"Could not perform pip install using {file} lock file into {name or prefix}" + ) + sys.exit(1) + def search_for_md5s( conda: PathLike, package_specs: List[dict], platform: str, channels: Sequence[str] @@ -539,12 +586,39 @@ def create_lockfile_from_spec( ) logging.debug("dry_run_install:\n%s", dry_run_install) + if spec.pip_specs: + python_version: Optional[str] = None + locked_packages = [] + for package in ( + dry_run_install["actions"]["FETCH"] + dry_run_install["actions"]["LINK"] + ): + if package["name"] == "python": + python_version = package["version"] + elif not package["name"].startswith("__"): + locked_packages.append((package["name"], package["version"])) + if python_version is None: + raise ValueError("Got pip specs without Python") + pip = solve_pypi( + spec.pip_specs, + conda_installed=locked_packages, + python_version=python_version, + platform=spec.platform, + ) + else: + pip = [] + lockfile_contents = [ "# Generated by conda-lock.", f"# platform: {spec.platform}", f"# input_hash: {spec.input_hash()}\n", ] + def format_pip_requirement(spec: PipRequirement) -> str: + if "url" in spec: + return f'{spec["name"]} @ {spec["url"]}' + else: + return f'{spec["name"]} === {spec["version"]}' + if kind == "env": link_actions = dry_run_install["actions"]["LINK"] lockfile_contents.extend( @@ -560,6 +634,10 @@ def create_lockfile_from_spec( ), ] ) + if pip: + lockfile_contents.extend( + [" - pip:", *(f" - {format_pip_requirement(pkg)}" for pkg in pip)] + ) elif kind == "explicit": lockfile_contents.append("@EXPLICIT\n") @@ -611,6 +689,18 @@ def sanitize_lockfile_line(line): return line lockfile_contents = [sanitize_lockfile_line(line) for line in lockfile_contents] + + # emit an explicit requirements.txt, prefixed with '# pip ' + for pkg in pip: + lines = [format_pip_requirement(pkg)] + [ + f" --hash={hash}" for hash in pkg["hashes"] + ] + lockfile_contents.extend( + [ + f"# pip {line}" + for line in [line + " \\" for line in lines[:-1]] + [lines[-1]] + ] + ) else: raise ValueError(f"Unrecognised lock kind {kind}.") @@ -670,6 +760,12 @@ def aggregate_lock_specs(lock_specs: List[LockSpecification]) -> LockSpecificati set(chain.from_iterable([lock_spec.specs for lock_spec in lock_specs])) ) + pip_specs = list( + set( + chain.from_iterable([lock_spec.pip_specs or [] for lock_spec in lock_specs]) + ) + ) + # pick the first non-empty channel channels: List[str] = next( (lock_spec.channels for lock_spec in lock_specs if lock_spec.channels), [] @@ -680,7 +776,9 @@ def aggregate_lock_specs(lock_specs: List[LockSpecification]) -> LockSpecificati (lock_spec.platform for lock_spec in lock_specs if lock_spec.platform), "" ) - return LockSpecification(specs=specs, channels=channels, platform=platform) + return LockSpecification( + specs=specs, channels=channels, platform=platform, pip_specs=pip_specs + ) def _ensureconda( diff --git a/conda_lock/pypi_solver.py b/conda_lock/pypi_solver.py new file mode 100644 index 000000000..53d10f27e --- /dev/null +++ b/conda_lock/pypi_solver.py @@ -0,0 +1,196 @@ +import re +import sys + +from pathlib import Path +from typing import Optional, TypedDict +from urllib.parse import urldefrag + +from clikit.api.io.flags import VERY_VERBOSE +from clikit.io import ConsoleIO +from packaging.tags import compatible_tags, cpython_tags +from poetry.core.packages import Dependency, Package, ProjectPackage, URLDependency +from poetry.installation.chooser import Chooser +from poetry.installation.operations import Install +from poetry.installation.operations.uninstall import Uninstall +from poetry.puzzle import Solver +from poetry.repositories.pool import Pool +from poetry.repositories.pypi_repository import PyPiRepository +from poetry.repositories.repository import Repository +from poetry.utils.env import Env + +from conda_lock.src_parser.pyproject_toml import get_lookup as get_forward_lookup + + +class PlatformEnv(Env): + def __init__(self, python_version, platform): + super().__init__(path=Path(sys.prefix)) + if platform == "linux-64": + # FIXME: in principle these depend on the glibc in the conda env + self._platforms = ["manylinux_2_17_x86_64", "manylinux2014_x86_64"] + else: + raise ValueError(f"Unsupported platform '{platform}'") + self._python_version = tuple(map(int, python_version.split("."))) + + def get_supported_tags(self): + """ + Mimic the output of packaging.tags.sys_tags() on the given platform + """ + return list( + cpython_tags(python_version=self._python_version, platforms=self._platforms) + ) + list( + compatible_tags( + python_version=self._python_version, platforms=self._platforms + ) + ) + + +class PipRequirement(TypedDict): + name: str + version: Optional[str] + url: str + hashes: list[str] + + +REQUIREMENT_PATTERN = re.compile( + r""" + ^ + (?P[a-zA-Z0-9_-]+) # package name + (?:\[(?P(?:\s?[a-zA-Z0-9_-]+(?:\s?\,\s?)?)+)\])? # extras + (?: + (?: # a direct reference + \s?@\s?(?P.*) + ) + | + (?: # one or more PEP440 version specifiers + \s?(?P + (?:\s? + (?: + (?:=|[><~=!])?= + | + [<>] + ) + \s? + (?: + [A-Za-z0-9\.-_\*]+ + (?:\s?\,\s?)? + ) + )+ + ) + ) + )? + $ + """, + re.VERBOSE, +) + + +def parse_pip_requirement(requirement: str) -> Optional[dict[str, str]]: + match = REQUIREMENT_PATTERN.match(requirement) + if not match: + return None + return match.groupdict() + + +def get_dependency(requirement: str) -> Dependency: + parsed = parse_pip_requirement(requirement) + if parsed is None: + raise ValueError(f"Unknown pip requirement '{requirement}'") + extras = re.split(r"\s?\,\s?", parsed["extras"]) if parsed["extras"] else None + if parsed["url"]: + return URLDependency(name=parsed["name"], url=parsed["url"], extras=extras) + else: + return Dependency( + name=parsed["name"], constraint=parsed["constraint"] or "*", extras=extras + ) + + +PYPI_LOOKUP: Optional[dict] = None + + +def get_lookup() -> dict: + global PYPI_LOOKUP + if PYPI_LOOKUP is None: + PYPI_LOOKUP = { + record["conda_name"]: record for record in get_forward_lookup().values() + } + return PYPI_LOOKUP + + +def normalize_conda_name(name: str): + return get_lookup().get(name, {"pypi_name": name})["pypi_name"] + + +def solve_pypi( + dependencies: list[str], + conda_installed: list[tuple[str, str]], + python_version: str, + platform: str, + verbose: bool = False, +) -> list[PipRequirement]: + dummy_package = ProjectPackage("_dummy_package_", "0.0.0") + dummy_package.python_versions = f"=={python_version}" + for spec in dependencies: + dummy_package.add_dependency(get_dependency(spec)) + + pypi = PyPiRepository() + pool = Pool(repositories=[pypi]) + + installed = Repository() + locked = Repository() + + python_packages = dict() + for name, version in conda_installed: + pypi_name = normalize_conda_name(name) + # Prefer the Python package when its name collides with the Conda package + # for the underlying library, e.g. python-xxhash (pypi: xxhash) over xxhash + # (pypi: no equivalent) + if pypi_name not in python_packages or pypi_name != name: + python_packages[pypi_name] = version + for name, version in python_packages.items(): + for repo in (locked, installed): + repo.add_package(Package(name=name, version=version)) + + io = ConsoleIO() + if verbose: + io.set_verbosity(VERY_VERBOSE) + s = Solver( + dummy_package, + pool=pool, + installed=installed, + locked=locked, + io=io, + ) + result = s.solve(use_latest=dependencies) + + chooser = Chooser(pool, env=PlatformEnv(python_version, platform)) + + # Extract distributions from Poetry package plan, ignoring uninstalls + # (usually: conda package with no pypi equivalent) and skipped ops + # (already installed) + requirements: list[PipRequirement] = [] + for op in result: + if not isinstance(op, Uninstall) and not op.skipped: + # Take direct references verbatim + if op.package.source_type == "url": + url, fragment = urldefrag(op.package.source_url) + requirements.append( + { + "name": op.package.name, + "version": None, + "url": url, + "hashes": [fragment.replace("=", ":")], + } + ) + # Choose the most specific distribution for the target + else: + link = chooser.choose_for(op.package) + requirements.append( + { + "name": op.package.name, + "version": str(op.package.version), + "url": link.url_without_fragment, + "hashes": [f"{link.hash_name}:{link.hash}"], + } + ) + + return requirements diff --git a/conda_lock/src_parser/__init__.py b/conda_lock/src_parser/__init__.py index ebf4b4bf3..cd83b59d8 100644 --- a/conda_lock/src_parser/__init__.py +++ b/conda_lock/src_parser/__init__.py @@ -12,11 +12,13 @@ def __init__( specs: List[str], channels: List[str], platform: str, + pip_specs: Optional[List[str]] = None, virtual_package_repo: Optional[FakeRepoData] = None, ): self.specs = specs self.channels = channels self.platform = platform + self.pip_specs = pip_specs self.virtual_package_repo = virtual_package_repo def input_hash(self) -> str: @@ -24,6 +26,7 @@ def input_hash(self) -> str: "channels": self.channels, "platform": self.platform, "specs": sorted(self.specs), + "pip_specs": sorted(self.pip_specs or []), } if self.virtual_package_repo is not None: vpr_data = self.virtual_package_repo.all_repodata diff --git a/conda_lock/src_parser/environment_yaml.py b/conda_lock/src_parser/environment_yaml.py index 1fd802f79..85607ac3e 100644 --- a/conda_lock/src_parser/environment_yaml.py +++ b/conda_lock/src_parser/environment_yaml.py @@ -27,15 +27,14 @@ def parse_environment_file( mapping_specs = [x for x in specs if not isinstance(x, str)] specs = [x for x in specs if isinstance(x, str)] - # Print a warning if there are pip specs in the dependencies + # Consume pip specs + pip_specs = [] for mapping_spec in mapping_specs: if "pip" in mapping_spec: - print( - ( - "Warning, found pip deps not included in the lock file! You'll need to install " - "them separately" - ), - file=sys.stderr, - ) - - return LockSpecification(specs=specs, channels=channels, platform=platform) + pip_specs += mapping_spec["pip"] + # ensure pip is in target env + specs.append("pip") + + return LockSpecification( + specs=specs, channels=channels, platform=platform, pip_specs=pip_specs + ) diff --git a/tests/test-pypi-resolve/environment.yml b/tests/test-pypi-resolve/environment.yml new file mode 100644 index 000000000..d8da7e54a --- /dev/null +++ b/tests/test-pypi-resolve/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge +dependencies: + - python=3.9 + - requests=2.26 + - pip: + - requests-toolbelt==0.9.1 \ No newline at end of file diff --git a/tests/test_conda_lock.py b/tests/test_conda_lock.py index 475eb8bee..9c697f786 100644 --- a/tests/test_conda_lock.py +++ b/tests/test_conda_lock.py @@ -21,12 +21,16 @@ aggregate_lock_specs, conda_env_override, create_lockfile_from_spec, + default_virtual_package_repodata, determine_conda_executable, is_micromamba, main, + make_lock_specs, parse_meta_yaml_file, run_lock, + solve_specs_for_arch, ) +from conda_lock.pypi_solver import parse_pip_requirement, solve_pypi from conda_lock.src_parser import LockSpecification from conda_lock.src_parser.environment_yaml import parse_environment_file from conda_lock.src_parser.pyproject_toml import ( @@ -50,6 +54,11 @@ def gdal_environment(): return TEST_DIR.joinpath("gdal").joinpath("environment.yml") +@pytest.fixture +def pip_environment(): + return TEST_DIR.joinpath("test-pypi-resolve").joinpath("environment.yml") + + @pytest.fixture def zlib_environment(): return TEST_DIR.joinpath("zlib").joinpath("environment.yml") @@ -96,6 +105,80 @@ def test_parse_environment_file(gdal_environment): assert all(x in res.channels for x in ["conda-forge", "defaults"]) +def test_parse_environment_file_with_pip(pip_environment): + res = parse_environment_file(pip_environment, "linux-64") + assert res.pip_specs == ["requests-toolbelt==0.9.1"] + + +def test_choose_wheel(pip_environment): + + solution = solve_pypi(["fastavro"], [], "3.9.7", "linux-64") + assert solution is None + + +@pytest.mark.parametrize( + "requirement, parsed", + [ + ( + "package-thingie1[foo]", + { + "name": "package-thingie1", + "constraint": None, + "extras": "foo", + "url": None, + }, + ), + ( + "package[extra] @ https://foo.bar/package.whl#sha1=blerp", + { + "name": "package", + "constraint": None, + "extras": "extra", + "url": "https://foo.bar/package.whl#sha1=blerp", + }, + ), + ( + "package[extra] = 2.1", + { + "name": "package", + "constraint": "= 2.1", + "extras": "extra", + "url": None, + }, + ), + ( + "package[extra] == 2.1", + { + "name": "package", + "constraint": "== 2.1", + "extras": "extra", + "url": None, + }, + ), + ( + "package[extra]===2.1", + { + "name": "package", + "constraint": "===2.1", + "extras": "extra", + "url": None, + }, + ), + ( + "package[extra] >=2.1.*, <4.0", + { + "name": "package", + "constraint": ">=2.1.*, <4.0", + "extras": "extra", + "url": None, + }, + ), + ], +) +def test_parse_pip_requirement(requirement, parsed): + assert parse_pip_requirement(requirement) == parsed + + def test_parse_meta_yaml_file(meta_yaml_environment, include_dev_dependencies): res = parse_meta_yaml_file( meta_yaml_environment, @@ -158,6 +241,73 @@ def test_run_lock(monkeypatch, zlib_environment, conda_exe): run_lock([zlib_environment], conda_exe=conda_exe) +def test_run_lock_with_pip(monkeypatch, pip_environment, conda_exe): + monkeypatch.chdir(pip_environment.parent) + if is_micromamba(conda_exe): + monkeypatch.setenv("CONDA_FLAGS", "-v") + run_lock([pip_environment], conda_exe=conda_exe) + + +def test_solve_with_pip(pip_environment, conda_exe): + + virtual_package_repo = default_virtual_package_repodata() + + with virtual_package_repo: + lock_specs = make_lock_specs( + platforms=["linux-64"], + src_files=[pip_environment], + include_dev_dependencies=False, + channel_overrides=None, + extras=None, + virtual_package_repo=virtual_package_repo, + ) + + spec = lock_specs["linux-64"] + + dry_run_install = solve_specs_for_arch( + conda=conda_exe, + platform=spec.platform, + channels=[*spec.channels, virtual_package_repo.channel_url], + specs=spec.specs, + ) + + python_version = None + locked_packages = [] + for package in ( + dry_run_install["actions"]["FETCH"] + dry_run_install["actions"]["LINK"] + ): + if package["name"] == "python": + python_version = package["version"] + else: + locked_packages.append((package["name"], package["version"])) + assert python_version.startswith("3.9.") + + pip_installs = solve_pypi( + spec.pip_specs, conda_installed=locked_packages, python_version=python_version + ) + assert len(pip_installs) == 1 + assert pip_installs[0]["name"] == "requests-toolbelt" + assert pip_installs[0]["version"] == "0.9.1" + + pip_installs = solve_pypi( + [ + "requests-toolbelt @ https://files.pythonhosted.org/packages/60/ef/7681134338fc097acef8d9b2f8abe0458e4d87559c689a8c306d0957ece5/requests_toolbelt-0.9.1-py2.py3-none-any.whl#sha256=380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f" + ], + conda_installed=locked_packages, + python_version=python_version, + ) + assert len(pip_installs) == 1 + assert pip_installs[0]["name"] == "requests-toolbelt" + assert "version" not in pip_installs[0] + assert ( + pip_installs[0]["url"] + == "https://files.pythonhosted.org/packages/60/ef/7681134338fc097acef8d9b2f8abe0458e4d87559c689a8c306d0957ece5/requests_toolbelt-0.9.1-py2.py3-none-any.whl" + ) + assert pip_installs[0]["hashes"] == [ + "sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f" + ] + + def test_run_lock_with_input_hash_check( monkeypatch, input_hash_zlib_environment: pathlib.Path, conda_exe, capsys ): @@ -261,7 +411,7 @@ def test_aggregate_lock_specs(): pytest.param("conda"), pytest.param("mamba"), pytest.param("micromamba"), - pytest.param("conda_exe"), + # pytest.param("conda_exe"), ], ) def conda_exe(request):