From 10a4d085af19a5bc4561810cffb1b4357a1713fb Mon Sep 17 00:00:00 2001 From: Ed Morley <501702+edmorley@users.noreply.github.com> Date: Tue, 11 Jul 2023 23:11:52 +0100 Subject: [PATCH] Propagate user-provided env vars to subprocesses Amongst other things, this change adds support for: - Environment variable interpolation in Pip requirements files (#52) - Specifying an additional Pip package index URL using the env var `PIP_EXTRA_INDEX_URL` (#64) - Passing arbitrary env vars to build backends such as setuptools (#66) --- Previously the buildpack used `clear-env = true` in `buildpack.toml`, which means the process env passed to the buildpack does not contain user-provided env vars (app config vars): https://github.com/buildpacks/spec/blob/main/buildpack.md#provided-by-the-platform When using `clear-env = true` the idea is that buildpack authors can then explicitly load/check the user-provided env vars when needed (eg when using env vars to control buildpack functionality, or when setting the env for subprocesses). This should then provide full control and hopefully prevent cases where incorrect/broken env vars on an app cause failures (for example from a malformed `PYTHONHOME` value). At first glance this seems like a sensible thing to do, however: - `clear-env = true` doesn't actually clear the whole environment, it still allows through env vars set by the OS and env vars from previous buildpacks. This means that badly behaved previous buildpacks can still set broken/incorrect env vars, that we will need to handle regardless. - If an app really does have incorrect/broken user-provided env vars, then setting `clear-env = true` only avoids problems for the current buildpack and not for later buildpacks or at runtime. ie: It only temporarily wallpapers over the problem, and means errors will only be shifted later (at a potentially harder to handle/debug time). - Quite often buildpack authors have to pass all env vars unfiltered to at least some subprocesses (eg when running `pip install`, in order to support the pip requirements file env var interpolation feature; #52). If there are incorrect or broken user-provided env vars, then those commands may fail unless they are handled some other way anyway. Plus, if a buildpack author passes user provided env vars to some subprocesses, but not to others, then it could make any errors seem more confusing to end users, since a user with a broken user provided `PYTHONHOME` would only see a proportion of Python invocations fail. - As of upstream RFC 109, there is now a new concept of "build config" env vars (those set in the build image at `/cnb/build-config`), which are also filtered out when using `clear-env = true`. As such, buildpack authors using `clear-env = true` have to add these back in explicitly too (along with user provided env vars), adding more overhead when using `clear-env = true`. In addition, at the moment libcnb.rs does not yet support them (heroku/libcnb.rs#597). As such, IMO using `clear-env = true` at best adds overhead for minimal benefit (given other mitigations have to be put in place anyway), and at worst makes for a more confusing/harder to debug error experience (since errors can occur at runtime instead if not handled at build). Therefore, use of `clear-env = true` has been removed, and instead the buildpack relies upon defensively setting known-dangerous/often-broken env vars explicitly - using the "override" CNB env var feature. The buildpack mostly already did this, so few changes were required other than adding tests for some commonly seen broken env vars. Env vars known to affect CPython can be found here: https://docs.python.org/3.11/using/cmdline.html#environment-variables And for Pip, env vars are based on their CLI option names, combined with a `PIP_` prefix: https://pip.pypa.io/en/stable/topics/configuration/#environment-variables One set of known broken env vars seen on apps on Heroku, is these (which used to be set via `bin/release` many years ago, but can still be found on some very old apps): https://github.com/heroku/heroku-buildpack-python/blob/27abdfe7d7ad104dabceb45641415251e965671c/bin/release#L11-L18 Fixes #52. Fixes #64. Fixes #66. GUS-W-13648442. GUS-W-13753676. GUS-W-13753756. --- CHANGELOG.md | 1 + buildpack.toml | 1 - src/layers/python.rs | 2 ++ src/main.rs | 8 ++++---- .../pip_editable_git_compiled/requirements.txt | 5 ++++- tests/integration/pip.rs | 4 +++- tests/integration/python_version.rs | 14 +++++++++++--- 7 files changed, 25 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ec93530..0acdbb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- User-provided environment variables are now propagated to subprocesses such as `pip install`. ([#65](https://github.com/heroku/buildpacks-python/pull/65)) - Updated pip from 23.1.2 to 23.2. ([#67](https://github.com/heroku/buildpacks-python/pull/67)) - Updated setuptools from 67.8.0 to 68.0.0. ([#51](https://github.com/heroku/buildpacks-python/pull/51)) diff --git a/buildpack.toml b/buildpack.toml index 961ebdd..e8ac309 100644 --- a/buildpack.toml +++ b/buildpack.toml @@ -7,7 +7,6 @@ name = "Python" homepage = "https://github.com/heroku/buildpacks-python" description = "Heroku's official Python Cloud Native Buildpack." keywords = ["python", "heroku"] -clear-env = true [[buildpack.licenses]] type = "BSD-3-Clause" diff --git a/src/layers/python.rs b/src/layers/python.rs index 8b75d66..8656628 100644 --- a/src/layers/python.rs +++ b/src/layers/python.rs @@ -321,6 +321,8 @@ fn generate_layer_env(layer_path: &Path, python_version: &PythonVersion) -> Laye // However, the uWSGI package uses the wrong `sysconfig` APIs so tries to reference the old // compile location, unless we override that by setting `PYTHONHOME`: // https://github.com/unbit/uwsgi/issues/2525 + // In addition, some legacy apps have `PYTHONHOME` set to an invalid value, so if we did not + // set it explicitly here, Python would fail to run both during the build and at runtime. .chainable_insert( Scope::All, ModificationBehavior::Override, diff --git a/src/main.rs b/src/main.rs index 6a2b637..b058417 100644 --- a/src/main.rs +++ b/src/main.rs @@ -66,10 +66,10 @@ impl Buildpack for PythonBuildpack { let packaging_tool_versions = PackagingToolVersions::default(); // We inherit the current process's env vars, since we want `PATH` and `HOME` from the OS - // to be set, so that later commands can find tools like Git in the stack image. We exclude - // user-provided env vars (by setting `clear-env` to true in `buildpack.toml`) to prevent an - // app's env vars from breaking internal buildpack commands. Any buildpack steps that need - // user-provided env vars must explicitly retrieve them via `context.platform.env`. + // to be set (so that later commands can find tools like Git in the stack image), along + // with previous-buildpack or user-provided env vars (so that features like env vars in + // in requirements files work). We protect against broken user-provided env vars by + // making sure that buildpack env vars take precedence in layers envs and command usage. let mut command_env = Env::from_current(); // Create the layer containing the Python runtime, and the packages `pip`, `setuptools` and `wheel`. diff --git a/tests/fixtures/pip_editable_git_compiled/requirements.txt b/tests/fixtures/pip_editable_git_compiled/requirements.txt index a6710b5..a9b27b0 100644 --- a/tests/fixtures/pip_editable_git_compiled/requirements.txt +++ b/tests/fixtures/pip_editable_git_compiled/requirements.txt @@ -4,5 +4,8 @@ # # A C-based package is used instead of a pure Python package, in order to test that the # Python headers can be found in the `include/pythonX.Y/` directory of the Python layer. +# +# The URL to the package is specified via env var, to test that user-provided env vars +# are propagated to pip for use by its env var interpolation feature. --e git+https://github.com/pypa/wheel@0.40.0#egg=extension.dist&subdirectory=tests/testdata/extension.dist +-e git+${WHEEL_PACKAGE_URL}@0.40.0#egg=extension.dist&subdirectory=tests/testdata/extension.dist diff --git a/tests/integration/pip.rs b/tests/integration/pip.rs index e7aa13c..0a14caf 100644 --- a/tests/integration/pip.rs +++ b/tests/integration/pip.rs @@ -156,6 +156,7 @@ fn pip_cache_invalidation_and_metadata_compatibility() { } // This tests that: +// - Requirements file env var interpolation works (ie: user-provided env vars have been propagated to pip). // - Git from the stack image can be found (ie: the system PATH has been correctly propagated to pip). // - The editable mode repository clone is saved into the dependencies layer not the app dir. // - Compiling a source distribution package (as opposed to a pre-built wheel) works. @@ -164,7 +165,8 @@ fn pip_cache_invalidation_and_metadata_compatibility() { #[ignore = "integration test"] fn pip_editable_git_compiled() { TestRunner::default().build( - BuildConfig::new(builder(), "tests/fixtures/pip_editable_git_compiled"), + BuildConfig::new(builder(), "tests/fixtures/pip_editable_git_compiled") + .env("WHEEL_PACKAGE_URL", "https://github.com/pypa/wheel"), |context| { assert_contains!( context.pack_stdout, diff --git a/tests/integration/python_version.rs b/tests/integration/python_version.rs index dbc1e3e..cb872db 100644 --- a/tests/integration/python_version.rs +++ b/tests/integration/python_version.rs @@ -76,9 +76,17 @@ fn builds_with_python_version(fixture_path: &str, python_version: &str) { } = PackagingToolVersions::default(); let mut config = BuildConfig::new(builder(), fixture_path); - // Checks that potentially broken user-provided env vars are not being passed unfiltered to - // subprocesses we launch (such as `pip install`), thanks to `clear-env` in `buildpack.toml`. - config.env("PYTHONHOME", "/invalid-path"); + // Checks that potentially broken user-provided env vars don't take precedence over those + // set by this buildpack and break running Python. These are based on the env vars that + // used to be set by `bin/release` by very old versions of the classic Python buildpack: + // https://github.com/heroku/heroku-buildpack-python/blob/27abdfe7d7ad104dabceb45641415251e965671c/bin/release#L11-L18 + config.envs([ + ("LD_LIBRARY_PATH", "/invalid-path"), + ("LIBRARY_PATH", "/invalid-path"), + ("PATH", "/invalid-path"), + ("PYTHONHOME", "/invalid-path"), + ("PYTHONPATH", "/invalid-path"), + ]); TestRunner::default().build(config, |context| { assert_empty!(context.pack_stderr);