diff --git a/flytekit/image_spec/default_builder.py b/flytekit/image_spec/default_builder.py index aa9933c740..9bf8dc3852 100644 --- a/flytekit/image_spec/default_builder.py +++ b/flytekit/image_spec/default_builder.py @@ -77,6 +77,7 @@ $COPY_COMMAND_RUNTIME RUN $RUN_COMMANDS +$EXTRA_COPY_CMDS WORKDIR /root SHELL ["/bin/bash", "-c"] @@ -221,6 +222,28 @@ def create_docker_context(image_spec: ImageSpec, tmp_dir: Path): else: run_commands = "" + if image_spec.copy: + copy_commands = [] + for src in image_spec.copy: + src_path = Path(src) + + if src_path.is_absolute() or ".." in src_path.parts: + raise ValueError("Absolute paths or paths with '..' are not allowed in COPY command.") + + dst_path = tmp_dir / src_path + dst_path.parent.mkdir(parents=True, exist_ok=True) + + if src_path.is_dir(): + shutil.copytree(src_path, dst_path, dirs_exist_ok=True) + copy_commands.append(f"COPY --chown=flytekit {src_path.as_posix()} /root/{src_path.as_posix()}/") + else: + shutil.copy(src_path, dst_path) + copy_commands.append(f"COPY --chown=flytekit {src_path.as_posix()} /root/{src_path.parent.as_posix()}/") + + extra_copy_cmds = "\n".join(copy_commands) + else: + extra_copy_cmds = "" + docker_content = DOCKER_FILE_TEMPLATE.substitute( PYTHON_VERSION=python_version, UV_PYTHON_INSTALL_COMMAND=uv_python_install_command, @@ -232,6 +255,7 @@ def create_docker_context(image_spec: ImageSpec, tmp_dir: Path): COPY_COMMAND_RUNTIME=copy_command_runtime, ENTRYPOINT=entrypoint, RUN_COMMANDS=run_commands, + EXTRA_COPY_CMDS=extra_copy_cmds, ) dockerfile_path = tmp_dir / "Dockerfile" @@ -247,7 +271,7 @@ class DefaultImageBuilder(ImageSpecBuilder): "python_version", "builder", "source_root", - "copy", + "source_copy_mode", "env", "registry", "packages", @@ -263,6 +287,7 @@ class DefaultImageBuilder(ImageSpecBuilder): "pip_extra_index_url", # "registry_config", "commands", + "copy", } def build_image(self, image_spec: ImageSpec) -> str: diff --git a/flytekit/image_spec/image_spec.py b/flytekit/image_spec/image_spec.py index 216abecb99..5af5a5c2de 100644 --- a/flytekit/image_spec/image_spec.py +++ b/flytekit/image_spec/image_spec.py @@ -60,6 +60,7 @@ class ImageSpec: Python files into the image. If the option is set by the user, then that option is of course used. + copy: List of files/directories to copy to /root. e.g. ["src/file1.txt", "src/file2.txt"] """ name: str = "flytekit" @@ -84,6 +85,7 @@ class ImageSpec: commands: Optional[List[str]] = None tag_format: Optional[str] = None source_copy_mode: Optional[CopyFileDetection] = None + copy: Optional[List[str]] = None def __post_init__(self): self.name = self.name.lower() @@ -179,6 +181,12 @@ def tag(self) -> str: # shortcut to represent all the files. spec = dataclasses.replace(spec, source_root=ls_digest) + if self.copy: + from flytekit.tools.fast_registration import compute_digest + + digest = compute_digest(self.copy, None) + spec = dataclasses.replace(spec, copy=digest) + if spec.requirements: requirements = hashlib.sha1(pathlib.Path(spec.requirements).read_bytes().strip()).hexdigest() spec = dataclasses.replace(spec, requirements=requirements) @@ -306,6 +314,12 @@ def with_apt_packages(self, apt_packages: Union[str, List[str]]) -> "ImageSpec": new_image_spec = self._update_attribute("apt_packages", apt_packages) return new_image_spec + def with_copy(self, src: Union[str, List[str]]) -> "ImageSpec": + """ + Builder that returns a new image spec with the source files copied to the destination directory. + """ + return self._update_attribute("copy", src) + def force_push(self) -> "ImageSpec": """ Builder that returns a new image spec with force push enabled. diff --git a/flytekit/tools/fast_registration.py b/flytekit/tools/fast_registration.py index 2a9522c422..6458bfab50 100644 --- a/flytekit/tools/fast_registration.py +++ b/flytekit/tools/fast_registration.py @@ -12,7 +12,7 @@ import time import typing from dataclasses import dataclass -from typing import Optional +from typing import List, Optional, Union import click from rich import print as rich_print @@ -170,7 +170,7 @@ def fast_package( return archive_fname -def compute_digest(source: os.PathLike, filter: Optional[callable] = None) -> str: +def compute_digest(source: Union[os.PathLike, List[os.PathLike]], filter: Optional[callable] = None) -> str: """ Walks the entirety of the source dir to compute a deterministic md5 hex digest of the dir contents. :param os.PathLike source: @@ -178,22 +178,37 @@ def compute_digest(source: os.PathLike, filter: Optional[callable] = None) -> st :return Text: """ hasher = hashlib.md5() - for root, _, files in os.walk(source, topdown=True): - files.sort() - - for fname in files: - abspath = os.path.join(root, fname) - # Only consider files that exist (e.g. disregard symlinks that point to non-existent files) - if not os.path.exists(abspath): - logger.info(f"Skipping non-existent file {abspath}") - continue - relpath = os.path.relpath(abspath, source) - if filter: - if filter(relpath): - continue - - _filehash_update(abspath, hasher) - _pathhash_update(relpath, hasher) + + def compute_digest_for_file(path: os.PathLike, rel_path: os.PathLike) -> None: + # Only consider files that exist (e.g. disregard symlinks that point to non-existent files) + if not os.path.exists(path): + logger.info(f"Skipping non-existent file {path}") + return + + if filter: + if filter(rel_path): + return + + _filehash_update(path, hasher) + _pathhash_update(rel_path, hasher) + + def compute_digest_for_dir(source: os.PathLike) -> None: + for root, _, files in os.walk(source, topdown=True): + files.sort() + + for fname in files: + abspath = os.path.join(root, fname) + relpath = os.path.relpath(abspath, source) + compute_digest_for_file(abspath, relpath) + + if isinstance(source, list): + for src in source: + if os.path.isdir(src): + compute_digest_for_dir(src) + else: + compute_digest_for_file(src, os.path.basename(src)) + else: + compute_digest_for_dir(source) return hasher.hexdigest() diff --git a/tests/flytekit/unit/core/image_spec/test_default_builder.py b/tests/flytekit/unit/core/image_spec/test_default_builder.py index e8b013619c..dacce33f01 100644 --- a/tests/flytekit/unit/core/image_spec/test_default_builder.py +++ b/tests/flytekit/unit/core/image_spec/test_default_builder.py @@ -7,7 +7,8 @@ from flytekit.image_spec import ImageSpec from flytekit.image_spec.default_builder import DefaultImageBuilder, create_docker_context from flytekit.constants import CopyFileDetection - +from pathlib import Path +import tempfile def test_create_docker_context(tmp_path): docker_context_path = tmp_path / "builder_root" @@ -21,22 +22,27 @@ def test_create_docker_context(tmp_path): other_requirements_path = tmp_path / "requirements.txt" other_requirements_path.write_text("threadpoolctl\n") - image_spec = ImageSpec( - name="FLYTEKIT", - python_version="3.12", - env={"MY_ENV": "MY_VALUE"}, - apt_packages=["curl"], - conda_packages=["scipy==1.13.0", "numpy"], - packages=["pandas==2.2.1"], - requirements=os.fspath(other_requirements_path), - source_root=os.fspath(source_root), - commands=["mkdir my_dir"], - entrypoint=["/bin/bash"], - pip_extra_index_url=["https://extra-url.com"], - source_copy_mode=CopyFileDetection.ALL, - ) + with tempfile.TemporaryDirectory(dir=Path.cwd().as_posix()) as tmp_dir: + tmp_file = Path(tmp_dir) / "copy_file.txt" + tmp_file.write_text("copy_file_content") + + image_spec = ImageSpec( + name="FLYTEKIT", + python_version="3.12", + env={"MY_ENV": "MY_VALUE"}, + apt_packages=["curl"], + conda_packages=["scipy==1.13.0", "numpy"], + packages=["pandas==2.2.1"], + requirements=os.fspath(other_requirements_path), + source_root=os.fspath(source_root), + commands=["mkdir my_dir"], + entrypoint=["/bin/bash"], + pip_extra_index_url=["https://extra-url.com"], + source_copy_mode=CopyFileDetection.ALL, + copy=[tmp_file.relative_to(Path.cwd()).as_posix()], + ) - create_docker_context(image_spec, docker_context_path) + create_docker_context(image_spec, docker_context_path) dockerfile_path = docker_context_path / "Dockerfile" assert dockerfile_path.exists() @@ -51,6 +57,7 @@ def test_create_docker_context(tmp_path): assert "RUN mkdir my_dir" in dockerfile_content assert "ENTRYPOINT [\"/bin/bash\"]" in dockerfile_content assert "mkdir -p $HOME" in dockerfile_content + assert f"COPY --chown=flytekit {tmp_file.relative_to(Path.cwd()).as_posix()} /root/" in dockerfile_content requirements_path = docker_context_path / "requirements_uv.txt" assert requirements_path.exists() @@ -179,6 +186,7 @@ def test_build(tmp_path): requirements=os.fspath(other_requirements_path), source_root=os.fspath(source_root), commands=["mkdir my_dir"], + copy=[f"{tmp_path}/hello_world.txt", f"{tmp_path}/requirements.txt"] ) builder = DefaultImageBuilder() diff --git a/tests/flytekit/unit/core/image_spec/test_image_spec.py b/tests/flytekit/unit/core/image_spec/test_image_spec.py index 6a102292ed..7f3de9622d 100644 --- a/tests/flytekit/unit/core/image_spec/test_image_spec.py +++ b/tests/flytekit/unit/core/image_spec/test_image_spec.py @@ -32,12 +32,14 @@ def test_image_spec(mock_image_spec_builder, monkeypatch): requirements=REQUIREMENT_FILE, registry_config=REGISTRY_CONFIG_FILE, entrypoint=["/bin/bash"], + copy=["/src/file1.txt"] ) assert image_spec._is_force_push is False image_spec = image_spec.with_commands("echo hello") image_spec = image_spec.with_packages("numpy") image_spec = image_spec.with_apt_packages("wget") + image_spec = image_spec.with_copy(["/src", "/src/file2.txt"]) image_spec = image_spec.force_push() assert image_spec.python_version == "3.8" @@ -58,8 +60,9 @@ def test_image_spec(mock_image_spec_builder, monkeypatch): assert image_spec.commands == ["echo hello"] assert image_spec._is_force_push is True assert image_spec.entrypoint == ["/bin/bash"] + assert image_spec.copy == ["/src/file1.txt", "/src", "/src/file2.txt"] - assert image_spec.image_name() == f"localhost:30001/flytekit:nDg0IzEKso7jtbBnpLWTnw" + assert image_spec.image_name() == f"localhost:30001/flytekit:fYU5EUF6y0b2oFG4tu70tA" ctx = context_manager.FlyteContext.current_context() with context_manager.FlyteContextManager.with_context( ctx.with_execution_state(ctx.execution_state.with_params(mode=ExecutionState.Mode.TASK_EXECUTION))