Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added symlink dereferencing in fast packaging and tests #1151

Merged
merged 3 commits into from
Sep 9, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions flytekit/clis/sdk_in_container/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,16 +155,22 @@ def fast(ctx):


@click.command("workflows")
@click.option(
"--deref-symlinks",
default=False,
is_flag=True,
help="Enables symlink dereferencing when packaging files in fast registration",
)
@click.option("-f", "--folder", type=click.Path(exists=True))
@click.pass_context
def fast_workflows(ctx, folder=None):
def fast_workflows(ctx, folder=None, deref_symlinks=False):

if folder:
click.echo(f"Writing output to {folder}")

source_dir = ctx.obj[CTX_LOCAL_SRC_ROOT]
# Write using gzip
archive_fname = fast_package(source_dir, folder)
archive_fname = fast_package(source_dir, folder, deref_symlinks)
click.echo(f"Wrote compressed archive to {archive_fname}")

pkgs = ctx.obj[CTX_PACKAGES]
Expand Down
5 changes: 3 additions & 2 deletions flytekit/tools/fast_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@
file_access = FlyteContextManager.current_context().file_access


def fast_package(source: os.PathLike, output_dir: os.PathLike) -> os.PathLike:
def fast_package(source: os.PathLike, output_dir: os.PathLike, deref_symlinks: bool = False) -> os.PathLike:
"""
Takes a source directory and packages everything not covered by common ignores into a tarball
named after a hexdigest of the included files.
:param os.PathLike source:
:param os.PathLike output_dir:
:param bool deref_symlinks: Enables dereferencing symlinks when packaging directory
:return os.PathLike:
"""
ignore = IgnoreGroup(source, [GitIgnore, DockerIgnore, StandardIgnore])
Expand All @@ -41,7 +42,7 @@ def fast_package(source: os.PathLike, output_dir: os.PathLike) -> os.PathLike:

with tempfile.TemporaryDirectory() as tmp_dir:
tar_path = os.path.join(tmp_dir, "tmp.tar")
with tarfile.open(tar_path, "w") as tar:
with tarfile.open(tar_path, "w", dereference=deref_symlinks) as tar:
tar.add(source, arcname="", filter=lambda x: ignore.tar_filter(tar_strip_file_attributes(x)))
with gzip.GzipFile(filename=archive_fname, mode="wb", mtime=0) as gzipped:
with open(tar_path, "rb") as tar_file:
Expand Down
26 changes: 25 additions & 1 deletion tests/flytekit/unit/tools/test_fast_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ def flyte_project(tmp_path):
"workflows": {
"__pycache__": {"some.pyc": ""},
"hello_world.py": "print('Hello World!')",
}
},
},
"utils": {
"util.py": "print('Hello from utils!')",
},
".venv": {"lots": "", "of": "", "packages": ""},
".env": "supersecret",
Expand All @@ -35,6 +38,7 @@ def flyte_project(tmp_path):
}

make_tree(tmp_path, tree)
os.symlink(str(tmp_path) + "/utils/util.py", str(tmp_path) + "/src/util")
subprocess.run(["git", "init", str(tmp_path)])
return tmp_path

Expand All @@ -48,9 +52,29 @@ def test_package(flyte_project, tmp_path):
".gitignore",
"keep.foo",
"src",
"src/util",
"src/workflows",
"src/workflows/hello_world.py",
"utils",
"utils/util.py",
]
util = tar.getmember("src/util")
assert util.issym()
assert str(os.path.basename(archive_fname)).startswith(FAST_PREFIX)
assert str(archive_fname).endswith(FAST_FILEENDING)


def test_package_with_symlink(flyte_project, tmp_path):
    """Package ``src`` with ``deref_symlinks=True`` and check the archive layout.

    The ``util`` entry must be stored as a regular file (not a symlink
    member), and the archive file name must carry the fast-registration
    prefix and file ending.
    """
    expected_names = [
        "",  # tar root, output removes leading '/'
        "util",
        "workflows",
        "workflows/hello_world.py",
    ]

    archive_fname = fast_package(
        source=flyte_project / "src",
        output_dir=tmp_path,
        deref_symlinks=True,
    )

    # NOTE(review): ``dereference`` is documented as affecting how links are
    # *added* to an archive; it is presumably a no-op when reading — confirm.
    with tarfile.open(archive_fname, dereference=True) as tar:
        assert tar.getnames() == expected_names
        util_member = tar.getmember("util")
        assert util_member.isfile()

    archive_basename = str(os.path.basename(archive_fname))
    assert archive_basename.startswith(FAST_PREFIX)
    assert str(archive_fname).endswith(FAST_FILEENDING)

Expand Down