[doc][build/01] BuildCache api class (#46780)

Move the logic of getting and zipping the doc build artifacts into a
class, and add test cases. In a follow-up, I'll add the logic to modify
the pickle file to remove things such as the site-packages dependency
before uploading.
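In short, the new API is driven like this (a minimal sketch; the path is
illustrative, mirroring the call site added in cmd_build.py below):

    from ci.ray_ci.doc.build_cache import BuildCache

    # Collect the untracked doc build outputs, tar them, and push the
    # tarball to S3. Assumes BUILDKITE_COMMIT and AWS credentials are
    # set in the environment.
    BuildCache("/path/to/ray/doc").upload()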

Test:
- CI

---------

Signed-off-by: can <[email protected]>
can-anyscale authored Jul 25, 2024
1 parent eac96b5 commit 7af3ff0
Showing 4 changed files with 131 additions and 38 deletions.
15 changes: 15 additions & 0 deletions ci/ray_ci/doc/BUILD.bazel
@@ -73,3 +73,18 @@ py_test(
         ci_require("pytest"),
     ],
 )
+
+py_test(
+    name = "test_build_cache",
+    size = "small",
+    srcs = ["test_build_cache.py"],
+    exec_compatible_with = ["//:hermetic_python"],
+    tags = [
+        "ci_unit",
+        "team:ci",
+    ],
+    deps = [
+        ":doc",
+        ci_require("pytest"),
+    ],
+)
78 changes: 78 additions & 0 deletions ci/ray_ci/doc/build_cache.py
@@ -0,0 +1,78 @@
import tempfile
import subprocess
import os
from typing import Set

import boto3

from ci.ray_ci.utils import logger
from ray_release.util import get_write_state_machine_aws_bucket


AWS_CACHE_KEY = "doc_build"


class BuildCache:
    """
    BuildCache represents the build artifacts generated from the doc build process,
    massaged to be used as a cache for the next build process.
    """

    def __init__(self, cache_dir: str):
        """
        Args:
            cache_dir: The directory where the build artifacts are stored
        """
        self._cache_dir = cache_dir

    def upload(self) -> None:
        """
        Upload the build artifacts to S3
        """
        logger.info("Obtaining the list of cache files.")
        cache_files = self._get_cache()

        logger.info("Creating a tarball of the cache files.")
        doc_tarball = self._zip_cache(cache_files)

        logger.info("Uploading the tarball to S3.")
        self._upload_cache(doc_tarball)

        logger.info(f"Successfully uploaded {doc_tarball} to S3.")

    def _get_cache(self) -> Set[str]:
        """
        Get the list of cache files (untracked files generated by the doc build)
        """
        # git prints one path per line, terminated by "\n" on every platform,
        # so split on "\n" rather than os.linesep.
        untracked_files = (
            subprocess.check_output(
                ["git", "ls-files", "--others"],
                cwd=self._cache_dir,
            )
            .decode("utf-8")
            .split("\n")
        )

        return {file for file in untracked_files if file}

    def _zip_cache(self, cache_files: Set[str]) -> str:
        """
        Create a tarball of the cache files
        """
        with tempfile.NamedTemporaryFile(mode="w+t") as temp_file:
            temp_file.write("\n".join(cache_files))
            # Flush so tar reads the complete file list.
            temp_file.flush()
            doc_tarball = f'{os.environ["BUILDKITE_COMMIT"]}.tgz'
            doc_tarball_path = os.path.join(self._cache_dir, doc_tarball)
            subprocess.run(
                ["tar", "-cvzf", doc_tarball_path, "-T", temp_file.name],
                cwd=self._cache_dir,
                check=True,
            )

        return doc_tarball

    def _upload_cache(self, doc_tarball: str) -> None:
        boto3.client("s3").upload_file(
            os.path.join(self._cache_dir, doc_tarball),
            get_write_state_machine_aws_bucket(),
            f"{AWS_CACHE_KEY}/{doc_tarball}",
        )
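As a quick sanity check on _zip_cache, the resulting tarball can be
inspected with the standard library; a minimal sketch, assuming a
12345.tgz was produced in the current directory:

    import tarfile

    # List the entries of a cache tarball produced by BuildCache._zip_cache.
    # The "12345.tgz" name is illustrative; it comes from BUILDKITE_COMMIT.
    with tarfile.open("12345.tgz", "r:gz") as tarball:
        for member in tarball.getnames():
            print(member)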
46 changes: 8 additions & 38 deletions ci/ray_ci/doc/cmd_build.py
@@ -1,15 +1,12 @@
 import subprocess
 import os
 
-import boto3
 import click
 
 from ci.ray_ci.utils import logger, ci_init
-from ray_release.util import get_write_state_machine_aws_bucket
-from ray_release.configs.global_config import get_global_config
-
+from ci.ray_ci.doc.build_cache import BuildCache
 
-AWS_CACHE_KEY = "doc_build"
+from ray_release.configs.global_config import get_global_config
 
 
 @click.command()
@@ -22,6 +19,11 @@ def main(ray_checkout_dir: str) -> None:
     This script builds the Ray doc and uploads build artifacts to S3.
     """
     ci_init()
+    # Add safe.directory to the git config so the doc build can run git commands.
+    subprocess.run(
+        ["git", "config", "--global", "--add", "safe.directory", ray_checkout_dir],
+        check=True,
+    )
 
     logger.info("Building ray doc.")
     _build(ray_checkout_dir)
@@ -42,7 +44,7 @@ def main(ray_checkout_dir: str) -> None:
         return
 
     logger.info("Uploading build artifacts to S3.")
-    _upload_build_artifacts(ray_checkout_dir)
+    BuildCache(os.path.join(ray_checkout_dir, "doc")).upload()
 
     return

@@ -60,37 +62,5 @@ def _build(ray_checkout_dir):
     )
 
 
-def _upload_build_artifacts(ray_checkout_dir):
-    """
-    Upload the build artifacts to S3.
-    """
-    # Get the list of the doc-generated files
-    subprocess.run(
-        ["git", "config", "--global", "--add", "safe.directory", ray_checkout_dir],
-        check=True,
-    )
-    doc_generated_files = subprocess.check_output(
-        ["git", "ls-files", "doc", "--others", "-z"],
-        cwd=ray_checkout_dir,
-    )
-
-    # Create a tarball of the doc-generated files
-    doc_tarball = f'{os.environ["BUILDKITE_COMMIT"]}.tgz'
-    with subprocess.Popen(
-        ["tar", "-cvzf", doc_tarball, "--null", "-T", "-"],
-        stdin=subprocess.PIPE,
-        cwd=ray_checkout_dir,
-    ) as proc:
-        proc.communicate(input=doc_generated_files)
-
-    # Upload the tarball to S3
-    boto3.client("s3").upload_file(
-        os.path.join(ray_checkout_dir, doc_tarball),
-        get_write_state_machine_aws_bucket(),
-        f"{AWS_CACHE_KEY}/{doc_tarball}",
-    )
-    logger.info(f"Successfully uploaded {doc_tarball} to S3.")
-
-
 if __name__ == "__main__":
     main()
30 changes: 30 additions & 0 deletions ci/ray_ci/doc/test_build_cache.py
@@ -0,0 +1,30 @@
import sys
import os
import pytest
import tempfile
from unittest import mock

from ci.ray_ci.doc.build_cache import BuildCache


@mock.patch("subprocess.check_output")
def test_get_cache(mock_check_output):
    mock_check_output.return_value = b"file1\nfile2\nfile3"
    assert BuildCache("/path/to/cache")._get_cache() == {"file1", "file2", "file3"}


@mock.patch("os.environ", {"BUILDKITE_COMMIT": "12345"})
def test_zip_cache():
    with tempfile.TemporaryDirectory() as temp_dir:
        files = set()
        for i in range(3):
            file_name = f"file_{i}.txt"
            with open(os.path.join(temp_dir, file_name), "w") as file:
                file.write("hi")
            files.add(file_name)

        assert BuildCache(temp_dir)._zip_cache(files) == "12345.tgz"


if __name__ == "__main__":
    sys.exit(pytest.main(["-vv", __file__]))
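The S3 upload path isn't exercised above; a sketch of how it could be
tested in the same mock style (the bucket name is made up):

    @mock.patch("ci.ray_ci.doc.build_cache.get_write_state_machine_aws_bucket")
    @mock.patch("boto3.client")
    def test_upload_cache(mock_client, mock_get_bucket):
        mock_get_bucket.return_value = "fake-doc-bucket"  # hypothetical bucket
        BuildCache("/path/to/cache")._upload_cache("12345.tgz")
        mock_client.return_value.upload_file.assert_called_once_with(
            "/path/to/cache/12345.tgz",
            "fake-doc-bucket",
            "doc_build/12345.tgz",
        )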
