Skip to content

Commit

Permalink
[doc][build/02] remove local site-package files from global cache (#4…
Browse files Browse the repository at this point in the history
…6781)

The generated doc build files might reference local files, such as files
under the local Python site-packages directory. Remove these references,
since they are specific to the build machine and cannot be used in a global cache.

Test:
- CI

Signed-off-by: can <[email protected]>
  • Loading branch information
can-anyscale authored Jul 25, 2024
1 parent 6b81634 commit 6af3f8c
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 0 deletions.
25 changes: 25 additions & 0 deletions ci/ray_ci/doc/build_cache.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import tempfile
import subprocess
import os
import pickle
from typing import Set

import boto3
Expand All @@ -10,6 +11,7 @@


# NOTE(review): presumably the S3 key/prefix under which the doc build cache is
# stored; its use is not visible in this view -- confirm.
AWS_CACHE_KEY = "doc_build"
# Path of the pickled build environment, relative to the cache directory.
# upload() passes it to BuildCache._massage_cache before collecting cache files.
ENVIRONMENT_PICKLE = "_build/doctrees/environment.pickle"


class BuildCache:
Expand All @@ -29,6 +31,9 @@ def upload(self) -> None:
"""
Upload the build artifacts to S3
"""
logger.info("Massage the build artifacts to be used as a cache.")
self._massage_cache(ENVIRONMENT_PICKLE)

logger.info("Obtaining the list of cache files.")
cache_files = self._get_cache()

Expand All @@ -40,6 +45,26 @@ def upload(self) -> None:

logger.info(f"Successfully uploaded {doc_tarball} to S3.")

def _massage_cache(self, environment_cache_file: str) -> None:
    """
    Strip machine-local dependency entries from the pickled build environment
    so that the build artifacts can be shared as a global cache.

    The pickle at ``<cache_dir>/<environment_cache_file>`` is loaded, every
    dependency whose path contains "site-packages" is dropped from each
    document's dependency collection, and the result is written back to the
    same path.

    Args:
        environment_cache_file: Path of the pickled environment, relative to
            ``self._cache_dir``.

    Raises:
        OSError: If the pickle file cannot be opened for reading or writing.
        pickle.UnpicklingError: If the file is not a valid pickle.
    """
    environment_cache_path = os.path.join(self._cache_dir, environment_cache_file)

    # NOTE(review): pickle.load can execute arbitrary code. This assumes the
    # file was produced by the local doc build and is trusted -- confirm.
    with open(environment_cache_path, "rb") as f:
        environment_cache = pickle.load(f)

    # The read handle is closed before any filtering or rewriting happens.
    for dependencies in environment_cache.dependencies.values():
        # Collect first, then remove: the collection must not be mutated while
        # it is being iterated. ``remove`` works for both lists and sets, so
        # the container type stored in the pickle is preserved.
        # os.fspath lets path-like entries (e.g. pathlib paths) be matched the
        # same way as plain strings instead of raising TypeError.
        local_dependencies = [
            d for d in dependencies if "site-packages" in os.fspath(d)
        ]
        for dependency in local_dependencies:
            dependencies.remove(dependency)

    # Write-only access is enough here; the file is fully replaced.
    with open(environment_cache_path, "wb") as f:
        pickle.dump(environment_cache, f, pickle.HIGHEST_PROTOCOL)

def _get_cache(self) -> Set[str]:
"""
Get the list of cache files
Expand Down
29 changes: 29 additions & 0 deletions ci/ray_ci/doc/test_build_cache.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import sys
import os
import pickle
import pytest
import tempfile
from unittest import mock

from ci.ray_ci.doc.build_cache import BuildCache


class FakeCache:
    """
    Minimal picklable stand-in for the pickled build environment object that
    BuildCache._massage_cache loads; it exposes only ``dependencies``.
    """

    def __init__(self, dependencies):
        # Stored as-is (no copy): a mapping of document name to its
        # collection of dependency paths.
        self.dependencies = dependencies


@mock.patch("subprocess.check_output")
def test_get_cache(mock_check_output):
mock_check_output.return_value = b"file1\nfile2\nfile3"
Expand All @@ -26,5 +32,28 @@ def test_zip_cache():
assert BuildCache(temp_dir)._zip_cache(files) == "12345.tgz"


def test_massage_cache():
    """
    _massage_cache must drop every "site-packages" dependency from the pickled
    environment and keep all other dependencies untouched.
    """
    cache_file_name = "env_cache.pkl"
    original = FakeCache(
        {
            "doc1": ["site-packages/dep1", "dep2"],
            "doc2": ["dep3", "site-packages/dep4"],
        }
    )
    expected_dependencies = {
        "doc1": ["dep2"],
        "doc2": ["dep3"],
    }

    with tempfile.TemporaryDirectory() as temp_dir:
        cache_path = os.path.join(temp_dir, cache_file_name)

        # Seed the cache directory with a pickled fake environment.
        with open(cache_path, "wb") as sink:
            pickle.dump(original, sink)

        BuildCache(temp_dir)._massage_cache(cache_file_name)

        # Reload what the method wrote back to the same path.
        with open(cache_path, "rb") as source:
            massaged = pickle.load(source)

    assert massaged.dependencies == expected_dependencies


if __name__ == "__main__":
    # When executed directly, run this file's tests verbosely and propagate
    # pytest's exit status to the calling process.
    exit_code = pytest.main(["-vv", __file__])
    sys.exit(exit_code)

0 comments on commit 6af3f8c

Please sign in to comment.