Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the ability to use Github repos that contain symlinks. #3015

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import os
import re
from typing import List, Optional
from typing import List, Optional, Tuple

import requests
from github import Github, GithubException
Expand Down Expand Up @@ -151,7 +151,7 @@ def download_files(
raise RuntimeError("Invalid repository subdirectory.")

os.makedirs(directory, exist_ok=True)

tmp_symlinks: List[Tuple[str, str]] = []
for content in contents:
local_path = os.path.join(directory, content.name)
if content.type == "dir":
Expand All @@ -160,12 +160,25 @@ def download_files(
directory=local_path,
repo_sub_directory=content.path,
)
elif content.type == "symlink":
symlink_content = self.github_repo.get_contents(
content.path, ref=commit
)
symlink_src = symlink_content.raw_data["target"]
tmp_symlinks.append((local_path, symlink_src))
# As it cannot be assumed at this point that the targets of the
# symlink already exist, the symlinks are first collected here and processed later.
else:
try:
with open(local_path, "wb") as f:
f.write(content.decoded_content)
except (GithubException, IOError) as e:
logger.error("Error processing %s: %s", content.path, e)
for symlink in tmp_symlinks:
symlink_dst, symlink_src = symlink
create_symlink_in_local_repo_copy(
symlink_dst=symlink_dst, symlink_src=symlink_src
)

def get_local_context(self, path: str) -> Optional[LocalRepositoryContext]:
"""Gets the local repository context.
Expand Down Expand Up @@ -202,3 +215,51 @@ def check_remote_url(self, url: str) -> bool:
return True

return False


def create_symlink_in_local_repo_copy(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be nice to have a check whether the symlink target is actually part of the repository contents. In other cases, I feel like we should not create the symlink at all.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean exactly?
Currently the function is only called when a symlink is found in the repo content.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I meant here is: The symlink source could be a path which is outside of the contents of the repository, for example like this:

echo outside > outside.txt
mkdir repo
cd repo
git init
ln -s ../outside.txt outside_symlink

In most cases, when downloading the symlink the source will not exist, but in some other cases it might lead to unexpected behaviour -> I would suggest we just prevent this case alltogether and don't try to create a symlink if the file is outside of the directory we download. What do you think?

symlink_dst: str, symlink_src: str
) -> None:
"""This function attempts to create a symbolic link at `symlink_dst` that points to `symlink_src`.

If a file or directory already exists at `symlink_dst`, it will
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not true anymore I think?

be removed before the symbolic link is created.

Args:
symlink_dst: The path where the symbolic link should be created.
symlink_src: The path that the symbolic link should point to.

Raises:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To pass our CI, we'll need to remove those as they're actually not raised anymore, but a warning is logged instead.

FileNotFoundError: Informs that the target directory specified by
`symlink_dst` does not exist.
PermissionError: Informs that there are insufficient permissions to
create the symbolic link.
NotImplementedError: Informs that symbolic links are not supported on
the current operating system.
OSError: Any other OS-related errors that occur.
"""
try:
os.symlink(src=symlink_src, dst=symlink_dst)

except FileExistsError as e:
logger.debug("The symbolic link already exists. %s",e)
except FileNotFoundError:
logger.warning(
"The target directory of the symbolic link '%s' does not exist. "
"The creation of the symbolic link is skipped.",
symlink_src,
)
except PermissionError:
logger.warning(
"You do not have the necessary permissions to create the symbolic link. "
"The creation of the symbolic link '%s' is skipped.",
symlink_dst,
)
except NotImplementedError:
logger.warning(
"Symbolic links are not supported on this operating system. "
"The creation of the symbolic link '%s' is skipped.",
symlink_dst,
)
except OSError as e:
logger.warning("An OS error occurred: %s", e)
Empty file.
Empty file.
47 changes: 47 additions & 0 deletions tests/unit/integrations/github/test_github_code_repository.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import logging
import os
from tempfile import TemporaryDirectory

import pytest

from zenml.integrations.github.code_repositories.github_code_repository import (
create_symlink_in_local_repo_copy,
)


@pytest.fixture()
def tmp_dir() -> str:
with TemporaryDirectory() as tmp:
yield tmp


@pytest.mark.parametrize("initial_state", ["file", "directory"])
def test_create_symlink_in_local_repo_copy(tmp_dir, initial_state: str):
symlink_dst = os.path.join(tmp_dir, "test_symlink")
symlink_src = os.path.join(tmp_dir, "target_folder")

if initial_state == "file":
with open(symlink_dst) as file:
file.write("content")
elif initial_state == "directory":
os.mkdir(symlink_dst)
os.mkdir(symlink_src)

create_symlink_in_local_repo_copy(
symlink_dst=symlink_dst, symlink_src=symlink_src
)
assert os.path.islink(symlink_dst)
assert os.readlink(symlink_dst) == str(symlink_src)


def test_create_symlink_in_local_repo_copy_target_nonexistent(tmp_dir, caplog):
symlink_dst = os.path.join(tmp_dir, "test_symlink")
symlink_src = os.path.join(tmp_dir, "target_folder")

with caplog.at_level(logging.WARNING):
create_symlink_in_local_repo_copy(
symlink_dst=symlink_dst, symlink_src=symlink_src
)

assert not os.path.exists(symlink_dst)
assert "The target directory of the symbolic link" in caplog.text