Skip to content

Commit

Permalink
chore(fs): add tests to cover recent PRs
Browse files Browse the repository at this point in the history
  • Loading branch information
shcheklein committed Dec 28, 2023
1 parent 1a8cd72 commit e05901d
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 30 deletions.
6 changes: 3 additions & 3 deletions pydrive2/fs/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,9 +427,9 @@ def info(self, path):

def ls(self, path, detail=False):
bucket, base = self.split_path(path)
assert bucket == self.root

dir_ids = self._path_to_item_ids(base)

if not dir_ids:
raise FileNotFoundError(
errno.ENOENT, os.strerror(errno.ENOENT), path
Expand Down Expand Up @@ -465,14 +465,14 @@ def ls(self, path, detail=False):

def find(self, path, detail=False, **kwargs):
bucket, base = self.split_path(path)

seen_paths = set()
assert bucket == self.root

# Make sure the base path is cached and dir_ids below has some
# dirs relevant to this call
self._path_to_item_ids(base)

dir_ids = [self._ids_cache["ids"].copy()]
seen_paths = set()
contents = []
while dir_ids:
query_ids = {
Expand Down
173 changes: 146 additions & 27 deletions pydrive2/test/test_fs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from io import StringIO
import os
import posixpath
import secrets
Expand Down Expand Up @@ -26,16 +27,22 @@ def remote_dir(base_remote_dir):


@pytest.fixture
def fs(tmpdir, base_remote_dir):
setup_credentials()
auth = GoogleAuth(settings_file_path("default.yaml", tmpdir / ""))
auth.ServiceAuth()
def create_fs(tmpdir, base_remote_dir):
    """Fixture factory: build an authenticated GDriveFileSystem on demand.

    Returns a callable; each call yields ``(fs, item)`` where ``item`` is
    the metadata of the freshly created base directory, or ``None`` when
    called with ``create=False`` (useful to get a second fs instance with
    a cold name->ID cache).
    """

    def _create_fs(create=True):
        # Service-account auth; credentials are materialized under tmpdir.
        setup_credentials()
        auth = GoogleAuth(settings_file_path("default.yaml", tmpdir / ""))
        auth.ServiceAuth()

        # Drop the leading bucket component; keep only the base path
        # relative to the Drive root.
        _, base = base_remote_dir.split("/", 1)
        fs = GDriveFileSystem(base_remote_dir, auth)
        if create:
            # Pre-create the base dir so tests start from an existing root.
            item = fs._gdrive_create_dir("root", base)
        else:
            item = None

        return fs, item

    return _create_fs


@pytest.mark.manual
Expand Down Expand Up @@ -66,7 +73,8 @@ def test_fs_service_json(base_remote_dir):
)


def test_info(fs, tmpdir, remote_dir):
def test_info(create_fs, remote_dir):
fs, _ = create_fs()
fs.touch(remote_dir + "/info/a.txt")
fs.touch(remote_dir + "/info/b.txt")
details = fs.info(remote_dir + "/info/a.txt")
Expand All @@ -87,7 +95,8 @@ def test_info(fs, tmpdir, remote_dir):
assert details["name"] == remote_dir + "/info/"


def test_move(fs, remote_dir):
def test_move(create_fs, remote_dir):
fs, _ = create_fs()
fs.touch(remote_dir + "/a.txt")
initial_info = fs.info(remote_dir + "/a.txt")

Expand All @@ -102,7 +111,8 @@ def test_move(fs, remote_dir):
assert initial_info == secondary_info


def test_rm(fs, remote_dir):
def test_rm(create_fs, remote_dir):
fs, _ = create_fs()
fs.touch(remote_dir + "/a.txt")
fs.rm(remote_dir + "/a.txt")
assert not fs.exists(remote_dir + "/a.txt")
Expand All @@ -116,7 +126,8 @@ def test_rm(fs, remote_dir):
assert not fs.exists(remote_dir + "/dir/c/a")


def test_ls(fs: GDriveFileSystem, remote_dir):
def test_ls(create_fs, remote_dir):
fs, _ = create_fs()
_, base = fs.split_path(remote_dir + "dir/")
fs._path_to_item_ids(base, create=True)
assert fs.ls(remote_dir + "dir/") == []
Expand All @@ -141,12 +152,91 @@ def by_name(details):
assert dirs == expected


def test_ls_non_existing_dir(fs, remote_dir):
def test_basic_ops_caching(create_fs, remote_dir, mocker):
    """Verify that name->ID lookups are cached across basic fs operations."""
    # Internally we have to dereference names into IDs to call GDrive APIs;
    # we try hard to cache those and make sure that operations like
    # exists, ls, find, etc. don't hit the API more than once per path.

    # ListFile (_gdrive_list) is the main operation that we use to retrieve
    # file metadata in all operations like find/ls/exists, so counting its
    # calls should be fine as a basic benchmark.
    # Note: we can't count direct API calls since we have retries, also can't
    # count even direct calls to the GDrive client - for the same reason.

    fs, _ = create_fs()
    spy = mocker.spy(fs, "_gdrive_list")

    dir_path = remote_dir + "/a/b/c/"
    file_path = dir_path + "test.txt"
    fs.touch(file_path)

    # NOTE(review): 5 looks like one listing per path component resolved
    # while creating the nested dirs - confirm against _path_to_item_ids.
    assert spy.call_count == 5
    spy.reset_mock()

    # Each of the following must be served with a single listing call,
    # everything else coming from the cache.
    fs.exists(file_path)
    assert spy.call_count == 1
    spy.reset_mock()

    fs.ls(remote_dir)
    assert spy.call_count == 1
    spy.reset_mock()

    fs.ls(dir_path)
    assert spy.call_count == 1
    spy.reset_mock()

    fs.find(dir_path)
    assert spy.call_count == 1
    spy.reset_mock()

    fs.find(remote_dir)
    assert spy.call_count == 1
    spy.reset_mock()


def test_ops_work_with_duplicate_names(create_fs, remote_dir):
    """GDrive permits sibling items with identical names; ls/find must cope.

    Each check runs on two fs instances: the original (warm cache) and a
    fresh one built with ``create=False`` (cold cache).
    """
    fs, base_item = create_fs()

    remote_dir_item = fs._gdrive_create_dir(
        base_item["id"], remote_dir.split("/")[-1]
    )
    dir_name = str(uuid.uuid4())
    # Create two sibling directories with the exact same name.
    dir1 = fs._gdrive_create_dir(remote_dir_item["id"], dir_name)
    dir2 = fs._gdrive_create_dir(remote_dir_item["id"], dir_name)

    # Two directories were created with the same name
    assert dir1["id"] != dir2["id"]

    dir_path = remote_dir + "/" + dir_name + "/"
    for test_fs in [fs, create_fs(create=False)[0]]:
        # ls returns both of them, even though the names are the same
        result = test_fs.ls(remote_dir)
        assert len(result) == 2
        assert set(result) == {dir_path}

    for test_fs in [fs, create_fs(create=False)[0]]:
        # find by default doesn't return dirs at all
        result = test_fs.find(remote_dir)
        assert len(result) == 0

    # Put one file into each of the twin directories.
    fs._gdrive_upload_fobj("a.txt", dir1["id"], StringIO(""))
    fs._gdrive_upload_fobj("b.txt", dir2["id"], StringIO(""))

    for test_fs in [fs, create_fs(create=False)[0]]:
        # now we should have both files
        result = test_fs.find(remote_dir)
        assert len(result) == 2
        assert set(result) == {dir_path + file for file in ["a.txt", "b.txt"]}


def test_ls_non_existing_dir(create_fs, remote_dir):
    """Listing a directory that was never created raises FileNotFoundError."""
    fs, _ = create_fs()
    missing = remote_dir + "dir/"
    with pytest.raises(FileNotFoundError):
        fs.ls(missing)


def test_find(fs, remote_dir):
def test_find(create_fs, remote_dir):
fs, _ = create_fs()
fs.mkdir(remote_dir + "/dir")

files = [
Expand All @@ -169,15 +259,28 @@ def test_find(fs, remote_dir):
for file in files:
fs.touch(file)

assert set(fs.find(remote_dir)) == set(files)
for test_fs in [fs, create_fs(create=False)[0]]:
# Test for https://github.com/iterative/PyDrive2/issues/229
# It must go first, so that we test with a cache miss as well
assert set(test_fs.find(remote_dir + "/dir/c/d/")) == set(
[
file
for file in files
if file.startswith(remote_dir + "/dir/c/d/")
]
)

# General find test
assert set(test_fs.find(remote_dir)) == set(files)

find_results = fs.find(remote_dir, detail=True)
info_results = [fs.info(file) for file in files]
info_results = {content["name"]: content for content in info_results}
assert find_results == info_results
find_results = test_fs.find(remote_dir, detail=True)
info_results = [test_fs.info(file) for file in files]
info_results = {content["name"]: content for content in info_results}
assert find_results == info_results


def test_exceptions(fs, tmpdir, remote_dir):
def test_exceptions(create_fs, tmpdir, remote_dir):
fs, _ = create_fs()
with pytest.raises(FileNotFoundError):
with fs.open(remote_dir + "/a.txt"):
...
Expand All @@ -189,7 +292,8 @@ def test_exceptions(fs, tmpdir, remote_dir):
fs.get_file(remote_dir + "/c.txt", tmpdir / "c.txt")


def test_open_rw(fs, remote_dir):
def test_open_rw(create_fs, remote_dir):
fs, _ = create_fs()
data = b"dvc.org"

with fs.open(remote_dir + "/a.txt", "wb") as stream:
Expand All @@ -199,15 +303,22 @@ def test_open_rw(fs, remote_dir):
assert stream.read() == data


def test_concurrent_operations(fs, remote_dir):
def test_concurrent_operations(create_fs, remote_dir):
fs, _ = create_fs()

# Include an extra dir name to force upload operations creating it
# this way we can also test that only a single directory is created
# even if multiple threads are uploading files into the same dir
dir_name = secrets.token_hex(16)

def create_random_file():
name = secrets.token_hex(16)
with fs.open(remote_dir + "/" + name, "w") as stream:
with fs.open(remote_dir + f"/{dir_name}/" + name, "w") as stream:
stream.write(name)
return name

def read_random_file(name):
with fs.open(remote_dir + "/" + name, "r") as stream:
with fs.open(remote_dir + f"/{dir_name}/" + name, "r") as stream:
return stream.read()

with futures.ThreadPoolExecutor() as executor:
Expand All @@ -225,8 +336,14 @@ def read_random_file(name):

assert write_names == read_names

# Test that only a single dir is created
for test_fs in [fs, create_fs(create=False)[0]]:
results = test_fs.ls(remote_dir)
assert results == [remote_dir + f"/{dir_name}/"]


def test_put_file(fs, tmpdir, remote_dir):
def test_put_file(create_fs, tmpdir, remote_dir):
fs, _ = create_fs()
src_file = tmpdir / "a.txt"
with open(src_file, "wb") as file:
file.write(b"data")
Expand All @@ -237,7 +354,8 @@ def test_put_file(fs, tmpdir, remote_dir):
assert stream.read() == b"data"


def test_get_file(fs, tmpdir, remote_dir):
def test_get_file(create_fs, tmpdir, remote_dir):
fs, _ = create_fs()
src_file = tmpdir / "a.txt"
dest_file = tmpdir / "b.txt"

Expand All @@ -249,7 +367,8 @@ def test_get_file(fs, tmpdir, remote_dir):
assert dest_file.read() == "data"


def test_get_file_callback(fs, tmpdir, remote_dir):
def test_get_file_callback(create_fs, tmpdir, remote_dir):
fs, _ = create_fs()
src_file = tmpdir / "a.txt"
dest_file = tmpdir / "b.txt"

Expand Down

0 comments on commit e05901d

Please sign in to comment.