PTV-1767: Fix file metadata incomplete return bug #176

Merged 2 commits on May 17, 2022
3 changes: 3 additions & 0 deletions RELEASE_NOTES.md
@@ -1,3 +1,6 @@
+### Version 1.3.5
+- Fixed a bug that under some circumstances could cause incomplete file metadata to be returned.
+
### Version 1.3.4
- Alter the behavior of the bulk specification file writers to return an error if the
input `types` parameter is empty.
2 changes: 1 addition & 1 deletion staging_service/app.py
@@ -34,7 +34,7 @@

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
routes = web.RouteTableDef()
VERSION = "1.3.4"
VERSION = "1.3.5"

_DATATYPE_MAPPINGS = None

11 changes: 7 additions & 4 deletions staging_service/metadata.py
@@ -48,7 +48,8 @@ async def _generate_metadata(path: Path, source: str):
    else:
        data = {}
    # first output of md5sum is the checksum
-    data["source"] = source
+    if source:
+        data["source"] = source
    try:
        md5 = hashlib.md5(open(path.full_path, "rb").read()).hexdigest()
    except:
@@ -165,7 +166,7 @@ async def some_metadata(path: Path, desired_fields=False, source=None):
        os.stat(path.metadata_path).st_mtime < file_stats["mtime"] / 1000
    ):
        # if metadata does not exist or older than file: regenerate
-        if source is None:
+        if source is None:  # TODO BUGFIX this will overwrite any source in the file
            source = _determine_source(path)
        data = await _generate_metadata(path, source)
    else:  # metadata already exists and is up to date
@@ -174,9 +175,11 @@ async def some_metadata(path: Path, desired_fields=False, source=None):
            data = await f.read()
            data = decoder.decode(data)
        # due to legacy code, some files have corrupted metadata files
+        # Also if a file is listed or checked for existence before the upload completes
+        # this code block will be triggered
        expected_keys = ["source", "md5", "lineCount", "head", "tail"]
-        if set(expected_keys) > set(data.keys()):
-            if source is None:
+        if not set(expected_keys) <= set(data.keys()):
+            if source is None and "source" not in data:
                source = _determine_source(path)
            data = await _generate_metadata(path, source)
    data = {**data, **file_stats}
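The crux of the change above is the set comparison that decides whether the cached metadata file is complete. Below is a minimal sketch of the old strict-superset check versus the new subset check; the `partial_metadata` dict is a hypothetical example modeled on the test data further down, not code from the repository.

```python
# Keys the service expects a complete metadata file to contain.
expected_keys = {"source", "md5", "lineCount", "head", "tail"}

# Hypothetical metadata written before the upload finished: it carries an
# extra key ("UPA") but is missing md5, lineCount, head and tail.
partial_metadata = {"source": "some place", "UPA": "1/2/3"}
present = set(partial_metadata)

# Old check: regenerate only if expected_keys is a strict superset of the
# present keys. The unexpected "UPA" key makes this False, so regeneration
# was skipped and the incomplete metadata was returned.
print(expected_keys > present)       # False

# New check: regenerate unless every expected key is present. This is True
# here, so the metadata is regenerated as intended.
print(not expected_keys <= present)  # True
```

Together with the `if source:` guard added in `_generate_metadata`, the extra `"source" not in data` condition keeps a source already recorded in the metadata file from being overwritten during regeneration.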
80 changes: 80 additions & 0 deletions tests/test_metadata.py
@@ -0,0 +1,80 @@
""" Unit tests for the metadata handling routines. """

import json
import uuid

from collections.abc import Generator
from pathlib import Path as PyPath
from pytest import fixture

from staging_service.utils import Path
from staging_service.metadata import some_metadata

from tests.test_app import FileUtil

# TODO TEST add more unit tests here.


@fixture(scope="module")
def temp_dir() -> Generator[PyPath, None, None]:
    with FileUtil() as fu:
        childdir = PyPath(fu.make_dir(str(uuid.uuid4()))).resolve()

        yield childdir

    # FileUtil will auto delete after exiting


async def test_incomplete_metadata_file_update(temp_dir: PyPath):
    """
    Tests the case where a file is listed or checked for existence prior to the upload
    completing, and then an UPA is added to the file. This previously caused incomplete
    metadata to be returned, as the logic for whether to run the metadata regeneration code
    based on the contents of the current metadata file was incorrect.
    See https://kbase-jira.atlassian.net/browse/PTV-1767
    """
    await _incomplete_metadata_file_update(
        temp_dir,
        {"source": "some place", "UPA": "1/2/3"},
        "some place"
    )

    await _incomplete_metadata_file_update(
        temp_dir,
        {"UPA": "1/2/3"},
        "Unknown"
    )

async def _incomplete_metadata_file_update(temp_dir, metadict, source):
    target = Path(
        str(temp_dir / "full"),
        str(temp_dir / "meta"),
        "user_path",
        "myfilename",
        "super_fake_jgi_path")

    with open(target.full_path, "w") as p:
        p.writelines(make_test_lines(1, 6))
    with open(target.metadata_path, "w") as p:
        p.write(json.dumps(metadict))

    res = await some_metadata(target)

    assert "mtime" in res  # just check the file time is there
    del res["mtime"]

    assert res == {
        "source": source,
        "md5": "9f07da9655223b777121141ff7735f25",
        "head": "".join(make_test_lines(1, 5)),
        "tail": "".join(make_test_lines(2, 6)),
        "UPA": "1/2/3",
        "isFolder": False,
        "lineCount": "5",
        "name": "myfilename",
        "path": "user_path",
        "size": 1280
    }

def make_test_lines(start, stop):
    return [str(i) + "a" * (256 - len(str(i)) - 1) + "\n" for i in range(start, stop)]
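For reference, the expected values in the assertion follow from `make_test_lines` padding every line to exactly 256 bytes. A small sketch verifying the arithmetic, assuming (as the test data implies rather than anything stated here) that `head` and `tail` are the first and last 1024 bytes of the file; `make_test_lines` is copied from the test module so the snippet stands alone:

```python
# Quick check of the arithmetic behind the expected metadata values above.

def make_test_lines(start, stop):
    # Each line is exactly 256 bytes: the line number, "a" padding, and a newline.
    return [str(i) + "a" * (256 - len(str(i)) - 1) + "\n" for i in range(start, stop)]

content = "".join(make_test_lines(1, 6))  # five 256-byte lines -> 1280 bytes

assert len(content) == 1280                               # "size": 1280
assert content.count("\n") == 5                           # "lineCount": "5"
assert content[:1024] == "".join(make_test_lines(1, 5))   # "head": first four lines
assert content[-1024:] == "".join(make_test_lines(2, 6))  # "tail": last four lines
```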