Skip to content

Commit

Permalink
Allow links to have file:// prefix - but don't write them that way …
Browse files Browse the repository at this point in the history
…by default (#1489)

* Allow reading and writing when href startswith file:///

* Add test of reading and writing to hrefs starting with file:///

* Ensure that file:/// are interpretted as absolute urls

* Try to fix windows

* Try to fix windows

* Try to fix windows

* Add some print debugging

* Add more print debugging

* Add more print debugging

* Moved is_absolute to os dependent test

* Fix os-dependent test

* Strip initial slash and see if it passes

* Add more print debugging

* Just strip off the leading slash

* Only for windows

* Fix windows handling

---------

Co-authored-by: Pete Gadomski <[email protected]>
  • Loading branch information
jsignell and gadomski authored Jan 10, 2025
1 parent 43cfdec commit 24e16e4
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 6 deletions.
5 changes: 3 additions & 2 deletions pystac/stac_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ def read_text_from_href(self, href: str) -> str:
except HTTPError as e:
raise Exception(f"Could not read uri {href}") from e
else:
href = safe_urlparse(href).path
with open(href, encoding="utf-8") as f:
href_contents = f.read()
return href_contents
Expand All @@ -328,7 +329,7 @@ def write_text_to_href(self, href: str, txt: str) -> None:
"""
if _is_url(href):
raise NotImplementedError("DefaultStacIO cannot write to urls")
href = os.fspath(href)
href = safe_urlparse(href).path
dirname = os.path.dirname(href)
if dirname != "" and not os.path.isdir(dirname):
os.makedirs(dirname)
Expand Down Expand Up @@ -391,7 +392,7 @@ def _report_duplicate_object_names(

def _is_url(href: str) -> bool:
parsed = safe_urlparse(href)
return parsed.scheme != ""
return parsed.scheme not in ["", "file"]


if HAS_URLLIB3:
Expand Down
31 changes: 28 additions & 3 deletions pystac/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,25 @@ def safe_urlparse(href: str) -> URLParseResult:
query=parsed.query,
fragment=parsed.fragment,
)

# Windows drives sometimes get parsed as the netloc and sometimes
# as part of the parsed.path.
if parsed.scheme == "file" and os.name == "nt":
if parsed.netloc:
path = f"{parsed.netloc}{parsed.path}"
elif parsed.path.startswith("/") and ":" in parsed.path:
path = parsed.path[1:]
else:
path = parsed.path

return URLParseResult(
scheme=parsed.scheme,
netloc="",
path=path,
params=parsed.params,
query=parsed.query,
fragment=parsed.fragment,
)
else:
return parsed

Expand Down Expand Up @@ -246,7 +265,7 @@ def make_relative_href(
):
return source_href

if parsed_start.scheme == "":
if parsed_start.scheme in ["", "file"]:
return _make_relative_href_path(parsed_source, parsed_start, start_is_dir)
else:
return _make_relative_href_url(parsed_source, parsed_start, start_is_dir)
Expand Down Expand Up @@ -311,6 +330,9 @@ def _make_absolute_href_path(
make_posix_style(os.path.abspath(start_dir)), start_dir
)

if parsed_source.scheme or parsed_start.scheme:
abs_path = f"file://{abs_path}"

return abs_path


Expand Down Expand Up @@ -346,7 +368,10 @@ def make_absolute_href(
parsed_start = safe_urlparse(start_href)
parsed_source = safe_urlparse(source_href)

if parsed_source.scheme != "" or parsed_start.scheme != "":
if parsed_source.scheme not in ["", "file"] or parsed_start.scheme not in [
"",
"file",
]:
return _make_absolute_href_url(parsed_source, parsed_start, start_is_dir)
else:
return _make_absolute_href_path(parsed_source, parsed_start, start_is_dir)
Expand All @@ -364,7 +389,7 @@ def is_absolute_href(href: str) -> bool:
bool: ``True`` if the given HREF is absolute, ``False`` if it is relative.
"""
parsed = safe_urlparse(href)
return parsed.scheme != "" or os.path.isabs(parsed.path)
return parsed.scheme not in ["", "file"] or os.path.isabs(parsed.path)


def datetime_to_str(dt: datetime, timespec: str = "auto") -> str:
Expand Down
9 changes: 9 additions & 0 deletions tests/test_stac_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ def test_read_write_collection(self) -> None:
pystac.write_file(collection, dest_href=dest_href)
self.assertTrue(os.path.exists(dest_href), msg="File was not written.")

def test_read_write_collection_with_file_protocol(self) -> None:
collection = pystac.read_file(
"file://" + TestCases.get_path("data-files/collections/multi-extent.json")
)
with tempfile.TemporaryDirectory() as tmp_dir:
dest_href = os.path.join(tmp_dir, "collection.json")
pystac.write_file(collection, dest_href="file://" + dest_href)
self.assertTrue(os.path.exists(dest_href), msg="File was not written.")

def test_read_item(self) -> None:
item = pystac.read_file(TestCases.get_path("data-files/item/sample-item.json"))
with tempfile.TemporaryDirectory() as tmp_dir:
Expand Down
19 changes: 18 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ def test_make_relative_href(self) -> None:
("/a/catalog.json", "/a/b/c/catalog.json", "../../catalog.json"),
("/a/b/c/d/", "/a/b/c/catalog.json", "./d/"),
("/a/b/c/d/.dotfile", "/a/b/c/d/catalog.json", "./.dotfile"),
(
"file:///a/b/c/d/catalog.json",
"file:///a/b/c/catalog.json",
"./d/catalog.json",
),
]

for source_href, start_href, expected in test_cases:
Expand Down Expand Up @@ -161,11 +166,22 @@ def test_make_absolute_href(self) -> None:
"https://stacspec.org/a/b/item.json",
),
("http://localhost:8000", None, "http://localhost:8000"),
("item.json", "file:///a/b/c/catalog.json", "file:///a/b/c/item.json"),
(
"./z/item.json",
"file:///a/b/c/catalog.json",
"file:///a/b/c/z/item.json",
),
("file:///a/b/c/item.json", None, "file:///a/b/c/item.json"),
]

for source_href, start_href, expected in test_cases:
actual = make_absolute_href(source_href, start_href)
_, actual = os.path.splitdrive(actual)
if expected.startswith("file://"):
_, actual = os.path.splitdrive(actual.replace("file://", ""))
actual = f"file://{actual}"
else:
_, actual = os.path.splitdrive(actual)
self.assertEqual(actual, expected)

def test_make_absolute_href_on_vsitar(self) -> None:
Expand Down Expand Up @@ -234,6 +250,7 @@ def test_is_absolute_href_os_aware(self) -> None:
test_cases = [
("/item.json", not incl_drive_letter),
("/home/someuser/Downloads/item.json", not incl_drive_letter),
("file:///home/someuser/Downloads/item.json", not incl_drive_letter),
("d:/item.json", is_windows),
("c:/files/more_files/item.json", is_windows),
]
Expand Down

0 comments on commit 24e16e4

Please sign in to comment.