Io typehint support #196

Merged 3 commits on Jul 20, 2023
13 changes: 8 additions & 5 deletions dascore/utils/hdf5.py
@@ -38,8 +38,11 @@
ns_to_timedelta = partial(pd.to_timedelta, unit="ns")


class HDF5Store(pd.HDFStore):
"""This is a work-around for pandas HDF5 store not accepting file handles."""
class _HDF5Store(pd.HDFStore):
"""
This is a work-around for pandas HDF5 store not accepting
pytables.File objects.
"""

def __init__(
self,
@@ -258,7 +261,7 @@ def write_update(
# read in dataframe and prepare for input into hdf5 index
update_time = update_time or time.time()
df = self.encode_table(update_df, path=base_path)
with HDF5Store(self.path) as store:
with _HDF5Store(self.path) as store:
try:
nrows = store.get_storer(self._index_node).nrows
except (AttributeError, KeyError):
@@ -288,7 +291,7 @@ def _read_metadata(self):
Read the metadata table.
"""
try:
with HDF5Store(self.path, "r") as store:
with _HDF5Store(self.path, "r") as store:
out = store.get(self._meta_node)
store.close()
return out
@@ -304,7 +307,7 @@ def _ensure_meta_table_exists(self):
"""
if not Path(self.path).exists():
return
with HDF5Store(self.path) as store:
with _HDF5Store(self.path) as store:
# add metadata if not in store
if self._meta_node not in store:
meta = self._make_meta_table()
6 changes: 4 additions & 2 deletions dascore/utils/io.py
@@ -3,6 +3,7 @@
"""
import abc
import typing
from contextlib import suppress
from functools import cache, singledispatch
from inspect import isfunction, ismethod
from pathlib import Path
@@ -101,8 +102,9 @@ def get_handle_from_resource(uri, required_type):

return uri if required type is not specified.
"""
if isinstance(uri, required_type):
return uri
with suppress(TypeError):
if isinstance(uri, required_type):
return uri
if (func := HANDLE_FUNCTIONS.get(required_type)) is None:
return uri
return func(uri)
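
The guard above exists because `required_type` may be a typing construct (for example a `TypeVar`) rather than a class, and `isinstance()` raises `TypeError` for those. A minimal standalone sketch of the idea, not the dascore implementation (the names below are made up):

```python
from contextlib import suppress
from pathlib import Path
from typing import TypeVar

PathLike = TypeVar("PathLike", str, Path)  # a hint isinstance() cannot check

def get_handle(resource, required_type):
    """Return the resource unchanged when no cast applies (simplified)."""
    with suppress(TypeError):  # isinstance() raises TypeError for TypeVars
        if isinstance(resource, required_type):
            return resource
    # no caster registered for this hint, so fall back to the raw resource
    return resource

print(get_handle("example.txt", PathLike))  # -> "example.txt", no exception
```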
26 changes: 6 additions & 20 deletions docs/contributing/adding_test_data.qmd
@@ -2,17 +2,11 @@
title: Adding Test Data
---

There are a few different way to add test data to dascore. The key, however,
is to ensure test files and generated patches are small (a few mb at most) so
the documentation and test suite still run quickly.
There are a few different ways to add test data to dascore. The key, however, is to ensure test files and generated patches are small (a few MB at most) so the documentation and test suite still run quickly.

# Adding functions which create example data

The [examples module](`dascore.examples`) contains several functions for creating
example `Patch` and `Spool` instances. You can add a new function in that module
which creates a new patch or spool, then just register the function so it can be
called from `dc.get_example_patch` or `dc.get_example_spool`. These should be simple
objects which can be generated within python. If you need to download a file see
The [examples module](`dascore.examples`) contains several functions for creating example `Patch` and `Spool` instances. You can add a new function in that module which creates a new patch or spool, then just register the function so it can be called from `dc.get_example_patch` or `dc.get_example_spool`. These should be simple objects which can be generated within Python. If you need to download a file, see
[adding a data file](#adding_a_data_file).
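
As a rough illustration, a new example function might look like the sketch below. This is hypothetical: the function name, arguments, and coordinate values are made up, and the registration step inside `dascore.examples` is omitted.

```python
import numpy as np
import dascore as dc

def _example_das_patch(n_time=100, n_dist=50):
    """Build a small in-memory patch suitable for docs and tests."""
    data = np.random.random((n_time, n_dist))
    time = np.datetime64("2023-01-01") + np.arange(n_time) * np.timedelta64(10, "ms")
    distance = np.arange(n_dist) * 1.0
    return dc.Patch(
        data=data,
        coords={"time": time, "distance": distance},
        dims=("time", "distance"),
    )
```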

:::{.callout-note}
@@ -45,24 +39,16 @@ patch_example = dc.get_example_patch("new_das_patch", argument_1="bob")
spool_example = dc.get_example_spool("new_das_spool")
```

If, in the test code, the example patch or spool is used only once, just call
the get_example function in the test. If it is needed multiple times, consider
putting it in a fixture. See [testing](./testing.qmd) for more on fixtures.
If, in the test code, the example patch or spool is used only once, just call the get_example function in the test. If it is needed multiple times, consider putting it in a fixture. See [testing](./testing.qmd) for more on fixtures.
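
A minimal sketch of such a fixture, assuming the built-in "random_das" example patch:

```python
import pytest
import dascore as dc

@pytest.fixture()
def example_patch():
    """A patch reused by several tests in this module."""
    return dc.get_example_patch("random_das")

def test_has_time_dim(example_patch):
    """The example patch should have a time dimension."""
    assert "time" in example_patch.dims
```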

# Adding a data file

Of course, not all data can easily be generated in python. For example, testing
[support for new file formats](./new_format.qmd) typically requires a test file.
Of course, not all data can easily be generated in python. For example, testing [support for new file formats](./new_format.qmd) typically requires a test file.

If you have a small file that isn't already hosted on a permanent site, you can put
it into [dasdae's data repo](https://github.com/DASDAE/test_data).
Simply clone the repo, add you file format, and push back to master or open a
PR on a separate branch and someone will merge it in.
If you have a small file that isn't already hosted on a permanent site, you can put it into [dasdae's data repo](https://github.com/DASDAE/test_data). Simply clone the repo, add your file, and push back to master or open a PR on a separate branch and someone will merge it in.

Next, add your file to dascore's data registry (dascore/data_registry.txt).
You will have to get the sha256 hash of your test file, for that you can simply
use [Pooch's hash_file function](https://www.fatiando.org/pooch/latest/api/generated/pooch.file_hash.html),
and you can create the proper download url using the other entries as examples.
You will have to get the sha256 hash of your test file; for that, you can simply use [Pooch's hash_file function](https://www.fatiando.org/pooch/latest/api/generated/pooch.file_hash.html). You can create the proper download URL using the other entries as examples.
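
For example, the hash can be printed with a couple of lines; `pooch.file_hash` is the function linked above, and the file name is a placeholder:

```python
import pooch

# compute the sha256 checksum of the new test file (placeholder path)
print(pooch.file_hash("jingle_test_file.jgl", alg="sha256"))
```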

The name, hash, and url might look something like this:
```
20 changes: 13 additions & 7 deletions docs/contributing/new_format.qmd
@@ -103,12 +103,12 @@ It is very important that the `scan` method returns *exactly* the same patch inf

## Support for Streams/Buffers

Rather than using paths for the IO methods as shown above, it is better practice to write a `FiberIO` which supports the [python stream interface](https://docs.python.org/3/library/io.html#io.BufferedIOBase), sometimes referred to as buffers, or accept an opened HDF5 file in the form of a `pytables.File` object. There are a few reasons for this:
Rather than using paths for the IO methods as shown above, it is better practice to write a `FiberIO` which supports the [python stream interface](https://docs.python.org/3/library/io.html#io.BufferedIOBase) or an opened HDF5 file in the form of a `pytables.File` object. There are a few reasons for this:

* More types of inputs can be supported, including streaming file contents from the web or in-memory streams like [`BytesIO`](https://docs.python.org/3/library/io.html#io.BytesIO).
* It is usually more efficient since open-file handles can be automatically reused by DASCore.

To make this easy, DASCore will automatically manager and serve the right input to `FiberIO` methods based on type hints. Here are the ones currently supported, all of which are imported from [dascore.io](`dascore.io`) :
To make this easy, DASCore will automatically manage and serve the right input to `FiberIO` methods based on type hints. Here are the ones currently supported, all of which are imported from [dascore.io](`dascore.io`):

1. [BinaryReader](`dascore.io.BinaryReader`) - A stream-like object which must have a `read` and `seek` method.

@@ -118,7 +118,13 @@ To make this easy, DASCore will automatically manager and serve the right input

4. [HDF5Writer](`dascore.io.HDF5Writer`) - An instance of pytables.File which is open in write or append (append is default) mode.

Deciding which to use depends on whether the file is an HDF5 or binary format. Assuming Jingle is a binary file format, here is an implementation which supports binary streams (only showing the `read` method for brevity):
Deciding which to use depends on whether the file is an HDF5-based or binary format.

:::{.callout-note}
If a type hint other than the ones listed above is given to the relevant parameter (`path` or `resource` in these examples), it will have no effect.
:::

Assuming Jingle is a binary file format, here is an implementation which supports binary streams (only showing the `read` method for brevity):


```{python filename="dascore/io/jingle/core.py"}
@@ -137,16 +143,16 @@
preferred_extensions = ('jgl',)
version = '1'

def read(self, stream: BinaryReader, jingle_param=1, **kwargs):
def read(self, resource: BinaryReader, jingle_param=1, **kwargs):
"""
get_format now accepts a stream, which DASCore will ensure is provided.
"""
# raise an error if we get the wrong type
assert isinstance(stream, BinaryReader)
assert isinstance(resource, BinaryReader)
# read first 50 bytes (maybe they have header info)
first_50_bytes = stream.read(50)
first_50_bytes = resource.read(50)
# seek back to byte 20
stream.seek(20)
resource.seek(20)
# etc.

```
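
For an HDF5-based format the pattern is the same, but with the HDF5 hints listed above. The sketch below is hypothetical except for the `HDF5Reader` hint and the fact that DASCore passes an open `pytables.File`; the format name, extensions, and header layout are invented, and only a `scan` method is shown:

```python
from dascore.io import FiberIO, HDF5Reader

class JangleV1(FiberIO):
    """A made-up HDF5-based format illustrating the HDF5Reader hint."""

    name = "jangle"
    preferred_extensions = ("h5", "hdf5")
    version = "1"

    def scan(self, resource: HDF5Reader, **kwargs):
        """DASCore opens the file and passes an open pytables.File here."""
        # pytables exposes header metadata on the root node's attributes
        header = resource.root._v_attrs
        # ... build and return a list of patch metadata from the header
```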
28 changes: 27 additions & 1 deletion tests/test_io/test_io_core.py
@@ -3,7 +3,7 @@
"""
import copy
from pathlib import Path
from typing import Union
from typing import TypeVar, Union

import numpy as np
import pytest
@@ -74,6 +74,19 @@ def scan(self, not_path: BinaryReader):
assert isinstance(not_path, BinaryWriter)


class _FiberUnsupportedTypeHints(FiberIO):
"""A fiber io which implements typehints which have not casting meaning."""

name = "_TypeHinterNotRight"
version = "2"
tvar = TypeVar("tvar", int, float, str, Path)

def read(self, resource: tvar, **kwargs):
"""dummy read"""
with open(resource) as fi:
return fi.read()


class TestFormatManager:
"""tests for the format manager."""

@@ -286,3 +299,16 @@ def test_non_standard_name(self, dummy_text_file):
"""Ensure non-standard names still work."""
io = _FiberCaster()
io.scan(dummy_text_file)

def test_unsupported_typehints(self, dummy_text_file):
"""Ensure FiberIO with non-"special" type hints still works."""
fiberio = _FiberUnsupportedTypeHints()
out = fiberio.read(dummy_text_file)
assert out == Path(dummy_text_file).read_text()

def test_unsupported_type(self, dummy_text_file):
"""Ensure FiberIO from above works with dascore.read"""
name = _FiberUnsupportedTypeHints.name
version = _FiberUnsupportedTypeHints.version
out = dc.read(dummy_text_file, name, version)
assert out == Path(dummy_text_file).read_text()