Skip to content

Commit

Permalink
fix #251 (#256)
Browse files Browse the repository at this point in the history
  • Loading branch information
d-chambers authored Sep 13, 2023
1 parent 2d96d75 commit beb6901
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 20 deletions.
4 changes: 3 additions & 1 deletion dascore/clients/dirspool.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ def __init__(
index_path: Path | None = None,
preferred_format: str | None = None,
select_kwargs: dict | None = None,
merge_kwargs: dict | None = None,
):
super().__init__(select_kwargs=select_kwargs)
super().__init__(select_kwargs=select_kwargs, merge_kwargs=merge_kwargs)
# Init file spool from another file spool
if isinstance(base_path, self.__class__):
self.__dict__.update(copy.deepcopy(base_path.__dict__))
Expand All @@ -60,6 +61,7 @@ def __init__(
self.indexer = base_path
elif isinstance(base_path, Path | str):
self.indexer = DirectoryIndexer(base_path, index_path=index_path)
assert hasattr(self, "indexer"), "indexer not set."
self._preferred_format = preferred_format

def __rich__(self):
Expand Down
32 changes: 13 additions & 19 deletions dascore/core/spool.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,14 +328,11 @@ def __init__(
self._select_kwargs = {} if select_kwargs is None else select_kwargs
self._merge_kwargs = {} if merge_kwargs is None else merge_kwargs

def __getitem__(self, item):
out = self._get_patches_from_index(item)
# a single index was used, should return a single patch
if not isinstance(item, slice):
out = self._unbox_patch(out)
# a slice was used, return a sub-spool
else:
out = self.__class__(out)
def __getitem__(self, item) -> PatchType | BaseSpool:
    """
    Index the spool with either a single index or a slice.

    A slice yields a new spool built from the positionally sliced
    dataframe (via ``new_from_df``); anything else is treated as a single
    index and the corresponding patch is fetched and unboxed.
    """
    if not isinstance(item, slice):
        # a single index was used, should return a single patch
        patches = self._get_patches_from_index(item)
        out = self._unbox_patch(patches)
    else:
        # a slice was used, return a sub-spool
        out = self.new_from_df(df=self._df.iloc[item])
    return out

def __len__(self):
Expand All @@ -354,17 +351,14 @@ def _get_patches_from_index(self, df_ind):
"""Given an index (from current df), return the corresponding patch."""
source = self._source_df
instruction = self._instruction_df
if isinstance(df_ind, slice): # handle slicing
df1 = instruction.loc[instruction["current_index"].values[df_ind]]
else: # Filter instruction df to only include current index.
# handle negative index.
df_ind = df_ind if df_ind >= 0 else len(self._df) + df_ind
try:
inds = self._df.index[df_ind]
except IndexError:
msg = f"index of [{df_ind}] is out of bounds for spool."
raise IndexError(msg)
df1 = instruction[instruction["current_index"] == inds]
# handle negative index.
df_ind = df_ind if df_ind >= 0 else len(self._df) + df_ind
try:
inds = self._df.index[df_ind]
except IndexError:
msg = f"index of [{df_ind}] is out of bounds for spool."
raise IndexError(msg)
df1 = instruction[instruction["current_index"] == inds]
assert not df1.empty
joined = df1.join(source.drop(columns=df1.columns, errors="ignore"))
return self._patch_from_instruction_df(joined)
Expand Down
Binary file removed docs/_static/logo.png
Binary file not shown.
22 changes: 22 additions & 0 deletions tests/test_clients/test_dirspool.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,28 @@ def test_str_columns_in_dataframe(self, diverse_directory_spool):
assert set(df.columns).issuperset(set(expected))


class TestIndexing:
    """Tests for indexing directory spool."""

    def test_slice_to_start(self, diverse_directory_spool):
        """Ensure a slice returns a subspool (shouldn't load data)."""
        out = diverse_directory_spool[0:2]
        # Compare against the source spool's class; checking against
        # out.__class__ would be true for any object and test nothing.
        assert isinstance(out, diverse_directory_spool.__class__)

    def test_slice_to_end(self, diverse_directory_spool):
        """Ensure a slice from the end returns a subspool."""
        out = diverse_directory_spool[-2:]
        assert isinstance(out, diverse_directory_spool.__class__)

    def test_sliced_spool_has_indexer(self, diverse_directory_spool):
        """Ensure the sliced spool still has its indexer."""
        out = diverse_directory_spool[1:3]
        assert hasattr(out, "indexer")
        assert out.indexer.path == diverse_directory_spool.indexer.path
        # ensure we can still load patches from sliced dirspool
        assert isinstance(out[0], dc.Patch)


class TestFileSpoolIntegrations:
"""Small integration tests for the file spool."""

Expand Down

0 comments on commit beb6901

Please sign in to comment.