Skip to content

Commit

Permalink
fix #221 (#225)
Browse files Browse the repository at this point in the history
  • Loading branch information
d-chambers authored Aug 21, 2023
1 parent 5a438aa commit c2296d3
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 4 deletions.
22 changes: 18 additions & 4 deletions dascore/io/prodml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import dascore as dc
from dascore.core.coordmanager import get_coord_manager
from dascore.core.coords import get_coord
from dascore.utils.misc import maybe_get_attrs, unbyte
from dascore.utils.misc import maybe_get_attrs, unbyte, iterate

# --- Getting format/version

Expand Down Expand Up @@ -119,9 +119,24 @@ def _get_prodml_attrs(fi, extras=None) -> list[dict]:
return out


def _get_dims(node):
    """
    Return the dimension names of a node's raw data as a tuple.

    The names are read from the ``Dimensions`` attribute of
    ``node.RawData``; "time, distance" is used when the attribute is
    missing. A string value is split on commas and/or whitespace; any
    other value is walked with ``iterate`` and each element is passed
    through ``unbyte``.
    """
    # This package uses distance rather than locus; rename to reflect this.
    renames = {"locus": "distance", "Locus": "distance", "Time": "time"}
    raw = unbyte(getattr(node.RawData._v_attrs, "Dimensions", "time, distance"))
    if isinstance(raw, str):
        # Treat commas as whitespace so "a, b", "a,b" and "a b" all split alike.
        names = raw.replace(",", " ").split()
    else:
        names = [unbyte(item) for item in iterate(raw)]
    return tuple(renames.get(name, name) for name in names)


def _get_data_attr(attrs, node, time, distance):
    """
    Get the selected data array and its matching coordinate manager.

    A coordinate manager is built from ``attrs["coords"]`` using the
    dimension names reported by ``_get_dims(node)``, then ``select`` is
    applied to ``node.RawData`` with the requested ``time`` and
    ``distance`` arguments.

    Returns a ``(data, coord_manager)`` tuple as produced by ``select``
    (note the order is swapped relative to what ``select`` returns).
    """
    # Use the dimension names stored with the data rather than assuming
    # ("time", "distance"). A manager built with hard-coded dims would be
    # discarded immediately and does not describe files that store their
    # dimensions in a different order.
    dims = _get_dims(node)
    cm = get_coord_manager(attrs["coords"], dims=dims)
    new_cm, data = cm.select(array=node.RawData, time=time, distance=distance)
    return data, new_cm

Expand All @@ -133,8 +148,7 @@ def _read_prodml(fi, distance=None, time=None, attr_cls=dc.PatchAttrs):
out = []
for attrs, node in zip(attr_list, nodes):
data, coords = _get_data_attr(attrs, node, time, distance)
dims = ("time", "distance") # dims are fixed for this file format
if data.size:
pattrs = attr_cls(**attrs)
out.append(dc.Patch(data=data, attrs=pattrs, dims=dims, coords=coords))
out.append(dc.Patch(data=data, attrs=pattrs, coords=coords))
return out
22 changes: 22 additions & 0 deletions tests/test_io/test_prodml/test_prod_ml.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""Generic tests for prodml support."""
from __future__ import annotations

import shutil

import pytest
import tables

import dascore as dc
from dascore.io.core import read
Expand Down Expand Up @@ -34,6 +37,20 @@ class TestProdMLFile:
the Silixa file is technically just ProdML v2.1.
"""

@pytest.fixture(scope="class")
def issue_221_patch_path(self, tmp_path_factory):
    """
    Return the path to a ProdML file copy with altered dimension names.

    The fetched "prodml_2.0.h5" file is copied into a temporary directory
    and the ``Dimensions`` attribute of its first raw data node is
    overwritten with "time, locus" to simulate issue 221.
    """
    target_dir = tmp_path_factory.mktemp("issue_221")
    source = dc.utils.downloader.fetch("prodml_2.0.h5")
    patched_path = shutil.copy2(source, target_dir / "prod_2_monkey_patched.h5")
    # Open in append mode so the attribute change is written to the copy.
    with tables.open_file(patched_path, "a") as h5:
        raw_data = h5.root.Acquisition["Raw[0]"]["RawData"]
        raw_data._v_attrs.Dimensions = "time, locus"
    return patched_path

@pytest.fixture(scope="class")
def silixa_h5_patch(self, idas_h5_example_path):
"""Get the silixa file, return Patch."""
Expand All @@ -49,6 +66,11 @@ def test_has_gauge_length(self, silixa_h5_patch):
patch = silixa_h5_patch
assert hasattr(patch.attrs, "gauge_length")

def test_issue_221(self, issue_221_patch_path):
    """A file whose dimensions read "time, locus" should load as a Patch."""
    spool = dc.read(issue_221_patch_path)
    first = spool[0]
    assert isinstance(first, dc.Patch)


class TestReadQuantXV2:
"""Tests for reading the QuantXV2 format."""
Expand Down

0 comments on commit c2296d3

Please sign in to comment.