Skip to content

Commit

Permalink
properties in top-level functions, improved tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mmmaat committed Mar 12, 2019
1 parent 84092bb commit 741017d
Show file tree
Hide file tree
Showing 8 changed files with 115 additions and 23 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.3.1
1.3.2
43 changes: 30 additions & 13 deletions h5features/h5features.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,21 @@ def read(filename, groupname=None, from_item=None, to_item=None,
:param float to_time: Optional. (defaults to the ending time in
to_item) the specified times are included in the output
:param int index: Optional. For faster access. TODO Document and
test this.
:param int index: Not implemented, raise if used.
:return: A tuple (times, features) such as:
:return: A tuple (times, features) or (times, features,
properties) such as:
* time is a dictionary of 1D arrays values (keys are items).
* features: A dictionary of 2D arrays values (keys are
items) with the 'feature' dimension along the columns and the
* features: A dictionary of 2D arrays values (keys are items)
with the 'features' dimension along the columns and the
'time' dimension along the lines.
* properties: A dictionary of dictionaries (keys are items)
with the corresponding properties. If there are no properties
recorded, this value is not returned.
.. note:: Note that all the files that are present on disk between
to_item and from_item will be loaded and returned. It's the
responsibility of the user to make sure that it will fit into
Expand All @@ -78,11 +82,14 @@ def read(filename, groupname=None, from_item=None, to_item=None,
reader = Reader(filename, groupname)
data = (reader.read(from_item, to_item, from_time, to_time)
if index is None else reader.index_read(index))
return data.dict_labels(), data.dict_features()
if data.has_properties():
return data.dict_labels(), data.dict_features(), data.dict_properties()
else:
return data.dict_labels(), data.dict_features()


def write(filename, groupname, items, times, features,
dformat='dense', chunk_size=0.1, sparsity=0.1, mode='a'):
def write(filename, groupname, items, times, features, properties=None,
dformat='dense', chunk_size='auto', sparsity=0.1, mode='a'):
"""Write h5features data in a HDF5 file.
This function is a wrapper to the Writer class. It has three purposes:
Expand Down Expand Up @@ -115,12 +122,19 @@ def write(filename, groupname, items, times, features,
(accommodating row-major storage in hdf5 files).
:type features: list of 2D numpy arrays
:param properties: Optional. Properties associated with each
item. Properties describe the features associated with each
item in a dictionary. It can store parameters or fields
recorded by the user.
:type properties: list of dictionaries
:param str dformat: Optional. Which format to store the features
into (sparse or dense). Default is dense.
:param float chunk_size: Optional. In Mo, tuning parameter
corresponding to the size of a chunk in the h5file. Ignored if
the file already exists.
corresponding to the size of a chunk in the h5file. By default
the chunk size is guessed automatically. This parameter is
ignored if the file already exists.
:param float sparsity: Optional. Tuning parameter corresponding to
the expected proportion (in [0, 1]) of non-zeros elements on
Expand All @@ -137,12 +151,15 @@ def write(filename, groupname, items, times, features,
"""
# Prepare the data, raise on error
sparsity = sparsity if dformat == 'sparse' else None
data = Data(items, times, features, sparsity=sparsity, check=True)
data = Data(items, times, features, properties=properties,
sparsity=sparsity, check=True)

# Write all that stuff in the HDF5 file's specified group
Writer(filename, chunk_size=chunk_size).write(data, groupname, append=True)


def simple_write(filename, group, times, features, item='item', mode='a'):
def simple_write(filename, group, times, features,
properties=None, item='item', mode='a'):
"""Simplified version of `write()` when there is only one item."""
write(filename, group, [item], [times], [features], mode=mode)
write(filename, group, [item], [times], [features], mode=mode,
properties=[properties] if properties is not None else None)
2 changes: 1 addition & 1 deletion h5features/labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def __eq__(self, other):
return False
# check big data
for i in range(len(self.data)):
if not (self.data[i] == other.data[i]).all():
if not np.array_equal(self.data[i], other.data[i]):
return False
return True
except AttributeError:
Expand Down
3 changes: 2 additions & 1 deletion test/test_compatibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def setup(self):

items, times, features = generate.full(20, 10)
h5f_1_0.write(self.file_v1, 'features', items, times, features)
h5f_1_1.write(self.file_v2, 'features', items, times, features)
h5f_1_1.write(self.file_v2, 'features', items, times, features,
chunk_size=0.1)

def teardown(self):
remove(self.file_v1)
Expand Down
25 changes: 20 additions & 5 deletions test/test_h5features.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def test_simple_write(self):
self.features_0 = np.random.randn(30, 20)
self.times_0 = np.linspace(0, 2, 30)

h5f.simple_write(self.filename, 'f',
self.times_0, self.features_0)
h5f.simple_write(
self.filename, 'f', self.times_0, self.features_0)

with h5py.File(self.filename, 'r') as f:
assert ['f'] == list(f.keys())
Expand Down Expand Up @@ -99,13 +99,28 @@ def setup(self):
def teardown(self):
remove(self.filename)

def test_write_simple(self):
@pytest.mark.parametrize('properties', [False, True])
def test_write_simple(self, properties):
"""write/read a file with a single item of 30 frames"""
nframes = 30
f = np.random.randn(nframes, self.dim)
t = np.linspace(0, 2, nframes)
h5f.simple_write(self.filename, 'group1', t, f, 'item')
tr, fr = h5f.read(self.filename, 'group1')
if properties:
props = {'a': 0, 'b': 'b'}
else:
props = None

h5f.simple_write(
self.filename, 'group1', t, f,
properties=props, item='item', mode='w')

if properties:
tr, fr, pr = h5f.read(self.filename, 'group1')
assert list(pr.keys()) == ['item']
assert pr['item'] == props
else:
tr, fr = h5f.read(self.filename, 'group1')

assert list(tr.keys()) == ['item']
assert list(fr.keys()) == ['item']
assert len(tr['item']) == 30
Expand Down
33 changes: 32 additions & 1 deletion test/test_labels.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Test the labels module of h5features package."""

import h5py
from numpy.random import randn
import pytest
import numpy as np
from numpy.random import randn

from .aux import generate
from .aux.utils import assert_raise, remove
Expand All @@ -25,6 +25,37 @@ def test_label_one_frame_2D():
assert (test == gold).all()


def test_check_bad():
    """Building Labels from malformed data raises an IOError.

    Four invalid inputs are tried in turn; for each one the message
    carried by the raised exception is checked.
    """
    # NOTE: the assertions inspect ``err.value`` (the exception itself).
    # ``str(err)`` would render the pytest ExceptionInfo wrapper, whose
    # text is a possibly truncated repr and not the error message.

    # a set instead of a list
    with pytest.raises(IOError) as err:
        Labels({'foo', 'bar'})
    assert 'not in a list' in str(err.value)

    # an empty list
    with pytest.raises(IOError) as err:
        Labels([])
    assert 'list is empty' in str(err.value)

    # a list whose element is not a numpy array
    with pytest.raises(IOError) as err:
        Labels([{0, 1}])
    assert 'must be numpy arrays' in str(err.value)

    # two arrays with a different size on their second dimension
    with pytest.raises(IOError) as err:
        Labels([np.random.random((2, 2)), np.random.random((2, 3))])
    assert 'must have same shape on 2nd dim' in str(err.value)


def test_equality():
    """Exercise the ``==`` and ``!=`` operators of Labels."""
    raw_labels = generate.labels(10, tformat=2)
    reference = Labels(raw_labels)
    # built from a second, independent call to the generator
    regenerated = Labels(generate.labels(10, tformat=2))
    # same underlying data as the reference, but with another name
    renamed = Labels(raw_labels)
    renamed.name = 'newname'

    # an instance compares equal to itself
    assert reference == reference

    # ... and unequal to the regenerated labels, to an object which is
    # not a Labels instance, and to the renamed copy
    assert reference != regenerated
    assert reference != np.random.random((5, 2))
    assert reference != renamed


class TestParseLabels:
"""Test of the parse_labels function."""
def setup(self):
Expand Down
16 changes: 16 additions & 0 deletions test/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import h5py
import os
import numpy as np
import pytest
Expand All @@ -16,6 +17,21 @@
import h5features as h5f


def test_read_version(tmpdir):
    """Check read_version on a str, a bytes and an unsupported version.

    :param tmpdir: pytest fixture providing a temporary directory in
        which the HDF5 file used by the test is created.

    """
    # Open the file explicitly in write mode: when no mode is given,
    # h5py >= 3.0 opens read-only and fails because the file does not
    # exist yet (older releases defaulted to 'a').
    with h5py.File(str(tmpdir.join('foo.h5')), 'w') as f:
        g = f.create_group('g')

        # version stored as a str attribute
        g.attrs['version'] = '0.1'
        assert h5f.version.read_version(g) == '0.1'

        # version stored as bytes is read back as the same str
        g.attrs['version'] = b'0.1'
        assert h5f.version.read_version(g) == '0.1'

        # an unsupported version is rejected
        g.attrs['version'] = '125'
        with pytest.raises(IOError) as err:
            h5f.version.read_version(g)
        # check the exception message itself, not the text of the
        # pytest ExceptionInfo wrapper
        assert 'version 125 is not supported' in str(err.value)


class TestReader:
def setup(self):
self.filename = 'test.h5'
Expand Down
14 changes: 13 additions & 1 deletion test/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ def test_create_a_file():
remove(name)


def test_bas_version():
    """Instantiating a Writer with an unsupported version raises IOError."""
    with pytest.raises(IOError) as err:
        Writer('a.b', version='a')
    # check the exception message itself, not the text of the pytest
    # ExceptionInfo wrapper
    assert 'version a is not supported' in str(err.value)


class TestInit:
"""Test of Writer.__init__"""
def setup(self):
Expand Down Expand Up @@ -127,7 +133,7 @@ def test_no_append(self, dim):
with h5py.File(self.filename, 'r') as f:
g = f[self.group]
assert len(g['items'][...]) == 10
assert not all([(l == 0).all() for l in g['features'][...]])
assert not all([(h == 0).all() for h in g['features'][...]])

@pytest.mark.parametrize('dim', [1, 2, 10])
def test_append(self, dim):
Expand All @@ -139,6 +145,12 @@ def test_append(self, dim):
10, dim=dim, items_root='items_bis', properties=True)
h5f.Writer(self.filename).write(data2, self.group, append=True)

# cannot append to incompatible versions
with pytest.raises(IOError) as err:
h5f.Writer(self.filename, version='0.1').write(
data2, self.group, append=True)
assert 'versions are different' in str(err)

with h5py.File(self.filename, 'r') as f:
g = f[self.group]
items = g['items'][...]
Expand Down

0 comments on commit 741017d

Please sign in to comment.