properties in top-level functions, improved tests

bootphon · Mar 12, 2019 · 741017d · 741017d
1 parent 84092bb
commit 741017d
Show file tree

Hide file tree

Showing 8 changed files with 115 additions and 23 deletions.
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-1.3.1
+1.3.2
diff --git a/h5features/h5features.py b/h5features/h5features.py
@@ -54,17 +54,21 @@ def read(filename, groupname=None, from_item=None, to_item=None,
     :param float to_time: Optional. (defaults to the ending time in
         to_item) the specified times are included in the output
 
-    :param int index: Optional. For faster access. TODO Document and
-        test this.
+    :param int index: Not implemented, raise if used.
 
-    :return: A tuple (times, features) such as:
+    :return: A tuple (times, features) or (times, features,
+        properties) such as:
 
         * time is a dictionary of 1D arrays values (keys are items).
 
-        * features: A dictionary of 2D arrays values (keys are
-          items) with the 'feature' dimension along the columns and the
+        * features: A dictionary of 2D arrays values (keys are items)
+          with the 'features' dimension along the columns and the
           'time' dimension along the lines.
 
+        * properties: A dictionary of dictionaries (keys are items)
+          with the corresponding properties. If there are no properties
+          recorded, this value is not returned.
+
     .. note:: Note that all the files that are present on disk between
         to_item and from_item will be loaded and returned. It's the
         responsibility of the user to make sure that it will fit into
@@ -78,11 +82,14 @@ def read(filename, groupname=None, from_item=None, to_item=None,
     reader = Reader(filename, groupname)
     data = (reader.read(from_item, to_item, from_time, to_time)
             if index is None else reader.index_read(index))
-    return data.dict_labels(), data.dict_features()
+    if data.has_properties():
+        return data.dict_labels(), data.dict_features(), data.dict_properties()
+    else:
+        return data.dict_labels(), data.dict_features()
 
 
-def write(filename, groupname, items, times, features,
-          dformat='dense', chunk_size=0.1, sparsity=0.1, mode='a'):
+def write(filename, groupname, items, times, features, properties=None,
+          dformat='dense', chunk_size='auto', sparsity=0.1, mode='a'):
     """Write h5features data in a HDF5 file.
 
     This function is a wrapper to the Writer class. It has three purposes:
@@ -115,12 +122,19 @@ def write(filename, groupname, items, times, features,
         (accomodating row-major storage in hdf5 files).
     :type features: list of 2D numpy arrays
 
+    :param properties: Optional. Properties associated with each
+        item. Properties describe the features associated with each
+        item in a dictionary. It can store parameters or fields
+        recorded by the user.
+    :type properties: list of dictionaries
+
     :param str dformat: Optional. Which format to store the features
         into (sparse or dense). Default is dense.
 
     :param float chunk_size: Optional. In Mo, tuning parameter
-        corresponding to the size of a chunk in the h5file. Ignored if
-        the file already exists.
+        corresponding to the size of a chunk in the h5file. By default
+        the chunk size is guessed automatically. This parameter is
+        ignored if the file already exists.
 
     :param float sparsity: Optional. Tuning parameter corresponding to
         the expected proportion (in [0, 1]) of non-zeros elements on
@@ -137,12 +151,15 @@ def write(filename, groupname, items, times, features,
     """
     # Prepare the data, raise on error
     sparsity = sparsity if dformat == 'sparse' else None
-    data = Data(items, times, features, sparsity=sparsity, check=True)
+    data = Data(items, times, features, properties=properties,
+                sparsity=sparsity, check=True)
 
     # Write all that stuff in the HDF5 file's specified group
     Writer(filename, chunk_size=chunk_size).write(data, groupname, append=True)
 
 
-def simple_write(filename, group, times, features, item='item', mode='a'):
+def simple_write(filename, group, times, features,
+                 properties=None, item='item', mode='a'):
     """Simplified version of `write()` when there is only one item."""
-    write(filename, group, [item], [times], [features], mode=mode)
+    write(filename, group, [item], [times], [features], mode=mode,
+          properties=[properties] if properties is not None else None)
diff --git a/h5features/labels.py b/h5features/labels.py
@@ -107,7 +107,7 @@ def __eq__(self, other):
                 return False
             # check big data
             for i in range(len(self.data)):
-                if not (self.data[i] == other.data[i]).all():
+                if not np.array_equal(self.data[i], other.data[i]):
                     return False
             return True
         except AttributeError:

diff --git a/test/test_compatibility.py b/test/test_compatibility.py
@@ -19,7 +19,8 @@ def setup(self):
 
         items, times, features = generate.full(20, 10)
         h5f_1_0.write(self.file_v1, 'features', items, times, features)
-        h5f_1_1.write(self.file_v2, 'features', items, times, features)
+        h5f_1_1.write(self.file_v2, 'features', items, times, features,
+                      chunk_size=0.1)
 
     def teardown(self):
         remove(self.file_v1)

diff --git a/test/test_h5features.py b/test/test_h5features.py
@@ -56,8 +56,8 @@ def test_simple_write(self):
         self.features_0 = np.random.randn(30, 20)
         self.times_0 = np.linspace(0, 2, 30)
 
-        h5f.simple_write(self.filename, 'f',
-                         self.times_0, self.features_0)
+        h5f.simple_write(
+            self.filename, 'f', self.times_0, self.features_0)
 
         with h5py.File(self.filename, 'r') as f:
             assert ['f'] == list(f.keys())
@@ -99,13 +99,28 @@ def setup(self):
     def teardown(self):
         remove(self.filename)
 
-    def test_write_simple(self):
+    @pytest.mark.parametrize('properties', [False, True])
+    def test_write_simple(self, properties):
         """write/read a file with a single item of 30 frames"""
         nframes = 30
         f = np.random.randn(nframes, self.dim)
         t = np.linspace(0, 2, nframes)
-        h5f.simple_write(self.filename, 'group1', t, f, 'item')
-        tr, fr = h5f.read(self.filename, 'group1')
+        if properties:
+            props = {'a': 0, 'b': 'b'}
+        else:
+            props = None
+
+        h5f.simple_write(
+            self.filename, 'group1', t, f,
+            properties=props, item='item', mode='w')
+
+        if properties:
+            tr, fr, pr = h5f.read(self.filename, 'group1')
+            assert list(pr.keys()) == ['item']
+            assert pr['item'] == props
+        else:
+            tr, fr = h5f.read(self.filename, 'group1')
+
         assert list(tr.keys()) == ['item']
         assert list(fr.keys()) == ['item']
         assert len(tr['item']) == 30

diff --git a/test/test_labels.py b/test/test_labels.py
@@ -1,9 +1,9 @@
 """Test the labels module of h5features package."""
 
 import h5py
-from numpy.random import randn
 import pytest
 import numpy as np
+from numpy.random import randn
 
 from .aux import generate
 from .aux.utils import assert_raise, remove
@@ -25,6 +25,37 @@ def test_label_one_frame_2D():
     assert (test == gold).all()
 
 
+def test_check_bad():
+    with pytest.raises(IOError) as err:
+        Labels({'foo', 'bar'})
+    assert 'not in a list' in str(err)
+
+    with pytest.raises(IOError) as err:
+        Labels([])
+    assert 'list is empty' in str(err)
+
+    with pytest.raises(IOError) as err:
+        Labels([{0, 1}])
+    assert 'must be numpy arrays' in str(err)
+
+    with pytest.raises(IOError) as err:
+        Labels([np.random.random((2, 2)), np.random.random((2, 3))])
+    assert 'must have same shape on 2nd dim' in str(err)
+
+
+def test_equality():
+    t1 = generate.labels(10, tformat=2)
+    l1 = Labels(t1)
+    l2 = Labels(generate.labels(10, tformat=2))
+    l3 = Labels(t1)
+    l3.name = 'newname'
+
+    assert l1 == l1
+    assert l1 != l2
+    assert l1 != np.random.random((5, 2))
+    assert l1 != l3
+
+
 class TestParseLabels:
     """Test of the parse_labels function."""
     def setup(self):

diff --git a/test/test_reader.py b/test/test_reader.py
@@ -6,6 +6,7 @@
 
 """
 
+import h5py
 import os
 import numpy as np
 import pytest
@@ -16,6 +17,21 @@
 import h5features as h5f
 
 
+def test_read_version(tmpdir):
+    with h5py.File(str(tmpdir.join('foo.h5'))) as f:
+        g = f.create_group('g')
+        g.attrs['version'] = '0.1'
+        assert h5f.version.read_version(g) == '0.1'
+
+        g.attrs['version'] = b'0.1'
+        assert h5f.version.read_version(g) == '0.1'
+
+        g.attrs['version'] = '125'
+        with pytest.raises(IOError) as err:
+            h5f.version.read_version(g)
+        assert 'version 125 is not supported' in str(err)
+
+
 class TestReader:
     def setup(self):
         self.filename = 'test.h5'

diff --git a/test/test_writer.py b/test/test_writer.py
@@ -21,6 +21,12 @@ def test_create_a_file():
     remove(name)
 
 
+def test_bas_version():
+    with pytest.raises(IOError) as err:
+        Writer('a.b', version='a')
+    assert 'version a is not supported' in str(err)
+
+
 class TestInit:
     """Test of Writer.__init__"""
     def setup(self):
@@ -127,7 +133,7 @@ def test_no_append(self, dim):
         with h5py.File(self.filename, 'r') as f:
             g = f[self.group]
             assert len(g['items'][...]) == 10
-            assert not all([(l == 0).all() for l in g['features'][...]])
+            assert not all([(h == 0).all() for h in g['features'][...]])
 
     @pytest.mark.parametrize('dim', [1, 2, 10])
     def test_append(self, dim):
@@ -139,6 +145,12 @@ def test_append(self, dim):
             10, dim=dim, items_root='items_bis', properties=True)
         h5f.Writer(self.filename).write(data2, self.group, append=True)
 
+        # cannot append to incompatible versions
+        with pytest.raises(IOError) as err:
+            h5f.Writer(self.filename, version='0.1').write(
+                data2, self.group, append=True)
+        assert 'versions are different' in str(err)
+
         with h5py.File(self.filename, 'r') as f:
             g = f[self.group]
             items = g['items'][...]