[MRG] handle empty TSVs robustly and warn (#1038)

* handle empty TSVs robustly and warn
* add whatsnew
* use mne warn, not warnings warn
mne-tools · Aug 4, 2022 · 122f268 · 122f268
1 parent be6bd62
commit 122f268
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 4 deletions.
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
@@ -47,7 +47,9 @@ Detailed list of changes
 
 - :func:`~mne_bids.write_raw_bids` now stores participant weight and height in ``participants.tsv``, by `Richard Höchenberger`_ (:gh:`1031`)
 
-- :func:`~mne_bids.write_raw_bids` now supports EGI format by `Anand Saini`_, `Scott Huberty`_ and `Mathieu Scheltienne`_ (:gh:`1006`)
+- :func:`~mne_bids.write_raw_bids` now supports EGI format, by `Anand Saini`_, `Scott Huberty`_ and `Mathieu Scheltienne`_ (:gh:`1006`)
+
+- TSV files that are empty (i.e., only a header row is present) are now handled more robustly and a warning is issued, by `Stefan Appelhoff`_ (:gh:`1038`)
 
 🧐 API and behavior changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^

diff --git a/mne_bids/tests/test_tsv_handler.py b/mne_bids/tests/test_tsv_handler.py
@@ -63,6 +63,12 @@ def test_tsv_handler(tmp_path):
     d = _from_tsv(d_path)
     assert d['a'] == ['1', '2', '3', '4']
 
+    # test an empty tsv (just headers)
+    _to_tsv(odict(onset=[], duration=[], trial_type=[]), d_path)
+    with pytest.warns(RuntimeWarning, match="TSV file is empty"):
+        d = _from_tsv(d_path)
+    d = _drop(d, "n/a", "trial_type")
+
 
 def test_contains_row_different_types():
     """Test that _contains_row() can handle different dtypes without warning.

diff --git a/mne_bids/tsv_handler.py b/mne_bids/tsv_handler.py
@@ -1,8 +1,10 @@
 """Private functions to handle tabular data."""
-import numpy as np
 from collections import OrderedDict
 from copy import deepcopy
 
+from mne.utils import warn
+import numpy as np
+
 
 def _combine_rows(data1, data2, drop_column=None):
     """Add two OrderedDict's together and optionally drop repeated data.
@@ -109,7 +111,10 @@ def _drop(data, values, column):
     # Cast `values` to the same dtype as `new_data_col` to avoid a NumPy
     # FutureWarning, see
     # https://github.com/mne-tools/mne-bids/pull/372
-    values = np.array(values, dtype=new_data_col.dtype)
+    dtype = new_data_col.dtype
+    if new_data_col.shape == (0,):
+        dtype = np.array(values).dtype
+    values = np.array(values, dtype=dtype)
 
     mask = np.in1d(new_data_col, values, invert=True)
     for key in new_data.keys():
@@ -147,8 +152,16 @@ def _from_tsv(fname, dtypes=None):
     if not len(dtypes) == info.shape[1]:
         raise ValueError('dtypes length mismatch. Provided: {0}, '
                          'Expected: {1}'.format(len(dtypes), info.shape[1]))
+    empty_cols = 0
     for i, name in enumerate(column_names):
-        data_dict[name] = info[:, i].astype(dtypes[i]).tolist()
+        values = info[:, i].astype(dtypes[i]).tolist()
+        data_dict[name] = values
+        if len(values) == 0:
+            empty_cols += 1
+
+    if empty_cols == len(column_names):
+        warn(f"TSV file is empty: '{fname}'")
+
     return data_dict