From c3527fa9e7db0c8b8e97678eb23662fdda02201c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20H=C3=B6chenberger?= Date: Sat, 30 Jul 2022 13:12:02 +0200 Subject: [PATCH] MRG: In BIDSPath, don't infer the datatype from the suffix (#1030) * In BIDSPath.update(), don't infer the datatype from the suffix I needed to set datatype to None to make `find_emptyroom()` deal with the inheritance principle correctly, but turns out it never took effect because `BIDSPath.update()` would silently replace my `datatype=None` with an inferred `datatype='meg'`. * Update doc/whats_new.rst --- doc/whats_new.rst | 2 ++ mne_bids/path.py | 35 ++++++++++++++++------------------- mne_bids/sidecar_updates.py | 3 ++- mne_bids/tests/test_path.py | 18 +++++++++--------- mne_bids/tests/test_update.py | 7 +------ mne_bids/tests/test_write.py | 4 ++-- 6 files changed, 32 insertions(+), 37 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 4ad76a9e3..f50ae7c18 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -48,6 +48,8 @@ Detailed list of changes 🧐 API and behavior changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- In many places, we used to infer the ``datatype`` of a :class:`~mne_bids.BIDSPath` from the ``suffix``, if not explicitly provided. However, this has lead to trouble in certain edge cases. In an effort to reduce the amount of implicit behavior in MNE-BIDS, we now require users to explicitly specify a ``datatype`` whenever the invoked functions or methods expect one, by `Richard Höchenberger`_ (:gh:`1030`) + - :func:`mne_bids.make_dataset_description` now accepts keyword arguments only, and can now also write the following metadata: ``HEDVersion``, ``EthicsApprovals``, ``GeneratedBy``, and ``SourceDatasets``, by `Stefan Appelhoff`_ (:gh:`406`) - The deprecated function ``mne_bids.mark_bad_channels`` has been removed in favor of :func:`mne_bids.mark_channels`, by `Richard Höchenberger`_ (:gh:`1009`) diff --git a/mne_bids/path.py b/mne_bids/path.py index b7a46f592..3eb01943a 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -22,7 +22,7 @@ from mne_bids.config import ( ALLOWED_PATH_ENTITIES, ALLOWED_FILENAME_EXTENSIONS, ALLOWED_FILENAME_SUFFIX, ALLOWED_PATH_ENTITIES_SHORT, - ALLOWED_DATATYPES, SUFFIX_TO_DATATYPE, ALLOWED_DATATYPE_EXTENSIONS, + ALLOWED_DATATYPES, ALLOWED_DATATYPE_EXTENSIONS, ALLOWED_SPACES, reader, ENTITY_VALUE_TYPE) from mne_bids.utils import (_check_key_val, _check_empty_room_basename, @@ -51,10 +51,6 @@ def _find_matched_empty_room(bids_path): 'date set. Cannot get matching empty-room file.') ref_date = raw.info['meas_date'] - if not isinstance(ref_date, datetime): # pragma: no cover - # for MNE < v0.20 - ref_date = datetime.fromtimestamp(raw.info['meas_date'][0]) - emptyroom_dir = BIDSPath(root=bids_root, subject='emptyroom').directory if not emptyroom_dir.exists(): @@ -95,6 +91,7 @@ def _find_matched_empty_room(bids_path): er_bids_path = get_bids_path_from_fname(er_fname, check=False) er_bids_path.subject = 'emptyroom' # er subject entity is different er_bids_path.root = bids_root + er_bids_path.datatype = 'meg' er_meas_date = None # Try to extract date from filename. @@ -229,7 +226,7 @@ class BIDSPath(object): Generate a BIDSPath object and inspect it >>> bids_path = BIDSPath(subject='test', session='two', task='mytask', - ... suffix='ieeg', extension='.edf') + ... suffix='ieeg', extension='.edf', datatype='ieeg') >>> print(bids_path.basename) sub-test_ses-two_task-mytask_ieeg.edf >>> bids_path @@ -733,11 +730,6 @@ def update(self, *, check=None, **kwargs): getattr(self, f'{key}') if hasattr(self, f'_{key}') else None setattr(self, f'_{key}', val) - # infer datatype if suffix is uniquely the datatype - if self.datatype is None and \ - self.suffix in SUFFIX_TO_DATATYPE: - self._datatype = SUFFIX_TO_DATATYPE[self.suffix] - # Perform a check of the entities and revert changes if check fails try: self._check() @@ -899,7 +891,11 @@ def find_empty_room(self, use_sidecar_only=False, verbose=None): 'Please use `bids_path.update(root="")` ' 'to set the root of the BIDS folder to read.') - sidecar_fname = _find_matching_sidecar(self, extension='.json') + sidecar_fname = _find_matching_sidecar( + # needed to deal with inheritance principle + self.copy().update(datatype=None), + extension='.json' + ) with open(sidecar_fname, 'r', encoding='utf-8') as f: sidecar_json = json.load(f) @@ -1237,14 +1233,15 @@ def _parse_ext(raw_fname): return fname, ext -def _infer_datatype_from_path(fname): +def _infer_datatype_from_path(fname: Path): # get the parent - datatype = Path(fname).parent.name - - if any([datatype.startswith(entity) for entity in ['sub', 'ses']]): - datatype = None - - if not datatype: + if fname.exists(): + datatype = fname.parent.name + if any([datatype.startswith(entity) for entity in ['sub', 'ses']]): + datatype = None + elif fname.stem.split('_')[-1] in ('meg', 'eeg', 'ieeg'): + datatype = fname.stem.split('_')[-1] + else: datatype = None return datatype diff --git a/mne_bids/sidecar_updates.py b/mne_bids/sidecar_updates.py index ae8e0d405..45019a59e 100644 --- a/mne_bids/sidecar_updates.py +++ b/mne_bids/sidecar_updates.py @@ -85,7 +85,8 @@ def update_sidecar_json(bids_path, entries, verbose=None): >>> from pathlib import Path >>> root = Path('./mne_bids/tests/data/tiny_bids').absolute() >>> bids_path = BIDSPath(subject='01', task='rest', session='eeg', - ... suffix='eeg', extension='.json', root=root) + ... suffix='eeg', extension='.json', datatype='eeg', + ... root=root) >>> entries = {'PowerLineFrequency': 60} >>> update_sidecar_json(bids_path, entries, verbose=False) diff --git a/mne_bids/tests/test_path.py b/mne_bids/tests/test_path.py index 49df3d225..25ff35d68 100644 --- a/mne_bids/tests/test_path.py +++ b/mne_bids/tests/test_path.py @@ -260,10 +260,8 @@ def test_parse_ext(): @pytest.mark.parametrize('fname', [ 'sub-01_ses-02_task-test_run-3_split-01_meg.fif', 'sub-01_ses-02_task-test_run-3_split-01', - ('/bids_root/sub-01/ses-02/meg/' + - 'sub-01_ses-02_task-test_run-3_split-01_meg.fif'), - ('sub-01/ses-02/meg/' + - 'sub-01_ses-02_task-test_run-3_split-01_meg.fif') + '/bids_root/sub-01/ses-02/meg/sub-01_ses-02_task-test_run-3_split-01_meg.fif', # noqa: E501 + 'sub-01/ses-02/meg/sub-01_ses-02_task-test_run-3_split-01_meg.fif' ]) def test_get_bids_path_from_fname(fname): bids_path = get_bids_path_from_fname(fname) @@ -598,7 +596,7 @@ def test_bids_path(return_bids_test_dir): # ... but raises an error with check=True match = r'space \(foo\) is not valid for datatype \(eeg\)' with pytest.raises(ValueError, match=match): - BIDSPath(subject=subject_id, space='foo', suffix='eeg') + BIDSPath(subject=subject_id, space='foo', suffix='eeg', datatype='eeg') # error check on space for datatypes that do not support space match = 'space entity is not valid for datatype anat' @@ -612,7 +610,8 @@ def test_bids_path(return_bids_test_dir): bids_path_tmpcopy.update(space='CapTrak', check=True) # making a valid space update works - bids_path_tmpcopy.update(suffix='eeg', space="CapTrak", check=True) + bids_path_tmpcopy.update(suffix='eeg', datatype='eeg', + space="CapTrak", check=True) # suffix won't be error checks if initial check was false bids_path.update(suffix=suffix) @@ -628,7 +627,7 @@ def test_bids_path(return_bids_test_dir): # test repr bids_path = BIDSPath(subject='01', session='02', - task='03', suffix='ieeg', + task='03', suffix='ieeg', datatype='ieeg', extension='.edf') assert repr(bids_path) == ('BIDSPath(\n' 'root: None\n' @@ -680,7 +679,8 @@ def test_make_filenames(): # All keys work prefix_data = dict(subject='one', session='two', task='three', acquisition='four', run=1, processing='six', - recording='seven', suffix='ieeg', extension='.json') + recording='seven', suffix='ieeg', extension='.json', + datatype='ieeg') expected_str = ('sub-one_ses-two_task-three_acq-four_run-01_proc-six_' 'rec-seven_ieeg.json') assert BIDSPath(**prefix_data).basename == expected_str @@ -974,7 +974,7 @@ def test_find_emptyroom_ties(tmp_path): 'sample_audvis_trunc_raw.fif') bids_root = str(tmp_path) - bids_path = _bids_path.copy().update(root=bids_root) + bids_path = _bids_path.copy().update(root=bids_root, datatype='meg') session = '20010101' er_dir_path = BIDSPath(subject='emptyroom', session=session, datatype='meg', root=bids_root) diff --git a/mne_bids/tests/test_update.py b/mne_bids/tests/test_update.py index 87d96949a..59f78cfd6 100644 --- a/mne_bids/tests/test_update.py +++ b/mne_bids/tests/test_update.py @@ -107,7 +107,7 @@ def test_update_sidecar_jsons(_get_bids_test_dir, _bids_validate, ('SEEGChannelCount', None, 0)] # get the sidecar json - sidecar_path = bids_path.copy().update(extension='.json') + sidecar_path = bids_path.copy().update(extension='.json', datatype='meg') sidecar_fpath = sidecar_path.fpath with open(sidecar_fpath, 'r', encoding='utf-8') as fin: sidecar_json = json.load(fin) @@ -212,11 +212,6 @@ def test_update_anat_landmarks(tmp_path): update_anat_landmarks(bids_path=bids_path_mri_no_ext, landmarks=landmarks_new) - # Check without datatytpe provided - bids_path_mri_no_datatype = bids_path_mri.copy().update(datatype=None) - update_anat_landmarks(bids_path=bids_path_mri_no_datatype, - landmarks=landmarks) - # Check handling of invalid input bids_path_invalid = bids_path_mri.copy().update(datatype='meg') with pytest.raises(ValueError, match='Can only operate on "anat"'): diff --git a/mne_bids/tests/test_write.py b/mne_bids/tests/test_write.py index 6e79a059f..334e6e0bb 100644 --- a/mne_bids/tests/test_write.py +++ b/mne_bids/tests/test_write.py @@ -446,7 +446,7 @@ def test_line_freq(line_freq, _bids_validate, tmp_path): _bids_validate(bids_root) eeg_json_fpath = (bids_path.copy() - .update(suffix='eeg', extension='.json') + .update(suffix='eeg', datatype='eeg', extension='.json') .fpath) with open(eeg_json_fpath, 'r', encoding='utf-8') as fin: eeg_json = json.load(fin) @@ -916,7 +916,7 @@ def test_kit(_bids_validate, tmp_path): # ensure the marker file is produced in the right place marker_fname = BIDSPath( subject=subject_id, session=session_id, task=task, run=run, - suffix='markers', extension='.sqd', + suffix='markers', extension='.sqd', datatype='meg', root=bids_root) assert op.exists(marker_fname)