From 022402b3c1e652b3589cde7df6a3b0960ec2cfcf Mon Sep 17 00:00:00 2001 From: Samed Rouven Vossberg <57455873+SamedVossberg@users.noreply.github.com> Date: Mon, 17 Jun 2024 18:02:15 +0200 Subject: [PATCH] Timing lsl (#337) * Create Settings class with Pydantic validation * Fixed a lot of bugs * add pydantic dependency * Remove StrEnum * Partially fixed tests * Refactor oscillatory * Fix oscillatory bug * vectorize_bispectra * More validation and settings refcatoring * minor changes * Settings class validation finished, all tests working * Ignore wrong order modules in __init__.py * Fix tests, added retrocompability functions * Add Pydantic dependency * Autoformat * Vectorize bursts calculation * Fix bug * autoformat * typing change * fix enable_all_features * vectorize bursts calculation, fix settings reset bug * review pydantic PR * fix bursts merge, fix osc tests * Remove getattr/setattr where possible * fix bug in sharpwaves * Add some comments, remove seglength * Replace nm_settings.json for a .yaml * Refactor normalizer classes * add more comments about burst vectorization * fix timing and logging in lsl stream * Sharpwave refactor and bugfixing * Speed-up sharpwaves computation * fix sharpeness bug * fix nolds default settings fband name * remove prints * Make MNE-connectivity non-verbose * Refactor nm_normalization + Handle divide by zero in feature normalization * Remove a file * Fix Numpy private function calls for Numpy 2.0 update * add new dependencies for numpy and scipy to be compatible with numpy _core methods * add function to read mne io read_raw data * fix doc example sharpwave * fix add feature example * changes in timing, exclude parllel processing * Update test_feature_sampling_rates.py Remove comments in tests * Update test_timing.py * fix test --------- Co-authored-by: Antonio Martinez Brotons <10654467+toni-neurosc@users.noreply.github.com> Co-authored-by: timonmerk --- py_neuromodulation/nm_mnelsl_generator.py | 2 +- py_neuromodulation/nm_mnelsl_stream.py | 8 +- py_neuromodulation/nm_run_analysis.py | 2 +- py_neuromodulation/nm_sharpwaves.py | 2 +- py_neuromodulation/nm_stream_abc.py | 6 -- py_neuromodulation/nm_stream_offline.py | 116 +++++----------------- tests/test_feature_sampling_rates.py | 2 + tests/test_lsl_stream.py | 34 ------- tests/test_multiprocessing.py | 63 ------------ tests/test_timing.py | 2 +- 10 files changed, 38 insertions(+), 199 deletions(-) delete mode 100644 tests/test_multiprocessing.py diff --git a/py_neuromodulation/nm_mnelsl_generator.py b/py_neuromodulation/nm_mnelsl_generator.py index 4502f7c7..0ea469cc 100644 --- a/py_neuromodulation/nm_mnelsl_generator.py +++ b/py_neuromodulation/nm_mnelsl_generator.py @@ -65,7 +65,7 @@ def __init__( self._path_raw = Path.cwd() / "temp_raw.fif" raw.save(self._path_raw, overwrite=True) - def start_player(self, chunk_size: int = 1, n_repeat: int = 1): + def start_player(self, chunk_size: int = 10, n_repeat: int = 1): """Start MNE-LSL Player Parameters diff --git a/py_neuromodulation/nm_mnelsl_stream.py b/py_neuromodulation/nm_mnelsl_stream.py index c077b129..ef9909bd 100644 --- a/py_neuromodulation/nm_mnelsl_stream.py +++ b/py_neuromodulation/nm_mnelsl_stream.py @@ -68,6 +68,7 @@ def get_next_batch(self) -> Iterator[tuple[np.ndarray, np.ndarray]]: self.last_time = time.time() check_data = None data = None + stream_start_time = None while self.stream.connected: time_diff = time.time() - self.last_time # in s @@ -75,7 +76,8 @@ def get_next_batch(self) -> Iterator[tuple[np.ndarray, np.ndarray]]: if time_diff >= self.sampling_interval: self.last_time = time.time() - logger.info(f"Pull data - current time: {self.last_time}") + logger.debug(f"Pull data - current time: {self.last_time}") + logger.debug(f"time since last data pull {time_diff} seconds") if time_diff >= 2 * self.sampling_interval: logger.warning( @@ -86,6 +88,8 @@ def get_next_batch(self) -> Iterator[tuple[np.ndarray, np.ndarray]]: check_data = data data, timestamp = self.stream.get_data(winsize=self.winsize) + if stream_start_time is None: + stream_start_time = timestamp[0] for i in range(self._n_seconds_wait_before_disconnect): if ( @@ -105,6 +109,8 @@ def get_next_batch(self) -> Iterator[tuple[np.ndarray, np.ndarray]]: yield timestamp, data + logger.info(f"Stream time: {timestamp[-1] - stream_start_time}") + if not self.headless and not self.listener.running: logger.info("Keyboard interrupt") self.stream.disconnect() diff --git a/py_neuromodulation/nm_run_analysis.py b/py_neuromodulation/nm_run_analysis.py index 1a533af7..66671488 100644 --- a/py_neuromodulation/nm_run_analysis.py +++ b/py_neuromodulation/nm_run_analysis.py @@ -282,7 +282,7 @@ def process(self, data: np.ndarray) -> dict[str, float]: features_dict[ch] = np.nan if self.verbose: - logger.info("Last batch took: %.2f seconds", time() - start_time) + logger.info("Last batch took: %.3f seconds to process", time() - start_time) return features_dict diff --git a/py_neuromodulation/nm_sharpwaves.py b/py_neuromodulation/nm_sharpwaves.py index 9f842856..8a3e0eab 100644 --- a/py_neuromodulation/nm_sharpwaves.py +++ b/py_neuromodulation/nm_sharpwaves.py @@ -363,7 +363,7 @@ def analyze_waveform(self, data) -> dict: left_height = data[troughs_valid - int(5 * (1000 / self.sfreq))] right_height = data[troughs_valid + int(5 * (1000 / self.sfreq))] # results["sharpness"] = ((trough_height - left_height) + (trough_height - right_height)) / 2 - results["sharpness"] = trough_height - 0.5 * (left_height + right_height) + results["sharpness"] = trough_height - 0.5 * (left_height + right_height) if self.need_steepness: # steepness is calculated as the first derivative diff --git a/py_neuromodulation/nm_stream_abc.py b/py_neuromodulation/nm_stream_abc.py index 019d6efa..c664c216 100644 --- a/py_neuromodulation/nm_stream_abc.py +++ b/py_neuromodulation/nm_stream_abc.py @@ -122,12 +122,6 @@ def run(self) -> pd.DataFrame: verbose=self.verbose, ) - @abstractmethod - def _add_timestamp(self, feature_series: pd.Series, cnt_samples: int) -> pd.Series: - """Add to feature_series "time" keyword - For Bids specify with fs_features, for real time analysis with current time stamp - """ - @staticmethod def _get_sess_lat(coords: dict) -> bool: if len(coords["cortex_left"]["positions"]) == 0: diff --git a/py_neuromodulation/nm_stream_offline.py b/py_neuromodulation/nm_stream_offline.py index b0d20f3d..5589f8b8 100644 --- a/py_neuromodulation/nm_stream_offline.py +++ b/py_neuromodulation/nm_stream_offline.py @@ -49,18 +49,6 @@ def _add_target(self, feature_dict: dict, data: np.ndarray) -> None: for target_idx, target_name in zip(self.target_indexes, self.target_names): feature_dict[target_name] = data[target_idx, -1] - def _add_timestamp(self, feature_dict: dict, cnt_samples: int) -> None: - """Add time stamp in ms. - - Due to normalization DataProcessor needs to keep track of the counted - samples. These are accessed here for time conversion. - """ - timestamp = cnt_samples * 1000 / self.sfreq - feature_dict["time"] = timestamp - - if self.verbose: - logger.info("%.2f seconds of data processed", timestamp / 1000) - def _handle_data(self, data: np.ndarray | pd.DataFrame) -> np.ndarray: names_expected = self.nm_channels["name"].to_list() @@ -87,35 +75,6 @@ def _handle_data(self, data: np.ndarray | pd.DataFrame) -> np.ndarray: ) return data.to_numpy().transpose() - def _check_settings_for_parallel(self): - """Check specified settings and raise error if parallel processing is not possible. - - Raises: - ValueError: depending on the settings, parallel processing is not possible - """ - - if "raw_normalization" in self.settings.preprocessing: - raise ValueError( - "Parallel processing is not possible with raw_normalization normalization." - ) - if self.settings.postprocessing.feature_normalization: - raise ValueError( - "Parallel processing is not possible with feature normalization." - ) - if self.settings.features.bursts: - raise ValueError( - "Parallel processing is not possible with burst estimation." - ) - - def _process_batch(self, data_batch, cnt_samples): - # if isinstance(data_batch, tuple): - # data_batch = np.array(data_batch[1]) - - feature_dict = self.data_processor.process(data_batch[1].astype(np.float64)) - self._add_timestamp(feature_dict, cnt_samples) - self._add_target(feature_dict, data_batch[1]) - return feature_dict - def _run( self, data: np.ndarray | pd.DataFrame | None = None, @@ -124,8 +83,6 @@ def _run( is_stream_lsl: bool = True, stream_lsl_name: str = None, plot_lsl: bool = False, - parallel: bool = False, - n_jobs: int = -2, ) -> pd.DataFrame: from py_neuromodulation.nm_generator import raw_data_generator @@ -159,54 +116,38 @@ def _run( generator = self.lsl_stream.get_next_batch() - sample_add = self.sfreq / self.data_processor.sfreq_features + l_features: list[dict] = [] + last_time = None - offset_time = self.settings.segment_length_features_ms - # offset_start = np.ceil(offset_time / 1000 * self.sfreq).astype(int) - offset_start = offset_time / 1000 * self.sfreq + while True: + next_item = next(generator, None) - if parallel: - from joblib import Parallel, delayed - from itertools import count + if next_item is not None: + time_, data_batch = next_item + else: + break - # parallel processing can not be utilized if a LSL stream is used - if is_stream_lsl: - error_msg = "Parallel processing is not possible with LSL stream." - logger.error(error_msg) - raise ValueError(error_msg) - - l_features = Parallel(n_jobs=n_jobs, verbose=10)( - delayed(self._process_batch)(data_batch, cnt_samples) - for data_batch, cnt_samples in zip( - generator, count(offset_start, sample_add) - ) + if data_batch is None: + break + feature_dict = self.data_processor.process( + data_batch.astype(np.float64) ) + if is_stream_lsl: + feature_dict["time"] = time_[-1] + if self.verbose: + if last_time is not None: + logger.debug("%.3f seconds of new data processed", time_[-1] - last_time) + last_time = time_[-1] + else: + feature_dict["time"] = np.ceil(time_[-1] * 1000 +1 ).astype(int) + logger.info("Time: %.2f", feature_dict["time"]/1000) + - else: - l_features: list[dict] = [] - cnt_samples = offset_start - - while True: - next_item = next(generator, None) - - if next_item is not None: - time_, data_batch = next_item - else: - break - - if data_batch is None: - break - feature_dict = self.data_processor.process( - data_batch.astype(np.float64) - ) - self._add_timestamp(feature_dict, cnt_samples) - self._add_target(feature_dict, data_batch) - - l_features.append(feature_dict) + self._add_target(feature_dict, data_batch) - cnt_samples += sample_add + l_features.append(feature_dict) - feature_df = pd.DataFrame.from_records(l_features).astype(np.float64) + feature_df = pd.DataFrame(l_features) self.save_after_stream(out_path_root, folder_name, feature_df) @@ -344,8 +285,6 @@ def run( data: np.ndarray | pd.DataFrame | None = None, out_path_root: _PathLike = Path.cwd(), folder_name: str = "sub", - parallel: bool = False, - n_jobs: int = -2, stream_lsl: bool = False, stream_lsl_name: str = None, plot_lsl: bool = False, @@ -380,9 +319,6 @@ def run( elif self.data is None and data is None and self.stream_lsl is False: raise ValueError("No data passed to run function.") - if parallel: - self._check_settings_for_parallel() - out_path = Path(out_path_root, folder_name) out_path.mkdir(parents=True, exist_ok=True) logger.log_to_file(out_path) @@ -391,8 +327,6 @@ def run( data, out_path_root, folder_name, - parallel=parallel, - n_jobs=n_jobs, is_stream_lsl=stream_lsl, stream_lsl_name=stream_lsl_name, plot_lsl=plot_lsl, diff --git a/tests/test_feature_sampling_rates.py b/tests/test_feature_sampling_rates.py index 92a75963..bb9e9319 100644 --- a/tests/test_feature_sampling_rates.py +++ b/tests/test_feature_sampling_rates.py @@ -27,6 +27,8 @@ def test_different_sampling_rate_100Hz(): df = stream.run(arr_test) # check the difference between time points + #print(df["time"].iloc[:2]) + #print(sampling_rate_features) assert np.diff(df["time"].iloc[:2]) / 1000 == (1 / sampling_rate_features) diff --git a/tests/test_lsl_stream.py b/tests/test_lsl_stream.py index 33a8a3d0..520f6383 100644 --- a/tests/test_lsl_stream.py +++ b/tests/test_lsl_stream.py @@ -5,40 +5,6 @@ import time import threading - -def test_mne_lsl(setup_default_data): - """ Test the mne-lsl package and the core functionality of the player and stream classes. """ - raw, data, sfreq = setup_default_data - - player1 = PlayerLSL(raw, name="general_lsl_test_stream", chunk_size=10) - player1 = player1.start() - - stream1 = StreamLSL(name="general_lsl_test_stream", bufsize=2).connect() - ch_types = stream1.get_channel_types(unique=True) - assert 'dbs' in ch_types, "Expected at least one dbs channel from example data" - assert player1.info['nchan'] == 10, "Expected 10 channels from example data" - data_l = [] - timestamps_l = [] - - def call_every_100ms(): - data, timestamps = stream1.get_data(winsize=10) - data_l.append(data) - timestamps_l.append(timestamps) - - t = threading.Timer(0.1, call_every_100ms) - t.start() - - time_start = time.time() - - while time.time() - time_start <= 10: - time.sleep(1) - t.cancel() - - collected_data_shape = np.concatenate(data_l).shape - assert collected_data_shape[0] > 0 and collected_data_shape[1] > 0, "Expected non-empty data" - - stream1.disconnect() - player1.stop() @pytest.mark.parametrize('setup_lsl_player', ['search'], indirect=True) def test_lsl_stream_search(setup_lsl_player): diff --git a/tests/test_multiprocessing.py b/tests/test_multiprocessing.py deleted file mode 100644 index 5a64b399..00000000 --- a/tests/test_multiprocessing.py +++ /dev/null @@ -1,63 +0,0 @@ -import py_neuromodulation as nm -import numpy as np -import pytest - - -@pytest.fixture -def get_stream(): - NUM_CHANNELS = 10 - NUM_DATA = 10000 - sfreq = 1000 # Hz - sampling_rate_features_hz = 3 # Hz - - data = np.random.random([NUM_CHANNELS, NUM_DATA]) - - stream = nm.Stream( - sfreq=sfreq, - data=data, - sampling_rate_features_hz=sampling_rate_features_hz, - ) - stream.nm_channels.loc[0, "target"] = 1 - stream.nm_channels.loc[0, "used"] = 0 - - stream.settings.postprocessing.feature_normalization = False - stream.settings.segment_length_features_ms = 5000 - - stream.settings.features.disable_all() - - stream.settings.features.nolds = False - stream.settings.features.fooof = True - stream.settings.features.bursts = False - stream.settings.features.mne_connectivity = False - - stream.settings.coherence.channels = [("ch1", "ch2")] - - return stream - - -def test_setting_exception(get_stream): - stream = get_stream - stream.settings.features.bursts = True - - with pytest.raises(Exception): - stream.run(parallel=True, n_jobs=-1) - - -def test_multiprocessing_and_sequntial_features(get_stream): - stream_seq = get_stream - features_sequential = stream_seq.run(parallel=False) - - stream_par = get_stream - features_multiprocessing = stream_par.run(parallel=True, n_jobs=-1) - - for column in features_sequential.columns: - if "fooof" in column: - # fooof results are different in multiprocessing and sequential processing - # This tests fails on Linux and Windows but passes on Mac OS; no idea why - continue - - assert features_sequential[ - column - ].equals( - features_multiprocessing[column] - ), f"Column {column} is not equal between sequential and parallel dataframes computation" diff --git a/tests/test_timing.py b/tests/test_timing.py index f0cc8b83..f2a8b5f2 100644 --- a/tests/test_timing.py +++ b/tests/test_timing.py @@ -75,4 +75,4 @@ def test_float_fs(): features.time.iloc[1] - features.time.iloc[0] ) == 1000 / sampling_rate_features_hz - assert features.time.iloc[0] == settings.segment_length_features_ms + assert features["time"].iloc[0] - 1 == settings["segment_length_features_ms"] # remove 1 due to python counting