Skip to content

Commit

Permalink
Merge pull request #739 from neuropsychology/add_microstates_example
Browse files Browse the repository at this point in the history
[Example] Add microstates analysis example
  • Loading branch information
DominiqueMakowski authored Nov 1, 2022
2 parents d37ca7a + d2c40b5 commit b480b77
Show file tree
Hide file tree
Showing 10 changed files with 939 additions and 77 deletions.
701 changes: 701 additions & 0 deletions docs/examples/eeg_microstates/eeg_microstates.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/examples/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ EEG

eeg_power/eeg_power
eeg_complexity/eeg_complexity
eeg_microstates/eeg_microstates

Misc
=============
Expand Down
17 changes: 16 additions & 1 deletion neurokit2/markov/transition_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,21 @@ def transition_matrix(sequence, order=1, adjust=True, show=False):
**discrete Markov chains**. Each of its entries is a probability of transitioning from one
state to the other.
.. note::
This function is fairly new and hasn't be tested extensively. Please help us by
double-checking the code and letting us know if everything is correct.
Parameters
----------
sequence : Union[list, np.array, pd.Series]
A list of discrete states.
order : int
The order of the Markov chain.
adjust : bool
If ``True``, the transition matrix will be adjusted to ensure that the sum of each row is
equal to 1. This is useful when the transition matrix is used to represent a probability
distribution.
show : bool
Displays the transition matrix heatmap.
Expand Down Expand Up @@ -116,7 +127,11 @@ def transition_matrix(sequence, order=1, adjust=True, show=False):
# Loop over data dimensions and create text annotations.
for i, row in enumerate(tm.index):
for j, col in enumerate(tm.columns):
ax.text(j, i, f"{tm.loc[row, col]:.2f}", ha="center", va="center", color="w")
if tm.loc[row, col] > 0.5:
color = "white"
else:
color = "black"
ax.text(j, i, f"{tm.loc[row, col]:.2f}", ha="center", va="center", color=color)
ax.set_title("Transition Matrix")
fig.tight_layout()

Expand Down
40 changes: 32 additions & 8 deletions neurokit2/microstates/microstates_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,24 @@
import numpy as np
import pandas as pd

from .microstates_peaks import microstates_peaks
from ..eeg import eeg_gfp
from ..stats import standardize
from .microstates_peaks import microstates_peaks


def microstates_clean(eeg, sampling_rate=None, train="gfp", standardize_eeg=True, normalize=True, gfp_method="l1", **kwargs):
def microstates_clean(
eeg,
sampling_rate=None,
train="gfp",
standardize_eeg=True,
normalize=True,
gfp_method="l1",
**kwargs
):
"""**Prepare eeg data for microstates extraction**
This is mostly a utility function to get the data ready for :func:`.microstates_segment`.
Parameters
----------
eeg : np.ndarray
Expand All @@ -18,7 +28,7 @@ def microstates_clean(eeg, sampling_rate=None, train="gfp", standardize_eeg=True
The sampling frequency of the signal (in Hz, i.e., samples/second). Defaults to ``None``.
train : Union[str, int, float]
Method for selecting the timepoints how which to train the clustering algorithm. Can be
``"gfp"`` to use the peaks found in the ``Peaks`` in the global field power. Can be
``"gfp"`` to use the peaks found in the global field power (GFP). Can be
``"all"``, in which case it will select all the datapoints. It can also be a number or a
ratio, in which case it will select the corresponding number of evenly spread data points.
For instance, ``train=10`` will select 10 equally spaced datapoints, whereas ``train=0.5``
Expand Down Expand Up @@ -46,9 +56,19 @@ def microstates_clean(eeg, sampling_rate=None, train="gfp", standardize_eeg=True
info : dict
Other information pertaining to the eeg raw object.
Examples
---------
.. ipython:: python
import neurokit2 as nk
eeg = nk.mne_data("filt-0-40_raw")
eeg, peaks, gfp, info = nk.microstates_clean(eeg, train="gfp")
See Also
--------
eeg_gfp, microstates_peaks
.eeg_gfp, microstates_peaks, .microstates_segment
"""
# If MNE object
Expand All @@ -64,11 +84,15 @@ def microstates_clean(eeg, sampling_rate=None, train="gfp", standardize_eeg=True
eeg = standardize(eeg, **kwargs)

# Get GFP
gfp = eeg_gfp(eeg, sampling_rate=sampling_rate, normalize=normalize, method=gfp_method, **kwargs)
gfp = eeg_gfp(
eeg, sampling_rate=sampling_rate, normalize=normalize, method=gfp_method, **kwargs
)

# If train is a custom of vector (assume it's the pre-computed peaks)
if isinstance(train, (list, np.ndarray)):
peaks = train
# Find peaks in the global field power (GFP) or take a given amount of indices
if train == "gfp":
train = gfp
peaks = microstates_peaks(eeg, gfp=train, sampling_rate=sampling_rate, **kwargs)
else:
peaks = microstates_peaks(eeg, gfp=train, sampling_rate=sampling_rate, **kwargs)

return eeg, peaks, gfp, info
51 changes: 45 additions & 6 deletions neurokit2/microstates/microstates_complexity.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,60 @@
# -*- coding: utf-8 -*-
import pandas as pd
from ..misc import as_vector

from ..complexity import entropy_shannon
from ..misc import as_vector


def microstates_complexity(microstates, show=False):
"""**Complexity of Microstates Pattern**
This computes the complexity related to the sequence of the microstates pattern.
.. note::
def microstates_complexity(microstates):
"""Complexity of microstates pattern
This function does not compute all the features available under the complexity
submodule. Don't hesitate to open an issue to help us test and decide what features to
include.
Parameters
----------
microstates : np.ndarray
The topographic maps of the found unique microstates which has a shape of n_channels x
n_states, generated from :func:`.nk.microstates_segment`.
show : bool
Show the transition matrix.
See Also
--------
.microstates_dynamic, .microstates_static
Examples
--------
.. ipython:: python
import neurokit2 as nk
microstates = [0, 0, 0, 1, 1, 2, 2, 2, 2, 1, 0, 0, 2, 2]
@savefig p_microstates_complexity1.png scale=100%
nk.microstates_complexity(microstates, show=True)
@suppress
plt.close()
"""
# Try retrieving info
if isinstance(microstates, dict):
microstates = microstates["Sequence"]
# Sanitize
microstates = as_vector(microstates)

# Initialize output container
out = {}

# Empirical Shannon entropy
out["Entropy_Shannon"] = entropy_shannon(microstates)
out["Entropy_Shannon"], _ = entropy_shannon(microstates, show=show)

# Maximym entropy given the number of different states
# h_max = np.log2(len(np.unique(microstates)))
# h_max = np.log2(len(np.unique(microstates)))

df = pd.DataFrame.from_dict(out, orient="index").T.add_prefix("Microstate_")
df = pd.DataFrame.from_dict(out, orient="index").T.add_prefix("Microstates_")
return df
49 changes: 35 additions & 14 deletions neurokit2/microstates/microstates_dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,74 @@
import pandas as pd

from ..markov import transition_matrix
from ..misc import as_vector


def microstates_dynamic(microstates):
"""**Dynamic properties of microstates (transition pattern)**
def microstates_dynamic(microstates, show=False):
"""**Dynamic Properties of Microstates**
Based on https://github.com/Frederic-vW/eeg_microstates and https://github.com/maximtrp/mchmm
This computes statistics related to the transition pattern (based on the
:func:`.transition_matrix`).
.. note::
This function does not compute all the features available under the markov submodule.
Don't hesitate to open an issue to help us test and decide what features to include.
Parameters
----------
microstates : np.ndarray
The topographic maps of the found unique microstates which has a shape of n_channels x
n_states, generated from :func:`.nk.microstates_segment`.
show : bool
Show the transition matrix.
Returns
-------
DataFrame
Dynamic properties of microstates:
* Results of the observed transition matrix
* Chi-square test statistics of the observed microstates against the expected microstates
* Symmetry test statistics of the observed microstates against the expected microstates
Dynamic properties of microstates.
See Also
--------
transition_matrix
.transition_matrix, .microstates_static
Examples
--------
.. ipython:: python
import neurokit2 as nk
microstates = [0, 0, 0, 1, 1, 2, 2, 2, 2, 1, 0, 0]
nk.microstates_dynamic(microstates)
microstates = [0, 0, 0, 1, 1, 2, 2, 2, 2, 1, 0, 0, 2, 2]
@savefig p_microstates_dynamic1.png scale=100%
nk.microstates_dynamic(microstates, show=True)
@suppress
plt.close()
"""
# See https://github.com/Frederic-vW/eeg_microstates
# and https://github.com/maximtrp/mchmm
# for other implementations

out = {}

# Try retrieving info
if isinstance(microstates, dict):
microstates = microstates["Sequence"]
# Sanitize
microstates = as_vector(microstates)

# Transition matrix
tm, info = transition_matrix(microstates)
tm, info = transition_matrix(microstates, show=show)

for row in tm.index:
for col in tm.columns:
out[str(tm.loc[row].name) + "_to_" + str(tm[col].name)] = tm[col][row]

# out.update(results)
# out.pop("Expected")

df = pd.DataFrame.from_dict(out, orient="index").T.add_prefix("Microstate_")

# TODO:
# * Chi-square test statistics of the observed microstates against the expected microstates
# * Symmetry test statistics of the observed microstates against the expected microstates

return df
68 changes: 48 additions & 20 deletions neurokit2/microstates/microstates_findnumber.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,48 @@
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ..misc import find_knee, progress_bar
from ..stats.cluster_quality import _cluster_quality_dispersion
from .microstates_segment import microstates_segment


def microstates_findnumber(eeg, n_max=12, show=False, **kwargs):
def microstates_findnumber(
eeg, n_max=12, method="GEV", clustering_method="kmod", show=False, verbose=True, **kwargs
):
"""**Estimate optimal number of microstates**
Estimate the optimal number of microstates using a variety of indices.
Computes statistical indices useful for estimating the optimal number of microstates using a
* **Global Explained Variance (GEV)**: measures how similar each EEG sample is to its assigned
microstate class. The **higher** (closer to 1), the better the segmentation.
* **Krzanowski-Lai Criterion (KL)**: measures quality of microstate segmentation based on the
dispersion measure (average distance between samples in the same microstate class); the
**larger the KL value**, the better the segmentation. Note that KL is not a polarity
invariant measure and thus might not be a suitable measure of fit for polarity-invariant
methods such as modified K-means and (T)AAHC.
Parameters
----------
eeg : np.ndarray
An array (channels, times) of M/EEG data or a Raw or Epochs object from MNE.
n_max : int
Maximum number of microstates to try. A higher number leads to a longer process.
method : str
The method to use to estimate the optimal number of microstates. Can be "GEV" (the elbow,
detected using :func:`find_knee`), or "KL" (the location of the maximum value).
show : bool
Plot indices normalized on the same scale.
verbose : bool
Print progress bar.
**kwargs
Arguments to be passed to :func:`.microstates_segment`
Returns
-------
int
Optimal number of microstates.
DataFrame
The different quality scores for each number of microstates.
Expand All @@ -37,11 +56,13 @@ def microstates_findnumber(eeg, n_max=12, show=False, **kwargs):
import neurokit2 as nk
eeg = nk.mne_data("filt-0-40_raw").filter(1, 35)
eeg = nk.eeg_rereference(eeg, 'average')
eeg = nk.mne_data("filt-0-40_raw").crop(0, 5)
# Estimate optimal number (currently comment out due to memory error)
# results = nk.microstates_findnumber(eeg, n_max=4, show=True, method="kmod")
# Estimate optimal number
@savefig p_microstates_findnumber1.png scale=100%
n_clusters, results = nk.microstates_findnumber(eeg, n_max=8, show=True)
@suppress
plt.close()
"""
# Retrieve data
Expand All @@ -57,17 +78,13 @@ def microstates_findnumber(eeg, n_max=12, show=False, **kwargs):
dispersion_previous = np.nan
dispersion_diff_previous = np.nan
results = []
for idx, n_microstates in enumerate(range(2, n_max + 1)):
print(idx, n_microstates)
out = microstates_segment(eeg, n_microstates=n_microstates)

segmentation = out["Sequence"]
# info = out["Info_algorithm"]
# sd = out["GFP"]
for idx, n_microstates in progress_bar(range(2, n_max + 1), verbose=verbose):

# nk.cluster_quality(data.T, segmentation, clusters=microstates, info=info, n_random=10, sd=gfp)
out = microstates_segment(
eeg, n_microstates=n_microstates, method=clustering_method, **kwargs
)

# _, rez = _cluster_quality_sklearn(data.T, segmentation, microstates)
segmentation = out["Sequence"]
rez = {}

rez["Score_GEV"] = out["GEV"]
Expand All @@ -92,14 +109,25 @@ def microstates_findnumber(eeg, n_max=12, show=False, **kwargs):
dispersion_diff_previous = dispersion_diff.copy()

results.append(rez)

results = pd.DataFrame(results)
# results.append(pd.DataFrame.from_dict(rez, orient="index").T)
# results = pd.concat(results, axis=0).reset_index(drop=True)

# Estimate optimal number
if method == "KL":
n_clusters = int(np.argmax(results["KL_Criterion"]) + 2)
else:
n_clusters = find_knee(results["Score_GEV"], np.rint(np.arange(2, n_max + 1)))

if show is True:
normalized = (results - results.min()) / (results.max() - results.min())
normalized["n_Clusters"] = np.rint(np.arange(2, n_max))
normalized["n_Clusters"] = np.rint(np.arange(2, n_max + 1))
normalized.columns = normalized.columns.str.replace("Score", "Normalized")
normalized.plot(x="n_Clusters")

return results
plt.axvline(n_clusters, color="red", linestyle="--", label=f"Method: {method}")
plt.legend()
plt.xticks(np.rint(np.arange(2, n_max + 1)))
plt.xlabel("Number of microstates")
plt.ylabel("Normalized score")
plt.title("Optimal number of microstates")

return n_clusters, results
Loading

0 comments on commit b480b77

Please sign in to comment.