Merge pull request #739 from neuropsychology/add_microstates_example

[Example] Add microstates analysis example
neuropsychology · Nov 1, 2022 · b480b77 · b480b77
2 parents d37ca7a + d2c40b5
commit b480b77
Show file tree

Hide file tree

Showing 10 changed files with 939 additions and 77 deletions.
diff --git a/docs/examples/eeg_microstates/eeg_microstates.ipynb b/docs/examples/eeg_microstates/eeg_microstates.ipynb
diff --git a/docs/examples/index.rst b/docs/examples/index.rst
@@ -29,6 +29,7 @@ EEG
 
    eeg_power/eeg_power
    eeg_complexity/eeg_complexity
+   eeg_microstates/eeg_microstates
 
 Misc
 =============

diff --git a/neurokit2/markov/transition_matrix.py b/neurokit2/markov/transition_matrix.py
@@ -14,10 +14,21 @@ def transition_matrix(sequence, order=1, adjust=True, show=False):
     **discrete Markov chains**. Each of its entries is a probability of transitioning from one
     state to the other.
 
+    .. note::
+
+        This function is fairly new and hasn't be tested extensively. Please help us by
+        double-checking the code and letting us know if everything is correct.
+
     Parameters
     ----------
     sequence : Union[list, np.array, pd.Series]
         A list of discrete states.
+    order : int
+        The order of the Markov chain.
+    adjust : bool
+        If ``True``, the transition matrix will be adjusted to ensure that the sum of each row is
+        equal to 1. This is useful when the transition matrix is used to represent a probability
+        distribution.
     show : bool
         Displays the transition matrix heatmap.
 
@@ -116,7 +127,11 @@ def transition_matrix(sequence, order=1, adjust=True, show=False):
         # Loop over data dimensions and create text annotations.
         for i, row in enumerate(tm.index):
             for j, col in enumerate(tm.columns):
-                ax.text(j, i, f"{tm.loc[row, col]:.2f}", ha="center", va="center", color="w")
+                if tm.loc[row, col] > 0.5:
+                    color = "white"
+                else:
+                    color = "black"
+                ax.text(j, i, f"{tm.loc[row, col]:.2f}", ha="center", va="center", color=color)
         ax.set_title("Transition Matrix")
         fig.tight_layout()
 

diff --git a/neurokit2/microstates/microstates_clean.py b/neurokit2/microstates/microstates_clean.py
@@ -2,14 +2,24 @@
 import numpy as np
 import pandas as pd
 
-from .microstates_peaks import microstates_peaks
 from ..eeg import eeg_gfp
 from ..stats import standardize
+from .microstates_peaks import microstates_peaks
 
 
-def microstates_clean(eeg, sampling_rate=None, train="gfp", standardize_eeg=True, normalize=True, gfp_method="l1", **kwargs):
+def microstates_clean(
+    eeg,
+    sampling_rate=None,
+    train="gfp",
+    standardize_eeg=True,
+    normalize=True,
+    gfp_method="l1",
+    **kwargs
+):
     """**Prepare eeg data for microstates extraction**
 
+    This is mostly a utility function to get the data ready for :func:`.microstates_segment`.
+
     Parameters
     ----------
     eeg : np.ndarray
@@ -18,7 +28,7 @@ def microstates_clean(eeg, sampling_rate=None, train="gfp", standardize_eeg=True
         The sampling frequency of the signal (in Hz, i.e., samples/second). Defaults to ``None``.
     train : Union[str, int, float]
         Method for selecting the timepoints how which to train the clustering algorithm. Can be
-        ``"gfp"`` to use the peaks found in the ``Peaks`` in the global field power. Can be
+        ``"gfp"`` to use the peaks found in the global field power (GFP). Can be
         ``"all"``, in which case it will select all the datapoints. It can also be a number or a
         ratio, in which case it will select the corresponding number of evenly spread data points.
         For instance, ``train=10`` will select 10 equally spaced datapoints, whereas ``train=0.5``
@@ -46,9 +56,19 @@ def microstates_clean(eeg, sampling_rate=None, train="gfp", standardize_eeg=True
     info : dict
         Other information pertaining to the eeg raw object.
 
+    Examples
+    ---------
+    .. ipython:: python
+
+      import neurokit2 as nk
+
+      eeg = nk.mne_data("filt-0-40_raw")
+      eeg, peaks, gfp, info = nk.microstates_clean(eeg, train="gfp")
+
+
     See Also
     --------
-    eeg_gfp, microstates_peaks
+    .eeg_gfp, microstates_peaks, .microstates_segment
 
     """
     # If MNE object
@@ -64,11 +84,15 @@ def microstates_clean(eeg, sampling_rate=None, train="gfp", standardize_eeg=True
         eeg = standardize(eeg, **kwargs)
 
     # Get GFP
-    gfp = eeg_gfp(eeg, sampling_rate=sampling_rate, normalize=normalize, method=gfp_method, **kwargs)
+    gfp = eeg_gfp(
+        eeg, sampling_rate=sampling_rate, normalize=normalize, method=gfp_method, **kwargs
+    )
 
+    # If train is a custom of vector (assume it's the pre-computed peaks)
+    if isinstance(train, (list, np.ndarray)):
+        peaks = train
     # Find peaks in the global field power (GFP) or take a given amount of indices
-    if train == "gfp":
-        train = gfp
-    peaks = microstates_peaks(eeg, gfp=train, sampling_rate=sampling_rate, **kwargs)
+    else:
+        peaks = microstates_peaks(eeg, gfp=train, sampling_rate=sampling_rate, **kwargs)
 
     return eeg, peaks, gfp, info
diff --git a/neurokit2/microstates/microstates_complexity.py b/neurokit2/microstates/microstates_complexity.py
@@ -1,21 +1,60 @@
 # -*- coding: utf-8 -*-
 import pandas as pd
-from ..misc import as_vector
+
 from ..complexity import entropy_shannon
+from ..misc import as_vector
+
+
+def microstates_complexity(microstates, show=False):
+    """**Complexity of Microstates Pattern**
+
+    This computes the complexity related to the sequence of the microstates pattern.
 
+    .. note::
 
-def microstates_complexity(microstates):
-    """Complexity of microstates pattern
+        This function does not compute all the features available under the complexity
+        submodule. Don't hesitate to open an issue to help us test and decide what features to
+        include.
+
+    Parameters
+    ----------
+    microstates : np.ndarray
+        The topographic maps of the found unique microstates which has a shape of n_channels x
+        n_states, generated from :func:`.nk.microstates_segment`.
+    show : bool
+        Show the transition matrix.
+
+    See Also
+    --------
+    .microstates_dynamic, .microstates_static
+
+    Examples
+    --------
+    .. ipython:: python
+
+      import neurokit2 as nk
+
+      microstates = [0, 0, 0, 1, 1, 2, 2, 2, 2, 1, 0, 0, 2, 2]
+      @savefig p_microstates_complexity1.png scale=100%
+      nk.microstates_complexity(microstates, show=True)
+      @suppress
+      plt.close()
 
     """
+    # Try retrieving info
+    if isinstance(microstates, dict):
+        microstates = microstates["Sequence"]
+    # Sanitize
     microstates = as_vector(microstates)
+
+    # Initialize output container
     out = {}
 
     # Empirical Shannon entropy
-    out["Entropy_Shannon"] = entropy_shannon(microstates)
+    out["Entropy_Shannon"], _ = entropy_shannon(microstates, show=show)
 
     # Maximym entropy given the number of different states
-#    h_max = np.log2(len(np.unique(microstates)))
+    #    h_max = np.log2(len(np.unique(microstates)))
 
-    df = pd.DataFrame.from_dict(out, orient="index").T.add_prefix("Microstate_")
+    df = pd.DataFrame.from_dict(out, orient="index").T.add_prefix("Microstates_")
     return df
diff --git a/neurokit2/microstates/microstates_dynamic.py b/neurokit2/microstates/microstates_dynamic.py
@@ -2,53 +2,74 @@
 import pandas as pd
 
 from ..markov import transition_matrix
+from ..misc import as_vector
 
 
-def microstates_dynamic(microstates):
-    """**Dynamic properties of microstates (transition pattern)**
+def microstates_dynamic(microstates, show=False):
+    """**Dynamic Properties of Microstates**
 
-    Based on https://github.com/Frederic-vW/eeg_microstates and https://github.com/maximtrp/mchmm
+    This computes statistics related to the transition pattern (based on the
+    :func:`.transition_matrix`).
+
+    .. note::
+
+        This function does not compute all the features available under the markov submodule.
+        Don't hesitate to open an issue to help us test and decide what features to include.
 
     Parameters
     ----------
     microstates : np.ndarray
         The topographic maps of the found unique microstates which has a shape of n_channels x
         n_states, generated from :func:`.nk.microstates_segment`.
+    show : bool
+        Show the transition matrix.
+
 
     Returns
     -------
     DataFrame
-        Dynamic properties of microstates:
-        * Results of the observed transition matrix
-        * Chi-square test statistics of the observed microstates against the expected microstates
-        * Symmetry test statistics of the observed microstates against the expected microstates
+        Dynamic properties of microstates.
 
     See Also
     --------
-    transition_matrix
+    .transition_matrix, .microstates_static
 
     Examples
     --------
     .. ipython:: python
 
       import neurokit2 as nk
 
-      microstates = [0, 0, 0, 1, 1, 2, 2, 2, 2, 1, 0, 0]
-      nk.microstates_dynamic(microstates)
+      microstates = [0, 0, 0, 1, 1, 2, 2, 2, 2, 1, 0, 0, 2, 2]
+      @savefig p_microstates_dynamic1.png scale=100%
+      nk.microstates_dynamic(microstates, show=True)
+      @suppress
+      plt.close()
 
     """
+    # See https://github.com/Frederic-vW/eeg_microstates
+    # and https://github.com/maximtrp/mchmm
+    # for other implementations
+
     out = {}
 
+    # Try retrieving info
+    if isinstance(microstates, dict):
+        microstates = microstates["Sequence"]
+    # Sanitize
+    microstates = as_vector(microstates)
+
     # Transition matrix
-    tm, info = transition_matrix(microstates)
+    tm, info = transition_matrix(microstates, show=show)
 
     for row in tm.index:
         for col in tm.columns:
             out[str(tm.loc[row].name) + "_to_" + str(tm[col].name)] = tm[col][row]
 
-    # out.update(results)
-    # out.pop("Expected")
-
     df = pd.DataFrame.from_dict(out, orient="index").T.add_prefix("Microstate_")
 
+    # TODO:
+    # * Chi-square test statistics of the observed microstates against the expected microstates
+    # * Symmetry test statistics of the observed microstates against the expected microstates
+
     return df
diff --git a/neurokit2/microstates/microstates_findnumber.py b/neurokit2/microstates/microstates_findnumber.py
@@ -1,29 +1,48 @@
 # -*- coding: utf-8 -*-
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
+from ..misc import find_knee, progress_bar
 from ..stats.cluster_quality import _cluster_quality_dispersion
 from .microstates_segment import microstates_segment
 
 
-def microstates_findnumber(eeg, n_max=12, show=False, **kwargs):
+def microstates_findnumber(
+    eeg, n_max=12, method="GEV", clustering_method="kmod", show=False, verbose=True, **kwargs
+):
     """**Estimate optimal number of microstates**
 
-    Estimate the optimal number of microstates using a variety of indices.
+    Computes statistical indices useful for estimating the optimal number of microstates using a
+
+    * **Global Explained Variance (GEV)**: measures how similar each EEG sample is to its assigned
+      microstate class. The **higher** (closer to 1), the better the segmentation.
+    * **Krzanowski-Lai Criterion (KL)**: measures quality of microstate segmentation based on the
+      dispersion measure (average distance between samples in the same microstate class); the
+      **larger the KL value**, the better the segmentation. Note that KL is not a polarity
+      invariant measure and thus might not be a suitable measure of fit for polarity-invariant
+      methods such as modified K-means and (T)AAHC.
 
     Parameters
     ----------
     eeg : np.ndarray
         An array (channels, times) of M/EEG data or a Raw or Epochs object from MNE.
     n_max : int
         Maximum number of microstates to try. A higher number leads to a longer process.
+    method : str
+        The method to use to estimate the optimal number of microstates. Can be "GEV" (the elbow,
+        detected using :func:`find_knee`), or "KL" (the location of the maximum value).
     show : bool
         Plot indices normalized on the same scale.
+    verbose : bool
+        Print progress bar.
     **kwargs
         Arguments to be passed to :func:`.microstates_segment`
 
     Returns
     -------
+    int
+        Optimal number of microstates.
     DataFrame
         The different quality scores for each number of microstates.
 
@@ -37,11 +56,13 @@ def microstates_findnumber(eeg, n_max=12, show=False, **kwargs):
 
       import neurokit2 as nk
 
-      eeg = nk.mne_data("filt-0-40_raw").filter(1, 35)
-      eeg = nk.eeg_rereference(eeg, 'average')
+      eeg = nk.mne_data("filt-0-40_raw").crop(0, 5)
 
-      # Estimate optimal number (currently comment out due to memory error)
-      # results = nk.microstates_findnumber(eeg, n_max=4, show=True, method="kmod")
+      # Estimate optimal number
+      @savefig p_microstates_findnumber1.png scale=100%
+      n_clusters, results = nk.microstates_findnumber(eeg, n_max=8, show=True)
+      @suppress
+      plt.close()
 
     """
     # Retrieve data
@@ -57,17 +78,13 @@ def microstates_findnumber(eeg, n_max=12, show=False, **kwargs):
     dispersion_previous = np.nan
     dispersion_diff_previous = np.nan
     results = []
-    for idx, n_microstates in enumerate(range(2, n_max + 1)):
-        print(idx, n_microstates)
-        out = microstates_segment(eeg, n_microstates=n_microstates)
-
-        segmentation = out["Sequence"]
-        #        info = out["Info_algorithm"]
-        #        sd = out["GFP"]
+    for idx, n_microstates in progress_bar(range(2, n_max + 1), verbose=verbose):
 
-        #        nk.cluster_quality(data.T, segmentation, clusters=microstates, info=info, n_random=10, sd=gfp)
+        out = microstates_segment(
+            eeg, n_microstates=n_microstates, method=clustering_method, **kwargs
+        )
 
-        #        _, rez = _cluster_quality_sklearn(data.T, segmentation, microstates)
+        segmentation = out["Sequence"]
         rez = {}
 
         rez["Score_GEV"] = out["GEV"]
@@ -92,14 +109,25 @@ def microstates_findnumber(eeg, n_max=12, show=False, **kwargs):
         dispersion_diff_previous = dispersion_diff.copy()
 
         results.append(rez)
+
     results = pd.DataFrame(results)
-    #        results.append(pd.DataFrame.from_dict(rez, orient="index").T)
-    #    results = pd.concat(results, axis=0).reset_index(drop=True)
+
+    # Estimate optimal number
+    if method == "KL":
+        n_clusters = int(np.argmax(results["KL_Criterion"]) + 2)
+    else:
+        n_clusters = find_knee(results["Score_GEV"], np.rint(np.arange(2, n_max + 1)))
 
     if show is True:
         normalized = (results - results.min()) / (results.max() - results.min())
-        normalized["n_Clusters"] = np.rint(np.arange(2, n_max))
+        normalized["n_Clusters"] = np.rint(np.arange(2, n_max + 1))
         normalized.columns = normalized.columns.str.replace("Score", "Normalized")
         normalized.plot(x="n_Clusters")
-
-    return results
+        plt.axvline(n_clusters, color="red", linestyle="--", label=f"Method: {method}")
+        plt.legend()
+        plt.xticks(np.rint(np.arange(2, n_max + 1)))
+        plt.xlabel("Number of microstates")
+        plt.ylabel("Normalized score")
+        plt.title("Optimal number of microstates")
+
+    return n_clusters, results