[ENH] Add verbose outputs for pipeline walkthrough (#174)
* Add outputs.
* Update outputs.
* Add betas back into fitmodels_direct outputs.
* Finish reverting.
* Update verbose outputs.
* Clear outputs in notebooks and change output file names.
Showing 7 changed files with 447 additions and 268 deletions.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,360 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Generate plots for tedana workflow\n", | ||
"This notebook relies on outputs generated by the full tedana workflow. Some outputs needed to be added in specifically for these figures. The input data is a five-echo run of resting state data with 160 TRs, which has been preprocessed (slice timing correction and motion correction), but which has not been warped to standard space.\n", | ||
"\n", | ||
"Here is the command run:\n", | ||
"\n", | ||
"```\n", | ||
"tedana -d /Users/tsalo/Documents/tsalo/tedana_comparison/e5_data/p06.SBJ01_S09_Task11_e1.sm.nii.gz /Users/tsalo/Documents/tsalo/tedana_comparison/e5_data/p06.SBJ01_S09_Task11_e2.sm.nii.gz /Users/tsalo/Documents/tsalo/tedana_comparison/e5_data/p06.SBJ01_S09_Task11_e3.sm.nii.gz /Users/tsalo/Documents/tsalo/tedana_comparison/e5_data/p06.SBJ01_S09_Task11_e4.sm.nii.gz /Users/tsalo/Documents/tsalo/tedana_comparison/e5_data/p06.SBJ01_S09_Task11_e5.sm.nii.gz -e 15.4 29.7 44.0 58.3 72.6 --label mlepca --no_gscontrol\n", | ||
"```\n", | ||
"\n", | ||
"MLEPCA was *not* used for PCA dimensionality estimation (# components retained with MLEPCA = 159, the max number, so no whitening occurs). Instead, SVD + the decision tree was used. Scikit-learn's FastICA was used for ICA, and the Kundu v3.2 algorithm was used to classify ICA components." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%matplotlib inline\n", | ||
"import os.path as op\n", | ||
"from glob import glob\n", | ||
"\n", | ||
"import numpy as np\n", | ||
"import nibabel as nib\n", | ||
"import seaborn as sns\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"\n", | ||
"from scipy import stats\n", | ||
"from nilearn.masking import apply_mask\n", | ||
"from sklearn.decomposition import PCA, FastICA" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Load data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"base_dir = '/Users/tsalo/Documents/tsalo/tedana-comparison/sandbox/'\n", | ||
"data_dir = '/Users/tsalo/Documents/tsalo/tedana-comparison/sandbox/e5_data/'\n", | ||
"mask_file = op.join(base_dir, 'current/mask.nii')\n", | ||
"echo_times = np.array([15.4, 29.7, 44.0, 58.3, 72.6])\n", | ||
"\n", | ||
"# Raw data (after STC and MC)\n", | ||
"files = sorted(glob(op.join(data_dir, '*.nii.gz')))\n", | ||
"\n", | ||
"masked_data = [apply_mask(f, mask_file) for f in files]\n", | ||
"masked_data = np.stack(masked_data)\n", | ||
"masked_data = np.swapaxes(np.swapaxes(masked_data, 0, 2), 1, 2)\n", | ||
"\n", | ||
"# Optimally combined data\n", | ||
"oc_file_t2s = op.join(base_dir, 'current/ts_OC.nii')\n", | ||
"oc_file_lin = op.join(base_dir, 'current-dectree-linear/ts_OC.nii')\n", | ||
"\n", | ||
"oc_t2s = apply_mask(oc_file_t2s, mask_file)\n", | ||
"oc_z_t2s = (oc_t2s - np.mean(oc_t2s, axis=0)) / np.std(oc_t2s, axis=0)\n", | ||
"\n", | ||
"oc_lin = apply_mask(oc_file_lin, mask_file)\n", | ||
"oc_z_lin = (oc_lin - np.mean(oc_lin, axis=0)) / np.std(oc_lin, axis=0)\n", | ||
"\n", | ||
"# Select voxel with block-ish timeseries\n", | ||
"voxel_idx = 14501 # corresponds to 22, 47, 2\n", | ||
"mask_img = nib.load(mask_file)\n", | ||
"temp_data = np.zeros(mask_img.shape)\n", | ||
"temp_data[22, 47, 2] = 1\n", | ||
"temp_img = nib.Nifti1Image(temp_data, mask_img.affine)\n", | ||
"temp_data = apply_mask(temp_img, mask_img)\n", | ||
"voxel_idx = np.where(temp_data == 1)[0][0]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"fig, ax = plt.subplots(figsize=(14, 6))\n", | ||
"\n", | ||
"f = '/Users/tsalo/Documents/tsalo/tedana-comparison/sandbox/TED.p06.mlepca/ts_OC.nii'\n", | ||
"td = apply_mask(f, mask_img)\n", | ||
"td = td[:, voxel_idx]\n", | ||
"ax.plot(td, color='green')\n", | ||
"\n", | ||
"f = '/Users/tsalo/Documents/tsalo/tedana-comparison/sandbox/TED.p06.mlepca/dn_ts_OC.nii'\n", | ||
"td = apply_mask(f, mask_img)\n", | ||
"td = td[:, voxel_idx]\n", | ||
"ax.plot(td, color='red')\n", | ||
"\n", | ||
"f = '/Users/tsalo/Documents/tsalo/tedana-comparison/sandbox/TED.p06.mlepca/dn_ts_OC_T1c.nii'\n", | ||
"td = apply_mask(f, mask_img)\n", | ||
"td = td[:, voxel_idx]\n", | ||
"ax.plot(td, color='blue')\n", | ||
"\n", | ||
"fig.show()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"temp_img.to_filename('/Users/tsalo/Desktop/THING.nii.gz')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Multi-Echo Principal Components Analysis\n", | ||
"Optimally combined data are decomposed with PCA.\n", | ||
"The PCA components are selected according to one of multiple possible approaches.\n", | ||
"Two possible approaches are a decision tree and a threshold using the percentage of variance explained by each component." | ||
] | ||
}, | ||
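{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Editor's sketch (assumption): `mepca_mmix`, used in the next cell, is the PCA\n", | ||
"# mixing matrix (component timeseries, shape time x components) from tedana's\n", | ||
"# verbose outputs. As a rough stand-in, approximate it by running scikit-learn's\n", | ||
"# PCA on the z-scored optimally combined data.\n", | ||
"pca = PCA(svd_solver='full')\n", | ||
"mepca_mmix = pca.fit_transform(oc_z)" | ||
] | ||
}, | ||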
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"fig, axes = plt.subplots(3, sharex=True, figsize=(14, 6))\n", | ||
"axes[0].plot(mepca_mmix[:, 0])\n", | ||
"axes[0].set_title('PCA Component 0', fontsize=16)\n", | ||
"axes[1].plot(mepca_mmix[:, 1])\n", | ||
"axes[1].set_title('PCA Component 1', fontsize=16)\n", | ||
"axes[2].plot(mepca_mmix[:, -1])\n", | ||
"axes[2].set_title('PCA Component {}'.format(mepca_mmix.shape[0]-1), fontsize=16)\n", | ||
"axes[0].set_xlim(0, mepca_mmix.shape[0]-1)\n", | ||
"axes[2].set_xticks([])\n", | ||
"axes[2].set_xlabel('Time', fontsize=16)\n", | ||
"axes[0].tick_params(axis='both', which='major', labelsize=12)\n", | ||
"axes[1].tick_params(axis='both', which='major', labelsize=12)\n", | ||
"axes[2].tick_params(axis='both', which='major', labelsize=12)\n", | ||
"fig.tight_layout()\n", | ||
"fig.savefig('11_pca_component_timeseries.png', dpi=400)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Data Whitening\n", | ||
"The selected components from the PCA are recombined to produce a whitened version of the optimally combined data." | ||
] | ||
}, | ||
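{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Editor's sketch (assumption): `oc_red`, used in the next cell, is the PCA-whitened\n", | ||
"# (component-reconstructed) optimally combined data from tedana's verbose outputs.\n", | ||
"# As a stand-in, rebuild it from a subset of PCA components by zeroing the rest and\n", | ||
"# inverting the transform.\n", | ||
"n_retained = 100  # hypothetical number of components kept by the decision tree\n", | ||
"scores = pca.transform(oc_z)\n", | ||
"scores[:, n_retained:] = 0\n", | ||
"oc_red = pca.inverse_transform(scores)" | ||
] | ||
}, | ||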
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"fig, ax = plt.subplots(figsize=(14, 6))\n", | ||
"ax.plot(oc_red[:, voxel_idx], label='Whitened timeseries', zorder=1.)\n", | ||
"ax.plot(oc_z[:, voxel_idx], label='Original timeseries', alpha=0.5, zorder=0., linewidth=3)\n", | ||
"legend = ax.legend(frameon=True, fontsize=16)\n", | ||
"ax.set_xlim(0, oc_z.shape[0]-1)\n", | ||
"ax.set_xticks([])\n", | ||
"ax.set_xlabel('Time', fontsize=16)\n", | ||
"ax.tick_params(axis='both', which='major', labelsize=14)\n", | ||
"fig.savefig('12_pca_whitened_data.png', dpi=400)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Multi-Echo Independent Components Analysis\n", | ||
"The whitened optimally combined data are then decomposed with ICA. The number of ICA components is limited to the number of retained components from the PCA, in order to reflect the true dimensionality of the data.\n", | ||
"ICA produces a mixing matrix (i.e., timeseries for each component)." | ||
] | ||
}, | ||
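{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Editor's sketch (assumption): `meica_mmix`, used in the next cell, is the ICA\n", | ||
"# mixing matrix (component timeseries) from tedana's verbose outputs. As a stand-in,\n", | ||
"# run scikit-learn's FastICA on the whitened data, limiting the number of components\n", | ||
"# to the number retained by the PCA.\n", | ||
"ica = FastICA(n_components=n_retained, max_iter=5000, random_state=42)\n", | ||
"meica_mmix = ica.fit_transform(oc_red)" | ||
] | ||
}, | ||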
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"fig, axes = plt.subplots(3, sharex=True, figsize=(14, 6))\n", | ||
"axes[0].plot(meica_mmix[:, 89])\n", | ||
"axes[0].set_title('ICA Component 89', fontsize=16)\n", | ||
"axes[1].plot(meica_mmix[:, 38])\n", | ||
"axes[1].set_title('ICA Component 38 (High Kappa)', fontsize=16)\n", | ||
"axes[2].plot(meica_mmix[:, 68])\n", | ||
"axes[2].set_title('ICA Component 68 (High Rho)', fontsize=16)\n", | ||
"axes[0].set_xlim(0, meica_mmix.shape[0]-1)\n", | ||
"axes[2].set_xticks([])\n", | ||
"axes[2].set_xlabel('Time', fontsize=16)\n", | ||
"axes[0].tick_params(axis='both', which='major', labelsize=12)\n", | ||
"axes[1].tick_params(axis='both', which='major', labelsize=12)\n", | ||
"axes[2].tick_params(axis='both', which='major', labelsize=12)\n", | ||
"fig.tight_layout()\n", | ||
"fig.savefig('13_ica_component_timeseries.png', dpi=400)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# $R_2$ and $S_0$ Model Fit\n", | ||
"Linear regression is used to fit the component timeseries to each voxel in each echo from the original, echo-specific data. This results in echo- and voxel-specific betas for each of the components. TE-dependence ($R_2$) and TE-independence ($S_0$) models can then be fit to these betas.\n", | ||
"\n", | ||
"These models allow calculation of F-statistics for the $R_2$ and $S_0$ models (referred to as $\\kappa$ and $\\rho$, respectively)." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"for i, comp in enumerate([89, 38, 68]): # only generate plots for a few components\n", | ||
" # Use weight map to average as fitmodels_direct does\n", | ||
" comp_weights = np.average(meica_betas[comp, :, :], weights=norm_weights[comp, :], axis=1)\n", | ||
" r2_pred_weights = np.average(r2_pred_betas[comp, :, :], weights=norm_weights[comp, :], axis=1)\n", | ||
" s0_pred_weights = np.average(s0_pred_betas[comp, :, :], weights=norm_weights[comp, :], axis=1)\n", | ||
" kappa = np.average(meica_f_r2[comp, :], weights=norm_weights[comp, :])\n", | ||
" rho = np.average(meica_f_s0[comp, :], weights=norm_weights[comp, :])\n", | ||
" \n", | ||
" mean_ = np.mean(comp_weights)\n", | ||
" std_ = np.std(comp_weights)\n", | ||
" comp_weights_z = (comp_weights - mean_) / std_\n", | ||
" s0_pred_weights_z = (s0_pred_weights - mean_) / std_\n", | ||
" r2_pred_weights_z = (r2_pred_weights - mean_) / std_\n", | ||
"\n", | ||
" fig, ax = plt.subplots(figsize=(8, 6))\n", | ||
" ax.plot(echo_times, comp_weights_z, color='black', label='Component Weight')\n", | ||
" ax.plot(echo_times, r2_pred_weights_z, color='blue', label='R2 Model Prediction')\n", | ||
" ax.plot(echo_times, s0_pred_weights_z, color='red', label='S0 Model Prediction')\n", | ||
" ax.set_xticks(echo_times)\n", | ||
" ax.tick_params(axis='both', which='major', labelsize=12)\n", | ||
" ax.set_xlabel('Echo Time (ms)', fontsize=16)\n", | ||
" temp = np.hstack((comp_weights_z, s0_pred_weights_z, r2_pred_weights_z))\n", | ||
" ax.set_ylim(np.floor(np.min(temp)), np.ceil(np.max(temp)))\n", | ||
" legend = ax.legend(frameon=True, loc='lower center', fontsize=14)\n", | ||
" ax.set_title('ICA Component {}'.format(comp), fontsize=16)\n", | ||
" ax.annotate('$\\kappa$: {0:.02f}\\n$\\\\rho$: {1:.02f}'.format(kappa, rho),\n", | ||
" xy=(40, 1.5), fontsize=16,\n", | ||
" bbox=dict(fc=\"white\", ec=\"black\", lw=1))\n", | ||
" fig.tight_layout()\n", | ||
" fig.savefig('14_te_dependence_models_component_{}.png'.format(i), dpi=400)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# ICA Component Selection and Multi-Echo Denoising\n", | ||
"A decision tree is applied to $\\kappa$, $\\rho$, and other metrics in order to classify ICA components as TE-dependent (BOLD signal), TE-independent (non-BOLD noise), or neither (to be ignored).\n", | ||
"\n", | ||
"The ICA components are fitted to the original (not whitened) optimally combined data with linear regression, which is used to weight the components for construction of the denoised data. The residuals from this regression will thus include the variance that was not included in the PCA-whitened optimally combined data.\n", | ||
"\n", | ||
"The ME-DN dataset is constructed from the accepted (BOLD) and ignored components, as well as the residual variance not explained by the ICA.\n", | ||
"The ME-HK dataset is constructed just from the accepted (BOLD) components. This means that ignored components and residual variance not explained by the ICA are not included in the resulting dataset." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"dn_data_z = (dn_data - np.mean(dn_data, axis=0)) / np.std(dn_data, axis=0)\n", | ||
"hk_data_z = (hk_data - np.mean(hk_data, axis=0)) / np.std(hk_data, axis=0)\n", | ||
"\n", | ||
"fig, axes = plt.subplots(3, sharex=True, figsize=(14, 6))\n", | ||
"axes[0].plot(oc_z[:, voxel_idx], label='Optimally combined')\n", | ||
"axes[0].set_title('Optimally combined', fontsize=16)\n", | ||
"\n", | ||
"axes[1].plot(dn_data_z[:, voxel_idx], label='ME-DN')\n", | ||
"axes[1].set_title('ME-DN', fontsize=16)\n", | ||
"\n", | ||
"axes[2].plot(hk_data_z[:, voxel_idx])\n", | ||
"axes[2].set_title('ME-HK', fontsize=16)\n", | ||
"legend = ax.legend(frameon=True)\n", | ||
"axes[0].set_xlim(0, oc_z.shape[0]-1)\n", | ||
"axes[2].set_xticks([])\n", | ||
"axes[2].set_xlabel('Time', fontsize=16)\n", | ||
"axes[0].tick_params(axis='both', which='major', labelsize=12)\n", | ||
"axes[1].tick_params(axis='both', which='major', labelsize=12)\n", | ||
"axes[2].tick_params(axis='both', which='major', labelsize=12)\n", | ||
"fig.tight_layout()\n", | ||
"\n", | ||
"fig.savefig('15_denoised_data_timeseries.png', dpi=400)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Post-processing to remove spatially diffuse noise\n", | ||
"Due to the constraints of ICA, MEICA is able to identify and remove spatially localized noise components, but it cannot identify components that are spread out throughout the whole brain.\n", | ||
"\n", | ||
"One of several post-processing strategies may be applied to the ME-DN or ME-HK datasets in order to remove spatially diffuse (ostensibly respiration-related) noise. Methods which have been employed in the past include global signal regression (GSR), T1c-GSR, anatomical CompCor, Go Decomposition (GODEC), and robust PCA." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"dn_t1c_data_z = (dn_t1c_data - np.mean(dn_t1c_data, axis=0)) / np.std(dn_t1c_data, axis=0)\n", | ||
"hk_t1c_data_z = (hk_t1c_data - np.mean(hk_t1c_data, axis=0)) / np.std(hk_t1c_data, axis=0)\n", | ||
"\n", | ||
"fig, axes = plt.subplots(2, sharex=True, figsize=(14, 6))\n", | ||
"axes[0].plot(dn_t1c_data_z[:, voxel_idx], label='ME-DN T1c')\n", | ||
"axes[0].plot(dn_data_z[:, voxel_idx], label='ME-DN', alpha=0.5, linewidth=3, zorder=0.)\n", | ||
"axes[0].set_title('ME-DN', fontsize=16)\n", | ||
"legend = axes[0].legend(frameon=True, loc='upper right')\n", | ||
"\n", | ||
"axes[1].plot(hk_t1c_data_z[:, voxel_idx], label='ME-HK T1c')\n", | ||
"axes[1].plot(hk_data_z[:, voxel_idx], label='ME-HK', alpha=0.5, linewidth=3, zorder=0.)\n", | ||
"axes[1].set_title('ME-HK', fontsize=16)\n", | ||
"legend = axes[1].legend(frameon=True)\n", | ||
"axes[0].set_xlim(0, oc_z.shape[0]-1)\n", | ||
"axes[1].set_xticks([])\n", | ||
"axes[1].set_xlabel('Time', fontsize=16)\n", | ||
"axes[0].tick_params(axis='both', which='major', labelsize=12)\n", | ||
"axes[1].tick_params(axis='both', which='major', labelsize=12)\n", | ||
"fig.tight_layout()\n", | ||
"fig.savefig('16_t1c_denoised_data_timeseries.png', dpi=400)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python [conda env:python3]", | ||
"language": "python", | ||
"name": "conda-env-python3-py" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |