Skip to content

Commit

Permalink
Merge pull request ME-ICA#2 from smoia/OLS_stats_modification
Browse files Browse the repository at this point in the history
Modified get_coeffs into get_ls_coeffs, corrected its use in the code.
  • Loading branch information
CesarCaballeroGaudes authored Nov 29, 2019
2 parents 1def734 + ba3aa26 commit 2ec3b3c
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 71 deletions.
2 changes: 1 addition & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ API
:toctree: generated/
:template: function.rst

tedana.stats.get_coeffs
tedana.stats.get_ls_coeffs
tedana.stats.computefeats2
tedana.stats.getfbounds

Expand Down
10 changes: 5 additions & 5 deletions tedana/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from nilearn.image import new_img_like

from tedana import utils
from tedana.stats import computefeats2, get_coeffs
from tedana.stats import computefeats2, get_ls_coeffs

LGR = logging.getLogger(__name__)
RepLGR = logging.getLogger('REPORT')
Expand Down Expand Up @@ -47,8 +47,8 @@ def split_ts(data, mmix, mask, comptable):
"""
acc = comptable[comptable.classification == 'accepted'].index.values

cbetas = get_coeffs(data - data.mean(axis=-1, keepdims=True),
mmix, mask)
cbetas = get_ls_coeffs(data - data.mean(axis=-1, keepdims=True),
mmix, mask)
betas = cbetas[mask]
if len(acc) != 0:
hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
Expand Down Expand Up @@ -104,7 +104,7 @@ def write_split_ts(data, mmix, mask, comptable, ref_img, suffix=''):
dmdata = mdata.T - mdata.T.mean(axis=0)

# get variance explained by retained components
betas = get_coeffs(dmdata.T, mmix, mask=None)
betas = get_ls_coeffs(dmdata.T, mmix, mask=None)
varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() /
(dmdata**2.).sum()) * 100
LGR.info('Variance explained by ICA decomposition: {:.02f}%'.format(varexpl))
Expand Down Expand Up @@ -223,7 +223,7 @@ def writeresults(ts, mask, comptable, mmix, n_vols, ref_img):

write_split_ts(ts, mmix, mask, comptable, ref_img, suffix='OC')

ts_B = get_coeffs(ts, mmix, mask)
ts_B = get_ls_coeffs(ts, mmix, mask)
fout = filewrite(ts_B, 'betas_OC', ref_img)
LGR.info('Writing full ICA coefficient feature set: {}'.format(op.abspath(fout)))

Expand Down
4 changes: 2 additions & 2 deletions tedana/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
# ex: set sts=4 ts=4 sw=4 et:

from .kundu_fit import (
dependence_metrics, kundu_metrics, get_coeffs, computefeats2
dependence_metrics, kundu_metrics, get_ls_coeffs, computefeats2
)

__all__ = [
'dependence_metrics', 'kundu_metrics', 'get_coeffs', 'computefeats2']
'dependence_metrics', 'kundu_metrics', 'get_ls_coeffs', 'computefeats2']
10 changes: 5 additions & 5 deletions tedana/metrics/kundu_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from scipy import stats

from tedana import io, utils
from tedana.stats import getfbounds, computefeats2, get_coeffs
from tedana.stats import getfbounds, computefeats2, get_ls_coeffs


LGR = logging.getLogger(__name__)
Expand Down Expand Up @@ -107,7 +107,7 @@ def dependence_metrics(catd, tsoc, mmix, t2s, tes, ref_img,
WTS = computefeats2(tsoc, mmixN, mask=None, normalize=False)

# compute PSC dataset - shouldn't have to refit data
tsoc_B = get_coeffs(tsoc_dm, mmix, mask=None)
tsoc_B = get_ls_coeffs(tsoc_dm, mmix, mask=None)
del tsoc_dm
tsoc_Babs = np.abs(tsoc_B)
PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100
Expand All @@ -124,9 +124,9 @@ def dependence_metrics(catd, tsoc, mmix, t2s, tes, ref_img,
totvar_norm = (WTS**2).sum()

# compute Betas and means over TEs for TE-dependence analysis
betas = get_coeffs(utils.unmask(catd, mask),
mmix,
np.repeat(mask[:, np.newaxis], len(tes), axis=1))
betas = get_ls_coeffs(utils.unmask(catd, mask),
mmix,
np.repeat(mask[:, np.newaxis], len(tes), axis=1))
betas = betas[mask, ...]
n_voxels, n_echos, n_components = betas.shape
mu = catd.mean(axis=-1, dtype=float)
Expand Down
95 changes: 58 additions & 37 deletions tedana/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,37 @@
from scipy import stats

from tedana import utils
from tedana.due import due, BibTeX, Doi

LGR = logging.getLogger(__name__)
RepLGR = logging.getLogger('REPORT')
RefLGR = logging.getLogger('REFERENCES')

@due.dcite(references.T2Z_TRANSFORM,
T2Z_TRANSFORM = BibTeX("""
@article{hughett2007accurate,
title={Accurate Computation of the F-to-z and t-to-z Transforms
for Large Arguments},
author={Hughett, Paul and others},
journal={Journal of Statistical Software},
volume={23},
number={1},
pages={1--5},
year={2007},
publisher={Foundation for Open Access Statistics}
}
""")
T2Z_IMPLEMENTATION = Doi('10.5281/zenodo.32508')


@due.dcite(T2Z_TRANSFORM,
description='Introduces T-to-Z transform.')
@due.dcite(references.T2Z_IMPLEMENTATION,
@due.dcite(T2Z_IMPLEMENTATION,
description='Python implementation of T-to-Z transform.')

def t_to_z(t_values, dof):
"""
From Vanessa Sochat's TtoZ package.
Copyright (c) 2015 Vanessa Sochat
MIT Licensed
"""

# check if t_values is np.array, and convert if required
Expand Down Expand Up @@ -53,6 +71,7 @@ def t_to_z(t_values, dof):
out[t_values != 0] = z_values
return out


def getfbounds(n_echos):
"""
Gets F-statistic boundaries based on number of echos
Expand Down Expand Up @@ -96,12 +115,14 @@ def computefeats2(data, mmix, mask=None, normalize=True):
Data in component space
"""
if data.ndim != 2:
raise ValueError('Parameter data should be 2d, not {0}d'.format(data.ndim))
raise ValueError('Parameter data should be 2d, not '
'{0}d'.format(data.ndim))
elif mmix.ndim not in [2]:
raise ValueError('Parameter mmix should be 2d, not '
'{0}d'.format(mmix.ndim))
elif (mask is not None) and (mask.ndim != 1):
raise ValueError('Parameter mask should be 1d, not {0}d'.format(mask.ndim))
raise ValueError('Parameter mask should be 1d, not '
'{0}d'.format(mask.ndim))
elif (mask is not None) and (data.shape[0] != mask.shape[0]):
raise ValueError('First dimensions (number of samples) of data ({0}) '
'and mask ({1}) do not match.'.format(data.shape[0],
Expand All @@ -119,22 +140,15 @@ def computefeats2(data, mmix, mask=None, normalize=True):

# get betas and z-values of `data`~`mmix`
# mmix is normalized internally
data_R, data_Z = get_coeffs(data_vn, mmix, mask=None, add_const=False, compute_zvalues=True)
_, data_Z = get_ls_coeffs(data_vn, mmix, mask=None, add_const=False,
compute_zvalues=True)
if data_Z.ndim == 1:
data_Z = np.atleast_2d(data_Z).T

# normalize data (only division by std)
if normalize:
# minus mean and divided by std
data_Zm = stats.zscore(data_Z, axis=0)
# adding back the mean
data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) /
data_Z.std(axis=0, keepdims=True))

return data_Z


def get_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, min_df=1):
def get_ls_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, min_df=1):
"""
Performs least-squares fit of `X` against `data`
Expand Down Expand Up @@ -162,21 +176,23 @@ def get_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, min_d
"""
if data.ndim not in [2, 3]:
raise ValueError('Parameter data should be 2d or 3d, not {0}d'.format(data.ndim))
raise ValueError('Parameter data should be 2d or 3d, not '
'{0}d'.format(data.ndim))
elif X.ndim not in [2]:
raise ValueError('Parameter X should be 2d, not {0}d'.format(X.ndim))
elif data.shape[-1] != X.shape[0]:
raise ValueError('Last dimension (dimension {0}) of data ({1}) does not '
'match first dimension of '
'X ({2})'.format(data.ndim, data.shape[-1], X.shape[0]))
raise ValueError('Last dimension (dimension {0}) of data ({1}) does '
'not match first dimension of X '
'({2})'.format(data.ndim, data.shape[-1], X.shape[0]))

# mask data and flip (time x samples)
if mask is not None:
if mask.ndim not in [1, 2]:
raise ValueError('Parameter data should be 1d or 2d, not {0}d'.format(mask.ndim))
raise ValueError('Parameter data should be 1d or 2d, not '
'{0}d'.format(mask.ndim))
elif data.shape[0] != mask.shape[0]:
raise ValueError('First dimensions of data ({0}) and mask ({1}) do not '
'match'.format(data.shape[0], mask.shape[0]))
raise ValueError('First dimensions of data ({0}) and mask ({1}) do'
' not match'.format(data.shape[0], mask.shape[0]))
mdata = data[mask, :].T
else:
mdata = data.T
Expand All @@ -190,25 +206,32 @@ def get_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, min_d
if add_const: # add intercept, if specified
X = np.column_stack([X, np.ones((len(X), 1))])

# least squares estimation
betas = np.dot(np.linalg.pinv(X),mdata)
# least squares estimation: beta = (X^T * X)^(-1) * X^T * mdata
# betas is transposed due to backward compatibility with rest of code.
betas = np.dot(np.linalg.pinv(X), mdata).T

if compute_zvalues:
# compute t-values of betas (estimates) and then convert to z-values
# first compute number of degrees of freedom
df = mdata.shape[0] - X.shape[1]
if df == 0:
LGR.error('ERROR: No degrees of freedom left in least squares calculation. Stopping!!')
else:
elif df <= min_df:
LGR.warning('Number of degrees of freedom in least-square estimation is less than {}'.format(min_df+1))
# compute residual sum of squares (RSS)
RSS = np.sum(np.power(mdata - np.dot(X, betas.T),2),axis=0)/df
RSS = RSS[:,np.newaxis]
C = np.diag(np.linalg.pinv(np.dot(X.T,X)))
C = C[:,np.newaxis]
std_betas = np.sqrt(np.dot(RSS,C.T))
z_values = t_to_z(betas / std_betas,df)
LGR.error('ERROR: No degrees of freedom left in least squares '
'calculation. Stopping!!')
elif df <= min_df:
LGR.warning('Number of degrees of freedom in least-square '
'estimation is less than {}'.format(min_df + 1))
# compute sigma:
# RSS = sum{[mdata - (X * betas)]^2}
# sigma = RSS / Degrees_of_Freedom
sigma = np.sum(np.power(mdata - np.dot(X, betas.T), 2), axis=0) / df
sigma = sigma[:, np.newaxis]
# Copmute std of betas:
# C = (X^T * X)_ii^(-1)
# std(betas) = sqrt(sigma * C)
C = np.diag(np.linalg.pinv(np.dot(X.T, X)))
C = C[:, np.newaxis]
std_betas = np.sqrt(np.dot(sigma, C.T))
z_values = t_to_z(betas / std_betas, df)

if add_const: # drop beta for intercept, if specified
betas = betas[:, :-1]
Expand All @@ -224,5 +247,3 @@ def get_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, min_d
return betas, z_values
else:
return betas


38 changes: 19 additions & 19 deletions tedana/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import random

from tedana.stats import computefeats2
from tedana.stats import get_coeffs
from tedana.stats import get_ls_coeffs
from tedana.stats import getfbounds


Expand Down Expand Up @@ -58,7 +58,7 @@ def test_smoke_computefeats2():
assert computefeats2(data, mmix, normalize=False) is not None


def test_get_coeffs():
def test_get_ls_coeffs():
"""
Check least squares coefficients.
"""
Expand All @@ -69,26 +69,26 @@ def test_get_coeffs():
X = np.arange(0, 40)[:, np.newaxis]
mask = np.array([True, False])

betas = get_coeffs(data, X, mask=None, add_const=False)
betas = get_ls_coeffs(data, X, mask=None, add_const=False)
betas = np.squeeze(betas)
assert np.allclose(betas, np.array([5., 5.]))

betas = get_coeffs(data, X, mask=None, add_const=True)
betas = get_ls_coeffs(data, X, mask=None, add_const=True)
betas = np.squeeze(betas)
assert np.allclose(betas, np.array([5., 5.]))

betas = get_coeffs(data, X, mask=mask, add_const=False)
betas = get_ls_coeffs(data, X, mask=mask, add_const=False)
betas = np.squeeze(betas)
assert np.allclose(betas, np.array([5, 0]))

betas = get_coeffs(data, X, mask=mask, add_const=True)
betas = get_ls_coeffs(data, X, mask=mask, add_const=True)
betas = np.squeeze(betas)
assert np.allclose(betas, np.array([5, 0]))


def test_break_get_coeffs():
def test_break_get_ls_coeffs():
"""
Ensure that get_coeffs fails when input data do not have the right
Ensure that get_ls_coeffs fails when input data do not have the right
shapes.
"""
n_samples, n_echos, n_vols, n_comps = 10000, 5, 100, 50
Expand All @@ -98,41 +98,41 @@ def test_break_get_coeffs():

data = np.empty((n_samples))
with pytest.raises(ValueError):
get_coeffs(data, X, mask, add_const=False)
get_ls_coeffs(data, X, mask, add_const=False)

data = np.empty((n_samples, n_vols))
X = np.empty((n_vols))
with pytest.raises(ValueError):
get_coeffs(data, X, mask, add_const=False)
get_ls_coeffs(data, X, mask, add_const=False)

data = np.empty((n_samples, n_echos, n_vols + 1))
X = np.empty((n_vols, n_comps))
with pytest.raises(ValueError):
get_coeffs(data, X, mask, add_const=False)
get_ls_coeffs(data, X, mask, add_const=False)

data = np.empty((n_samples, n_echos, n_vols))
mask = np.empty((n_samples, n_echos, n_vols))
with pytest.raises(ValueError):
get_coeffs(data, X, mask, add_const=False)
get_ls_coeffs(data, X, mask, add_const=False)

mask = np.empty((n_samples + 1, n_echos))
with pytest.raises(ValueError):
get_coeffs(data, X, mask, add_const=False)
get_ls_coeffs(data, X, mask, add_const=False)


def test_smoke_get_coeffs():
def test_smoke_get_ls_coeffs():
"""
Ensure that get_coeffs returns outputs with different inputs and optional paramters
Ensure that get_ls_coeffs returns outputs with different inputs and optional paramters
"""
n_samples, _, n_times, n_components = 100, 5, 20, 6
data_2d = np.random.random((n_samples, n_times))
x = np.random.random((n_times, n_components))
mask = np.random.randint(2, size=n_samples)

assert get_coeffs(data_2d, x) is not None
# assert get_coeffs(data_3d, x) is not None TODO: submit an issue for the bug
assert get_coeffs(data_2d, x, mask=mask) is not None
assert get_coeffs(data_2d, x, add_const=True) is not None
assert get_ls_coeffs(data_2d, x) is not None
# assert get_ls_coeffs(data_3d, x) is not None TODO: submit an issue for the bug
assert get_ls_coeffs(data_2d, x, mask=mask) is not None
assert get_ls_coeffs(data_2d, x, add_const=True) is not None


def test_getfbounds():
Expand Down
4 changes: 2 additions & 2 deletions tedana/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
matplotlib.use('AGG')
import matplotlib.pyplot as plt

from tedana import metrics
from tedana.stats import get_ls_coeffs
from tedana.utils import get_spectrum

LGR = logging.getLogger(__name__)
Expand Down Expand Up @@ -79,7 +79,7 @@ def write_comp_figs(ts, mask, comptable, mmix, ref_img, out_dir,
LGR.warning('Provided colormap is not recognized, proceeding with default')
png_cmap = 'coolwarm'
# regenerate the beta images
ts_B = metrics.get_coeffs(ts, mmix, mask)
ts_B = get_ls_coeffs(ts, mmix, mask)
ts_B = ts_B.reshape(ref_img.shape[:3] + ts_B.shape[1:])
# trim edges from ts_B array
ts_B = trim_edge_zeros(ts_B)
Expand Down

0 comments on commit 2ec3b3c

Please sign in to comment.