Major refactor -- rm all globals #35

Merged: 5 commits, May 11, 2018
6 changes: 3 additions & 3 deletions tedana/cli/run.py
@@ -1,5 +1,5 @@
import argparse
-from tedana.interfaces import tedana
+from tedana import workflows


def get_parser():
@@ -100,7 +100,7 @@ def get_parser():
dest='filecsdata',
help='Save component selection data',
action='store_true',
-                        default=False)
+                        default=True)
parser.add_argument('--label',
dest='label',
help='Label for output directory.',
@@ -115,7 +115,7 @@
def main(argv=None):
"""Entry point"""
options = get_parser().parse_args(argv)
-    tedana.main(**vars(options))
+    workflows.tedana.main(**vars(options))


if __name__ == '__main__':
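With this change, the CLI stops importing the removed tedana.interfaces module and forwards the parsed options to the workflows package instead. A minimal sketch of the resulting call path (a sketch only; it assumes the package is installed and real arguments are supplied on the command line):

import sys

from tedana import workflows
from tedana.cli.run import get_parser

# Parse command-line style arguments and dispatch to the workflow,
# mirroring what main() now does via workflows.tedana.main(**vars(options)).
options = get_parser().parse_args(sys.argv[1:])
workflows.tedana.main(**vars(options))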
11 changes: 11 additions & 0 deletions tedana/decomp/__init__.py
@@ -0,0 +1,11 @@
# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 et:

from .eigendecomp import (
tedpca, tedica, eimask,
)


__all__ = [
'tedpca', 'tedica', 'eimask'
]
255 changes: 255 additions & 0 deletions tedana/decomp/eigendecomp.py
@@ -0,0 +1,255 @@
import pickle
import numpy as np
import os.path as op
from scipy import stats
from tedana import model, utils

import logging
logging.basicConfig(format='[%(levelname)s]: %(message)s', level=logging.INFO)
lgr = logging.getLogger(__name__)

F_MAX = 500
Z_MAX = 8


def eimask(dd, ees=None):
"""
Returns mask for data between [0.001, 5] * 98th percentile of dd
Parameters
----------
dd : (S x E x T) array_like
Input data, where `S` is samples, `E` is echos, and `T` is time
ees : (N,) list
Indices of echos to assess from `dd` in calculating output
Returns
-------
imask : (S x N) np.ndarray
        Boolean array denoting which samples have a temporal mean within the
        threshold bounds for each assessed echo
"""

if ees is None:
ees = range(dd.shape[1])
imask = np.zeros([dd.shape[0], len(ees)], dtype=bool)
for ee in ees:
lgr.info('++ Creating eimask for echo {}'.format(ee))
perc98 = stats.scoreatpercentile(dd[:, ee, :].flatten(), 98,
interpolation_method='lower')
lthr, hthr = 0.001 * perc98, 5 * perc98
lgr.info('++ Eimask threshold boundaries: '
'{:.03f} {:.03f}'.format(lthr, hthr))
m = dd[:, ee, :].mean(axis=1)
imask[np.logical_and(m > lthr, m < hthr), ee] = True

return imask
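To make the thresholding concrete, here is a small self-contained sketch of calling eimask on synthetic data (shapes and values invented for illustration):

import numpy as np

# Synthetic data: 100 samples, 3 echos, 50 timepoints
rng = np.random.RandomState(0)
data = rng.rand(100, 3, 50) * 1000

imask = eimask(data)      # assess all echos; boolean array of shape (100, 3)
# True where a sample's temporal mean lies within (0.001, 5) * 98th percentile
print(imask.sum(axis=0))  # number of retained samples per echo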


def tedpca(catd, OCcatd, combmode, mask, t2s, t2sG, stabilize,
ref_img, tes, kdaw, rdaw, ste=0, mlepca=True):
"""
Performs PCA on `catd` and uses TE-dependence to dimensionally reduce data
Parameters
----------
catd : (S x E x T) array_like
Input functional data
OCcatd : (S x T) array_like
Optimally-combined time series data
combmode : {'t2s', 'ste'} str
How optimal combination of echos should be made, where 't2s' indicates
using the method of Posse 1999 and 'ste' indicates using the method of
Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates from the full adaptive mask
stabilize : bool
Whether to attempt to stabilize convergence of ICA by returning
dimensionally-reduced data from PCA and component selection.
ref_img : str or img_like
Reference image to dictate how outputs are saved to disk
tes : list
List of echo times associated with `catd`, in milliseconds
kdaw : float
Dimensionality augmentation weight for Kappa calculations
rdaw : float
Dimensionality augmentation weight for Rho calculations
ste : int or list-of-int, optional
Which echos to use in PCA. Values -1 and 0 are special, where a value
of -1 will indicate using all the echos and 0 will indicate using the
optimal combination of the echos. A list can be provided to indicate
a subset of echos. Default: 0
mlepca : bool, optional
Whether to use the method originally explained in Minka, NIPS 2000 for
guessing PCA dimensionality instead of a traditional SVD. Default: True
Returns
-------
n_components : int
Number of components retained from PCA decomposition
dd : (S x E x T) np.ndarray
Dimensionally-reduced functional data
"""

n_samp, n_echos, n_vols = catd.shape
ste = np.array([int(ee) for ee in str(ste).split(',')])

if len(ste) == 1 and ste[0] == -1:
lgr.info('++ Computing PCA of optimally combined multi-echo data')
d = OCcatd[utils.make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :]
elif len(ste) == 1 and ste[0] == 0:
lgr.info('++ Computing PCA of spatially concatenated multi-echo data')
d = catd[mask].astype('float64')
else:
lgr.info('++ Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')

eim = np.squeeze(eimask(d))
d = np.squeeze(d[eim])

dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T # var normalize ts
dz = (dz - dz.mean()) / dz.std() # var normalize everything

if not op.exists('pcastate.pkl'):
# do PC dimension selection and get eigenvalue cutoff
if mlepca:
from sklearn.decomposition import PCA
ppca = PCA(n_components='mle', svd_solver='full')
ppca.fit(dz)
v = ppca.components_
s = ppca.explained_variance_
u = np.dot(np.dot(dz, v.T), np.diag(1. / s))
else:
u, s, v = np.linalg.svd(dz, full_matrices=0)

# actual variance explained (normalized)
sp = s / s.sum()
eigelb = model.getelbow_mod(sp, val=True)

spdif = np.abs(np.diff(sp))
spdifh = spdif[(len(spdif)//2):]
spdthr = np.mean([spdifh.max(), spdif.min()])
spmin = sp[(len(spdif)//2) + np.arange(len(spdifh))[spdifh >= spdthr][0] + 1]
spcum = np.cumsum(sp)

# Compute K and Rho for PCA comps
eimum = np.atleast_2d(eim)
eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
eimum = eimum.prod(axis=1)
o = np.zeros((mask.shape[0], *eimum.shape[1:]))
o[mask] = eimum
eimum = np.squeeze(o).astype(bool)

vTmix = v.T
vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
_, ctb, betasv, v_T = model.fitmodels_direct(catd, v.T, eimum, t2s, t2sG,
tes, combmode, ref_img,
mmixN=vTmixN, full_sel=False)
ctb = ctb[ctb[:, 0].argsort(), :]
ctb = np.vstack([ctb.T[:3], sp]).T

# Save state
lgr.info('++ Saving PCA')
pcastate = {'u': u, 's': s, 'v': v, 'ctb': ctb,
'eigelb': eigelb, 'spmin': spmin, 'spcum': spcum}
try:
with open('pcastate.pkl', 'wb') as handle:
pickle.dump(pcastate, handle)
except TypeError:
lgr.warning('++ Could not save PCA solution.')

else: # if loading existing state
lgr.info('++ Loading PCA')
with open('pcastate.pkl', 'rb') as handle:
pcastate = pickle.load(handle)
u, s, v = pcastate['u'], pcastate['s'], pcastate['v']
ctb, eigelb = pcastate['ctb'], pcastate['eigelb']
spmin, spcum = pcastate['spmin'], pcastate['spcum']

np.savetxt('comp_table_pca.txt', ctb[ctb[:, 1].argsort(), :][::-1])
np.savetxt('mepca_mix.1D', v[ctb[:, 1].argsort()[::-1], :].T)

kappas = ctb[ctb[:, 1].argsort(), 1]
rhos = ctb[ctb[:, 2].argsort(), 2]
fmin, fmid, fmax = utils.getfbounds(n_echos)
kappa_thr = np.average(sorted([fmin, model.getelbow_mod(kappas, val=True)/2, fmid]),
weights=[kdaw, 1, 1])
rho_thr = np.average(sorted([fmin, model.getelbow_cons(rhos, val=True)/2, fmid]),
weights=[rdaw, 1, 1])
if int(kdaw) == -1:
kappas_lim = kappas[utils.andb([kappas < fmid, kappas > fmin]) == 2]
kappa_thr = kappas_lim[model.getelbow_mod(kappas_lim)]
rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
rho_thr = rhos_lim[model.getelbow_mod(rhos_lim)]
stabilize = True
if int(kdaw) != -1 and int(rdaw) == -1:
rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
rho_thr = rhos_lim[model.getelbow_mod(rhos_lim)]

is_hik = np.array(ctb[:, 1] > kappa_thr, dtype=np.int)
is_hir = np.array(ctb[:, 2] > rho_thr, dtype=np.int)
is_hie = np.array(ctb[:, 3] > eigelb, dtype=np.int)
is_his = np.array(ctb[:, 3] > spmin, dtype=np.int)
is_not_fmax1 = np.array(ctb[:, 1] != F_MAX, dtype=np.int)
is_not_fmax2 = np.array(ctb[:, 2] != F_MAX, dtype=np.int)
pcscore = (is_hik + is_hir + is_hie) * is_his * is_not_fmax1 * is_not_fmax2
if stabilize:
temp7 = np.array(spcum < 0.95, dtype=np.int)
temp8 = np.array(ctb[:, 2] > fmin, dtype=np.int)
temp9 = np.array(ctb[:, 1] > fmin, dtype=np.int)
pcscore = pcscore * temp7 * temp8 * temp9

pcsel = pcscore > 0
dd = u.dot(np.diag(s*np.array(pcsel, dtype=np.int))).dot(v)

n_components = s[pcsel].shape[0]
lgr.info('++ Selected {0} components. Kappa threshold: {1:.02f}, '
'Rho threshold: {2:.02f}'.format(n_components, kappa_thr, rho_thr))

dd = stats.zscore(dd.T, axis=0).T # variance normalize timeseries
dd = stats.zscore(dd, axis=None) # variance normalize everything

return n_components, dd
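The actual reduction happens in the reconstruction a few lines up: the singular values of rejected components are zeroed before re-projecting. A distilled, standalone sketch of that step, with a simple variance cutoff standing in for the full kappa/rho/eigenvalue scoring:

import numpy as np

rng = np.random.RandomState(42)
X = rng.randn(200, 50)                    # (samples x time), z-scored data
u, s, v = np.linalg.svd(X, full_matrices=0)

pcsel = (s / s.sum()) > 0.01              # stand-in for pcscore > 0
X_red = u.dot(np.diag(s * pcsel)).dot(v)  # rejected components zeroed out
print(pcsel.sum(), 'components retained')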


def tedica(n_components, dd, conv, fixed_seed, cost, final_cost):
"""
Performs ICA on `dd` and returns mixing matrix
Parameters
----------
n_components : int
Number of components retained from PCA decomposition
dd : (S x E x T) np.ndarray
Dimensionally-reduced functional data, where `S` is samples, `E` is
echos, and `T` is time
conv : float
Convergence limit for ICA
fixed_seed : int
Seed for ensuring reproducibility of ICA results
    cost : {'tanh', 'pow3', 'gaus', 'skew'} str
        Initial cost function for ICA
    final_cost : {'tanh', 'pow3', 'gaus', 'skew'} str
        Final cost function for ICA
Returns
-------
mmix : (C x T) np.ndarray
Mixing matrix for converting input data to component space, where `C`
is components and `T` is the same as in `dd`
Notes
-----
Uses `mdp` implementation of FastICA for decomposition
"""

import mdp
climit = float(conv)
mdp.numx_rand.seed(fixed_seed)
icanode = mdp.nodes.FastICANode(white_comp=n_components, approach='symm', g=cost,
fine_g=final_cost, coarse_limit=climit*100,
limit=climit, verbose=True)
icanode.train(dd)
smaps = icanode.execute(dd) # noqa
mmix = icanode.get_recmatrix().T
mmix = stats.zscore(mmix, axis=0)
return mmix
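For reference, a minimal sketch of exercising this function on random data (requires the mdp package; the convergence limit and cost functions below are illustrative choices, not recommendations):

import numpy as np

rng = np.random.RandomState(42)
dd = rng.randn(500, 120)  # stand-in for the reduced data returned by tedpca

mmix = tedica(n_components=10, dd=dd, conv=2.5e-5, fixed_seed=42,
              cost='tanh', final_cost='tanh')
print(mmix.shape)         # mixing matrix, z-scored along its first axis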