Skip to content

Commit

Permalink
New file structure
Browse files Browse the repository at this point in the history
  • Loading branch information
emdupre committed May 11, 2018
1 parent 7235645 commit df4a829
Show file tree
Hide file tree
Showing 17 changed files with 2,127 additions and 2,037 deletions.
8 changes: 6 additions & 2 deletions tedana/interfaces/__init__.py → tedana/decomp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 et:

from .t2smap import (t2sadmap, make_optcom)
from .eigendecomp import (
tedpca, tedica, eimask,
)


__all__ = [
't2sadmap', 'make_optcom']
'tedpca', 'tedica', 'eimask'
]
255 changes: 255 additions & 0 deletions tedana/decomp/eigendecomp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
import pickle
import numpy as np
import os.path as op
from scipy import stats
from tedana import model, utils

import logging
logging.basicConfig(format='[%(levelname)s]: %(message)s', level=logging.INFO)
lgr = logging.getLogger(__name__)

F_MAX = 500
Z_MAX = 8


def eimask(dd, ees=None):
    """
    Returns mask for data between [0.001, 5] * 98th percentile of dd

    Parameters
    ----------
    dd : (S x E x T) array_like
        Input data, where `S` is samples, `E` is echos, and `T` is time
    ees : (N,) list
        Indices of echos to assess from `dd` in calculating output.
        Default: all echos

    Returns
    -------
    imask : (S x N) np.ndarray
        Boolean array denoting which samples fall within the acceptable
        intensity range for each assessed echo
    """

    if ees is None:
        ees = range(dd.shape[1])
    imask = np.zeros([dd.shape[0], len(ees)], dtype=bool)
    # Output columns are indexed by position within `ees`, not by the raw
    # echo index: indexing with `ee` directly would be out of bounds (or hit
    # the wrong column) whenever `ees` is a subset of echos, e.g. ees=[2].
    for n, ee in enumerate(ees):
        lgr.info('++ Creating eimask for echo {}'.format(ee))
        perc98 = stats.scoreatpercentile(dd[:, ee, :].flatten(), 98,
                                         interpolation_method='lower')
        # acceptable intensities span [0.001, 5] times the 98th percentile
        lthr, hthr = 0.001 * perc98, 5 * perc98
        lgr.info('++ Eimask threshold boundaries: '
                 '{:.03f} {:.03f}'.format(lthr, hthr))
        # mean over time; keep samples strictly inside the threshold bounds
        m = dd[:, ee, :].mean(axis=1)
        imask[np.logical_and(m > lthr, m < hthr), n] = True

    return imask


def tedpca(catd, OCcatd, combmode, mask, t2s, t2sG, stabilize,
           ref_img, tes, kdaw, rdaw, ste=0, mlepca=True):
    """
    Performs PCA on `catd` and uses TE-dependence to dimensionally reduce data

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data, where `S` is samples, `E` is echos, and `T`
        is time
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : array_like
        Limited T2* map; passed through to `model.fitmodels_direct`
    t2sG : array_like
        Full T2* map; passed through to `model.fitmodels_direct`
    stabilize : bool
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection.
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    tes : list
        List of echo times associated with `catd`, in milliseconds
    kdaw : float
        Dimensionality augmentation weight for Kappa calculations. A value
        of -1 switches to elbow-based thresholds and forces `stabilize`
    rdaw : float
        Dimensionality augmentation weight for Rho calculations. A value of
        -1 switches the Rho threshold to an elbow-based estimate
    ste : int or list-of-int, optional
        Which echos to use in PCA. Values -1 and 0 are special. NOTE(review):
        as implemented below, -1 selects the optimally-combined data and 0
        selects the spatially-concatenated (all) echos — the reverse of the
        original description; confirm intended semantics. A list can be
        provided to indicate a subset of echos (1-indexed). Default: 0
    mlepca : bool, optional
        Whether to use the method originally explained in Minka, NIPS 2000 for
        guessing PCA dimensionality instead of a traditional SVD. Default: True

    Returns
    -------
    n_components : int
        Number of components retained from PCA decomposition
    dd : (S x E x T) np.ndarray
        Dimensionally-reduced functional data

    Notes
    -----
    Caches the PCA solution to `pcastate.pkl` in the current directory and
    reloads it on later calls; also writes `comp_table_pca.txt` and
    `mepca_mix.1D` to the current directory.
    """

    n_samp, n_echos, n_vols = catd.shape
    # `ste` may arrive as an int or comma-separated string; normalize to array
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        lgr.info('++ Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        lgr.info('++ Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        lgr.info('++ Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
        # user-supplied echo indices are 1-indexed; shift to 0-indexed
        d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')

    # restrict to samples with plausible intensities in every assessed echo
    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if not op.exists('pcastate.pkl'):
        # do PC dimension selection and get eigenvalue cutoff
        if mlepca:
            # Minka MLE-based dimensionality estimate via scikit-learn
            from sklearn.decomposition import PCA
            ppca = PCA(n_components='mle', svd_solver='full')
            ppca.fit(dz)
            v = ppca.components_
            s = ppca.explained_variance_
            # reconstruct left singular vectors from the fitted components
            u = np.dot(np.dot(dz, v.T), np.diag(1. / s))
        else:
            u, s, v = np.linalg.svd(dz, full_matrices=0)

        # actual variance explained (normalized)
        sp = s / s.sum()
        eigelb = model.getelbow_mod(sp, val=True)

        # threshold successive eigenvalue differences in the latter half of
        # the spectrum to find a minimum acceptable variance share (spmin)
        spdif = np.abs(np.diff(sp))
        spdifh = spdif[(len(spdif)//2):]
        spdthr = np.mean([spdifh.max(), spdif.min()])
        spmin = sp[(len(spdif)//2) + np.arange(len(spdifh))[spdifh >= spdthr][0] + 1]
        spcum = np.cumsum(sp)

        # Compute K and Rho for PCA comps
        # project the per-echo eimask back into full-volume sample space
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        # fit TE-dependence models to the PCA component time courses
        vTmix = v.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        _, ctb, betasv, v_T = model.fitmodels_direct(catd, v.T, eimum, t2s, t2sG,
                                                     tes, combmode, ref_img,
                                                     mmixN=vTmixN, full_sel=False)
        # component table: sort by component index, then append variance share
        ctb = ctb[ctb[:, 0].argsort(), :]
        ctb = np.vstack([ctb.T[:3], sp]).T

        # Save state so later runs in this directory can skip the decomposition
        lgr.info('++ Saving PCA')
        pcastate = {'u': u, 's': s, 'v': v, 'ctb': ctb,
                    'eigelb': eigelb, 'spmin': spmin, 'spcum': spcum}
        try:
            with open('pcastate.pkl', 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            # best-effort cache; an unpicklable entry is not fatal
            lgr.warning('++ Could not save PCA solution.')

    else:  # if loading existing state
        lgr.info('++ Loading PCA')
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        u, s, v = pcastate['u'], pcastate['s'], pcastate['v']
        ctb, eigelb = pcastate['ctb'], pcastate['eigelb']
        spmin, spcum = pcastate['spmin'], pcastate['spcum']

    # write component table and mixing matrix ordered by decreasing Kappa
    np.savetxt('comp_table_pca.txt', ctb[ctb[:, 1].argsort(), :][::-1])
    np.savetxt('mepca_mix.1D', v[ctb[:, 1].argsort()[::-1], :].T)

    # Kappa/Rho thresholds: weighted average of the F-bounds and half the
    # respective elbow value; kdaw/rdaw weight one term of the sorted triple
    # NOTE(review): weights are positional but the values are sorted first,
    # so the daw weight may not land on the elbow term — confirm intent
    kappas = ctb[ctb[:, 1].argsort(), 1]
    rhos = ctb[ctb[:, 2].argsort(), 2]
    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(sorted([fmin, model.getelbow_mod(kappas, val=True)/2, fmid]),
                           weights=[kdaw, 1, 1])
    rho_thr = np.average(sorted([fmin, model.getelbow_cons(rhos, val=True)/2, fmid]),
                         weights=[rdaw, 1, 1])
    if int(kdaw) == -1:
        # kdaw of -1: derive both thresholds from elbows of F-bounded values
        # and force ICA stabilization
        kappas_lim = kappas[utils.andb([kappas < fmid, kappas > fmin]) == 2]
        kappa_thr = kappas_lim[model.getelbow_mod(kappas_lim)]
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[model.getelbow_mod(rhos_lim)]
        stabilize = True
    if int(kdaw) != -1 and int(rdaw) == -1:
        # rdaw of -1 alone: elbow-based Rho threshold only
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[model.getelbow_mod(rhos_lim)]

    # score components: (high Kappa + high Rho + above eigenvalue elbow),
    # gated by the minimum variance share and by F_MAX sentinel exclusion
    # NOTE(review): np.int is removed in NumPy >= 1.24; these should become
    # plain `int` once the minimum supported NumPy version allows
    is_hik = np.array(ctb[:, 1] > kappa_thr, dtype=np.int)
    is_hir = np.array(ctb[:, 2] > rho_thr, dtype=np.int)
    is_hie = np.array(ctb[:, 3] > eigelb, dtype=np.int)
    is_his = np.array(ctb[:, 3] > spmin, dtype=np.int)
    is_not_fmax1 = np.array(ctb[:, 1] != F_MAX, dtype=np.int)
    is_not_fmax2 = np.array(ctb[:, 2] != F_MAX, dtype=np.int)
    pcscore = (is_hik + is_hir + is_hie) * is_his * is_not_fmax1 * is_not_fmax2
    if stabilize:
        # additionally require < 95% cumulative variance and supra-fmin F-stats
        temp7 = np.array(spcum < 0.95, dtype=np.int)
        temp8 = np.array(ctb[:, 2] > fmin, dtype=np.int)
        temp9 = np.array(ctb[:, 1] > fmin, dtype=np.int)
        pcscore = pcscore * temp7 * temp8 * temp9

    # zero out rejected components' singular values and reconstruct the data
    pcsel = pcscore > 0
    dd = u.dot(np.diag(s*np.array(pcsel, dtype=np.int))).dot(v)

    n_components = s[pcsel].shape[0]
    lgr.info('++ Selected {0} components. Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr, rho_thr))

    dd = stats.zscore(dd.T, axis=0).T  # variance normalize timeseries
    dd = stats.zscore(dd, axis=None)  # variance normalize everything

    return n_components, dd


def tedica(n_components, dd, conv, fixed_seed, cost, final_cost):
    """
    Performs ICA on `dd` and returns mixing matrix

    Parameters
    ----------
    n_components : int
        Number of components retained from PCA decomposition
    dd : (S x E x T) np.ndarray
        Dimensionally-reduced functional data, where `S` is samples, `E` is
        echos, and `T` is time
    conv : float
        Convergence limit for ICA
    fixed_seed : int
        Seed for ensuring reproducibility of ICA results
    cost : {'tanh', 'pow3', 'gaus', 'skew'} str
        Initial cost function for ICA
    final_cost : {'tanh', 'pow3', 'gaus', 'skew'} str
        Final cost function for ICA

    Returns
    -------
    mmix : (C x T) np.ndarray
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `dd`

    Notes
    -----
    Uses `mdp` implementation of FastICA for decomposition
    """

    # deferred import: mdp is only needed here
    import mdp
    climit = float(conv)
    # seed mdp's RNG so repeated runs produce the same decomposition
    mdp.numx_rand.seed(fixed_seed)
    # coarse convergence limit is relaxed (100x) relative to the final limit
    icanode = mdp.nodes.FastICANode(white_comp=n_components, approach='symm', g=cost,
                                    fine_g=final_cost, coarse_limit=climit*100,
                                    limit=climit, verbose=True)
    icanode.train(dd)
    # executed for its side effect on the trained node; output unused
    smaps = icanode.execute(dd)  # noqa
    mmix = icanode.get_recmatrix().T
    # z-score each component time series
    mmix = stats.zscore(mmix, axis=0)
    return mmix
Loading

0 comments on commit df4a829

Please sign in to comment.