-
Notifications
You must be signed in to change notification settings - Fork 1
/
tsne.py
223 lines (169 loc) · 8.68 KB
/
tsne.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
"""
Code taken from https://github.com/hma02/thesne/blob/master/model/tsne.py
And then modified.
"""
import os, sys
import numpy as np
import theano
import theano.tensor as T
from sklearn.utils import check_random_state
from core import kl_cost_var, reverse_kl_cost_var, js_cost_var, \
hellinger_cost_var, chi_square_cost_var, \
p_Yp_Y_var_np, floath, find_sigma
from utils import get_epsilon
from utils_sne import precision_K, K_neighbours, neighbour_accuracy_K, plot_map_c, plot_map_news
def tsne(X, perplexity=30, Y=None, output_dims=2, n_epochs=1000,
initial_lr=1000, final_lr=50, lr_switch=250, init_stdev=1e-3,
sigma_iters=50, initial_momentum=0.95, final_momentum=0.0, lrDecay=100,\
momentum_switch=250, metric='euclidean', random_state=None,
verbose=1, fname=None, color=None, divtype='kl', num_folds=2, datatype='mnist'):
"""Compute projection from a matrix of observations (or distances) using
t-SNE.
Parameters
----------
X : array-like, shape (n_observations, n_features), \
or (n_observations, n_observations) if `metric` == 'precomputed'.
Matrix containing the observations (one per row). If `metric` is
'precomputed', pairwise dissimilarity (distance) matrix.
perplexity : float, optional (default = 30)
Target perplexity for binary search for sigmas.
Y : array-like, shape (n_observations, output_dims), optional \
(default = None)
Matrix containing the starting position for each point.
output_dims : int, optional (default = 2)
Target dimension.
n_epochs : int, optional (default = 1000)
Number of gradient descent iterations.
initial_lr : float, optional (default = 2400)
The initial learning rate for gradient descent.
final_lr : float, optional (default = 200)
The final learning rate for gradient descent.
lr_switch : int, optional (default = 250)
Iteration in which the learning rate changes from initial to final.
This option effectively subsumes early exaggeration.
init_stdev : float, optional (default = 1e-4)
Standard deviation for a Gaussian distribution with zero mean from
which the initial coordinates are sampled.
sigma_iters : int, optional (default = 50)
Number of binary search iterations for target perplexity.
initial_momentum : float, optional (default = 0.5)
The initial momentum for gradient descent.
final_momentum : float, optional (default = 0.8)
The final momentum for gradient descent.
momentum_switch : int, optional (default = 250)
Iteration in which the momentum changes from initial to final.
metric : 'euclidean' or 'precomputed', optional (default = 'euclidean')
Indicates whether `X` is composed of observations ('euclidean')
or distances ('precomputed').
random_state : int or np.RandomState, optional (default = None)
Integer seed or np.RandomState object used to initialize the
position of each point. Defaults to a random seed.
verbose : bool (default = 1)
Indicates whether progress information should be sent to standard
output.
Returns
-------
Y : array-like, shape (n_observations, output_dims)
Matrix representing the projection. Each row (point) corresponds to a
row (observation or distance to other observations) in the input matrix.
"""
N = X.shape[0]
X_shared = theano.shared(np.asarray(X, dtype=floath))
sigma_shared = theano.shared(np.ones(N, dtype=floath))
find_sigma(X_shared, sigma_shared, N, perplexity, sigma_iters, metric, verbose)
sorted_ind_p, pdist = K_neighbours(X, sigma=sigma_shared.get_value(), maxK=10)
rev_sorted_ind_p, pdist = K_neighbours(X, maxK=100, revF=True, sigma=sigma_shared.get_value())
figs_path = './figs/'+datatype+'/'+divtype
result_path = './results/'+datatype+'/'+divtype
embedd_path = './embeddings/'+datatype+'/'+divtype
if not os.path.exists(figs_path): os.makedirs(figs_path)
if not os.path.exists(result_path): os.makedirs(result_path)
if not os.path.exists(embedd_path): os.makedirs(embedd_path)
np.save(result_path+'/'+datatype+'_probM_seed0_v2_perp'+str(perplexity), pdist)
np.save(result_path+'/'+datatype+'_data_sorted_v2_seed0_perp'+str(perplexity), sorted_ind_p)
for i in xrange(1,num_folds):
print '%s FOLD %d' % (divtype, i)
random_state = check_random_state(i)
Y = random_state.normal(0, init_stdev, size=(N, output_dims))
Y_shared = theano.shared(np.asarray(Y, dtype=floath))
Y = find_Y(X_shared, Y_shared, sigma_shared, N, output_dims, \
n_epochs, initial_lr, final_lr, lr_switch, \
init_stdev, initial_momentum, final_momentum, \
momentum_switch, metric, sorted_ind_p, \
rev_sorted_ind_p, verbose, \
fname=fname+'_fold'+str(i), color=color, \
divtype=divtype, lrDecay=lrDecay, \
datatype=datatype)
return Y
def find_Y(X_shared, Y_shared, sigma_shared, N, output_dims, n_epochs,
initial_lr, final_lr, lr_switch, init_stdev, initial_momentum,
final_momentum, momentum_switch, metric, sorted_ind_p, rev_sorted_ind_p,\
verbose=0, fname=None, color=None, divtype='kl', lrDecay=100,\
visLossF=0, naccuracyF=1, datatype='mnist'):
"""Optimize cost wrt Y"""
# Optimization hyperparameters
initial_lr = np.array(initial_lr, dtype=floath)
final_lr = np.array(final_lr, dtype=floath)
initial_momentum = np.array(initial_momentum, dtype=floath)
final_momentum = np.array(final_momentum, dtype=floath)
lr = T.fscalar('lr')
lr_shared = theano.shared(initial_lr)
X = T.fmatrix('X')
Y = T.fmatrix('Y')
Yv = T.fmatrix('Yv')
Yv_shared = theano.shared(np.zeros((N, output_dims), dtype=floath))
sigma = T.fvector('sigma')
momentum = T.fscalar('momentum')
momentum_shared = theano.shared(initial_momentum)
# Cost
if divtype == 'kl':
cost = kl_cost_var(X, Y, sigma, metric)
elif divtype == 'rkl':
cost = reverse_kl_cost_var(X, Y, sigma, metric)
elif divtype == 'js':
cost = js_cost_var(X, Y, sigma, metric)
elif divtype == 'hl':
cost = hellinger_cost_var(X, Y, sigma, metric)
elif divtype == 'ch':
cost = chi_square_cost_var(X, Y, sigma, metric)
# Setting update for Y velocities
grad_Y = T.grad(cost, Y)
norm_gs = abs(grad_Y).sum()
updates = [(Yv_shared, momentum*Yv - lr*grad_Y)]
givens = {X: X_shared, sigma: sigma_shared, Y: Y_shared, Yv: Yv_shared}
update_Yv = theano.function([lr, momentum], [cost, norm_gs], givens=givens, updates=updates)
Y_len = T.mean(T.sum(Y**2, axis=1))
# Setting update for Y
get_y_i = theano.function([], Y, givens={Y: Y_shared})
get_cost_i = theano.function([Y], cost, givens={X: X_shared, sigma: sigma_shared})
givens = {Y: Y_shared, Yv: Yv_shared}
updates = [(Y_shared, Y + Yv)]
update_Y = theano.function([], Y_len, givens=givens, updates=updates)
loss, gnorms = [], []
for epoch in range(n_epochs):
lrY = max(float(get_epsilon(initial_lr, lrDecay, epoch)), min(0.001, initial_lr))
mom = float(get_epsilon(initial_momentum, lrDecay, epoch)) \
if epoch < momentum_switch else 0.85
c, grad_len = update_Yv(lrY, mom)
gnorms.append(grad_len)
y_len = update_Y()
loss.append(c)
if verbose:
projX = np.array(Y_shared.get_value())
if epoch % 25 == 0 or epoch < 20:
projX = np.array(Y_shared.get_value())
np.save('./embeddings/'+fname, projX)
ffname = './figs/'+fname+'_epoch'+str(epoch)+'.pdf'
if datatype == 'sbow':
color_dict = [\
'lightblue', 'darkblue',
'indianred', 'darkred', 'red', 'magenta', 'hotpink',
'silver', 'darkgray', 'gray']
plot_map_news(projX, color, color_dict, ffname)
else:
plot_map_c(projX, color, ffname)
print 'Epoch %d, SNE J %f, |GS| %f, |Y| %f, LRY %f, MOM %f' \
% (epoch, c, grad_len, y_len, lrY, mom)
np.save('./results/'+fname+'_loss', np.asarray(loss))
np.save('./results/'+fname+'_gnorm', np.asarray(gnorms))
return np.array(Y_shared.get_value())