function [obj] = EM_WDF(X,WS,K,varargin)
% obj = EM_WDF(X,WS,K,'Init',Init,'tol',tol,'maxIter',maxIter,'Regularize',reg,'CovType',covtype)
%
% EM algorithm for a weighted-data GMM with fixed weights
%
% Inputs:
% X(n,d) - input data, n=number of observations, d=dimension of variable
% WS(n,1) - fixed weight of each observation
% K - maximum number of Gaussian components allowed, excluding the outlier component
% Init - structure of initial Wc, M, V: Init.Wc, Init.M, Init.V ([] for none)
%
% Outputs: obj - a struct with the following elements:
% PComponents(1,k) - estimated weights of the mixture components
% mu(d,k) - estimated mean vectors
% Sigma(d,d,k) - estimated covariance matrices
% L - log likelihood of estimates
% Wbar - expectation of the weights after EM convergence
% E(n,k) - posterior probabilities (responsibilities)
% Class - MAP class labels
% Iters - number of iterations till convergence
% NlogL - negative of log likelihood
% BIC - Bayesian information criterion (BIC) or Schwarz criterion
% AIC - Akaike information criterion
% $Author: Israel D. Gebru $ $Date: Dec 1, 2014$ $Revision: 1.0 $
% $Date: May 1, 2015$ $Revision: 2.0 $
% Copyright: Perception Team, INRIA-Grenoble
% Email: [email protected]
% The EM algorithm is described in the paper:
% (1) Israel D., Xavier A., Florence F., Radu H., "EM for Weighted-Data Clustering",
% ~~
% (2) Israel D., Xavier A., Radu H., Florence F., "Audio-visual speaker localization via weighted clustering",
% IEEE International Workshop on Machine Learning for Signal Processing (MLSP), 2014.
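%
% Example (a minimal sketch, assuming EMInit.m and process_options.m are on
% the MATLAB path; the data, weights and option values below are synthetic
% and only for illustration):
%   X  = [randn(200,2); bsxfun(@plus,randn(200,2),[4 4])]; % two 2-D clusters
%   WS = ones(400,1);                  % fixed, uniform observation weights
%   obj = EM_WDF(X,WS,2,'CovType','full','maxIter',200);
%   scatter(X(:,1),X(:,2),10,obj.Class); % color points by MAP class label
%   fprintf('BIC = %.2f, AIC = %.2f\n',obj.BIC,obj.AIC);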
[Init,tol,maxIter,Regularize,CovType] = process_options(varargin, 'Init', [],'tol',1e-2,'maxIter',100,'Regularize',0,'CovType','full');
if(strcmp(CovType,'full'))
covtype=2;
else % default is diag
CovType='diag';
covtype=1;
end
[n, d]=size(X);
%% Initialize
% Initialize mean, covariance and component mixing weights by k-means, if not provided as input
if isempty(Init)
[Init.Wc,Init.M,Init.V] = EMInit(X,K,'kmeans');
end
% otherwise use the struct provided as input
W = Init.Wc;
M = Init.M;
V = Init.V;
%% Compute initial Expectation and Log likelihood
Ln = Likelihood(X,K,W,M,V,WS);
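% seed Lo away from Ln so the first relative-change test in the EM loop is well defined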
Lo = 2*Ln;
NlogL = -inf;
%% EM algorithm
niter = 1; L(niter)= Ln;
while ((niter<=maxIter) && (abs(100*(Ln-Lo)/Lo)>tol))
[E,ll] = Expectation(X,K,W,M,V,WS,covtype); % E-step
[W,M,V,K] = Maximization(X,K,E,WS,Regularize,covtype); % M-step
niter = niter + 1;
L(niter)=ll;
NlogL = ll;
end
% Store results in object
obj.DistName = 'GMM-WD Fixed';
obj.NDimensions = d;
obj.NComponents = K;
obj.PComponents = W;
obj.mu = M;
obj.Sigma = V;
obj.Wbar = WS;
obj.E = E;
[~, idx] = max(E,[],2);
obj.Class = idx;
obj.Iters = niter;
obj.NlogL = NlogL;
obj.L = L;
obj.RegV = Regularize;
if covtype == 1
obj.CovType = 'diag';
nParam = obj.NDimensions * K;
else
obj.CovType = 'full';
nParam = K*obj.NDimensions * (obj.NDimensions+1)/2;
end
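% add K-1 free mixing weights and K*d mean parameters to the covariance parameter count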
nParam = nParam + K-1 + K * obj.NDimensions;
obj.BIC = 2*NlogL + nParam*log(n);
obj.AIC = 2*NlogL + 2*nParam;
end
%% Expectation Step
function [E,ll] = Expectation(X,k,W,M,V,WS,covType)
[n,d] = size(X);
log_prior = log(W);
log_lh = zeros(n,k);
Wbar_nk = repmat(WS,1,k);
for i=1:k
if covType==2
[L,err] = cholcov(V(:,:,i),0);
if err ~= 0
error(message('stats:mvnpdf:BadMatrixSigma'));
end
diagL = diag(L);
logDetSigma = 2*sum(log(diagL));
else
L = sqrt(diag(V(:,:,i)));
logDetSigma = sum(log(diag(V(:,:,i))));
end
dXM = bsxfun(@minus, X, M(:,i)');
if covType == 2
xRinv = dXM /L ;
else
xRinv = bsxfun(@times,dXM , (1./ L)');
end
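% per-observation log-density of component i under the weight-scaled
% covariance V/w_n: log N(x;mu,V/w) = -0.5*w*MD - 0.5*log|V| + (d/2)*log(w) - (d/2)*log(2*pi)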
tmp = Wbar_nk(:,i) .* sum(xRinv.^2, 2);
log_lh(:,i) = -0.5 * tmp + (-0.5 *logDetSigma + log_prior(i)) - d*log(2*pi)/2 + d*log(Wbar_nk(:,i))/2;
end
maxll = max(log_lh,[],2);
% subtract maxll before exponentiating to avoid underflow
post = exp(bsxfun(@minus, log_lh, maxll));
density = sum(post,2);
% normalize posteriors
E = bsxfun(@rdivide, post, density);
logpdf = log(density) + maxll;
ll = sum(logpdf);
end
% End of Expectation %
%% Maximization Step
function [W,M,V,k] = Maximization(X,k,E,WS,Regularize,covType)
[n,d] = size(X);
Wbar_nk = repmat(WS,1,k);
M = zeros(d,k); V = zeros(d,d,k);
% for each component, compute the new mean, covariance and mixing weight
for i=1:k
den = Wbar_nk(:,i).*E(:,i);
tmp = den'*X;
M(:,i) = tmp'/sum(den,1);
dxM = bsxfun(@minus,X,M(:,i)');
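% covariance update: each squared deviation is scaled by the responsibility
% E(n,i) times the fixed weight w_n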
if(covType ==2) % full cov
dxM = bsxfun(@times,sqrt(E(:,i).*Wbar_nk(:,i)),dxM);
V(:,:,i) = V(:,:,i) + dxM'*dxM;
else % diagonal
V(:,:,i) = V(:,:,i) + diag(den'*(dxM.^2));
end
% normalize by the total responsibility and add diagonal regularization
V(:,:,i) = V(:,:,i)/sum(E(:,i),1)+ eye(d)*(Regularize);
end
% mixing weights: average responsibility over all observations
W = sum(E,1)/n;
end
% End of Maximization %
function LL = Likelihood(X,k,W,M,V,WS)
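% Initial log-likelihood under the weighted-data model: observation n is
% modeled with covariance V/WS(n), consistent with the E-step density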
[n,d] = size(X);
a = (2*pi)^(0.5*d);
LL = 0;
% determinants of the component covariances
S = zeros(1,k);
% inverses of the component covariances
iV = zeros(d,d,k);
% for each mixture component
for j=1:k
S(j) = sqrt(det(V(:,:,j)));
iV(:,:,j) = inv(V(:,:,j));
end
% for each observation, accumulate the log of the mixture density
for i=1:n
p=0;
for j=1:k
if(W(j)>0)
% sqrt(det(V/w)) = sqrt(det(V))/w^(d/2), the normalizer of the weight-scaled covariance
b = S(j)/(WS(i)^(0.5*d)+eps);
% Mahalanobis distance under the weight-scaled covariance: inv(V/w) = w*inv(V)
dXM = X(i,:)'-M(:,j);
MD = WS(i)*(dXM'*iV(:,:,j)*dXM);
p = p + W(j)*exp(-0.5*MD)/(a*b);
end
end
% log of the mixture density (eps guards against log(0))
LL = LL + log(p+eps);
end
end