-
-
Notifications
You must be signed in to change notification settings - Fork 478
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Simplify LDA input parameterization #143
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,28 +2,26 @@ data { | |
int<lower=2> K; // num topics | ||
int<lower=2> V; // num words | ||
int<lower=1> M; // num docs | ||
int<lower=1> N; // total word instances | ||
int<lower=1,upper=V> w[N]; // word n | ||
int<lower=1,upper=M> doc[N]; // doc ID for word n | ||
int<lower=0> corpus[M,V]; // word freq matrix, doc x word | ||
vector<lower=0>[V] beta; // word prior | ||
} | ||
parameters { | ||
vector[K] mu; // topic mean | ||
corr_matrix[K] Omega; // correlation matrix | ||
vector<lower=0>[K] sigma; // scales | ||
vector[K] eta[M]; // logit topic dist for doc m | ||
vector[K] eta[M]; // logit topic dist for doc m | ||
simplex[V] phi[K]; // word dist for topic k | ||
} | ||
transformed parameters { | ||
simplex[K] theta[M]; // simplex topic dist for doc m | ||
cov_matrix[K] Sigma; // covariance matrix | ||
for (m in 1:M) | ||
theta[m] <- softmax(eta[m]); | ||
theta[m] = softmax(eta[m]); | ||
for (m in 1:K) { | ||
Sigma[m,m] <- sigma[m] * sigma[m] * Omega[m,m]; | ||
Sigma[m,m] = sigma[m] * sigma[m] * Omega[m,m]; | ||
for (n in (m+1):K) { | ||
Sigma[m,n] <- sigma[m] * sigma[n] * Omega[m,n]; | ||
Sigma[n,m] <- Sigma[m,n]; | ||
Sigma[m,n] = sigma[m] * sigma[n] * Omega[m,n]; | ||
Sigma[n,m] = Sigma[m,n]; | ||
} | ||
} | ||
} | ||
|
@@ -38,10 +36,16 @@ model { | |
for (m in 1:M) | ||
eta[m] ~ multi_normal(mu,Sigma); | ||
// token probabilities | ||
for (n in 1:N) { | ||
real gamma[K]; | ||
for (k in 1:K) | ||
gamma[k] <- log(theta[doc[n],k]) + log(phi[k,w[n]]); | ||
increment_log_prob(log_sum_exp(gamma)); // likelihood | ||
for (i in 1:M) { | ||
for (j in 1:V) { | ||
int count = corpus[i,j]; | ||
real gamma[K]; | ||
if (count > 0) { | ||
for (k in 1:K) { | ||
gamma[k] = (log(theta[i,k]) + log(phi[k,j]))*count; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than looping to define
The loop works for the
|
||
increment_log_prob(log_sum_exp(gamma)); // likelihood | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for replacing `<-` — that would be a good change for the original model, as well as replacing `increment_log_prob` with `target +=`. `square(sigma[m])` will be a bit more efficient, as would be `sigma[m]^2`.