Use of structures._Word in project IR_Base by Linda-sunshine: class DCMLDA4AC, method sampleInParentDoc.
/**
 * Resamples the topic assignment of every word in a parent document via
 * collapsed Gibbs sampling, keeping the document-level and (conditionally)
 * corpus-level sufficient statistics consistent with the new assignments.
 *
 * @param d the document to sample; assumed to be a _ParentDoc4DCM
 */
protected void sampleInParentDoc(_Doc d) {
    _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;

    for (_Word w : pDoc.getWords()) {
        int oldTopic = w.getTopic();
        int wid = w.getIndex();

        // Remove the word's current assignment from every counter.
        pDoc.m_sstat[oldTopic]--;
        pDoc.m_topic_stat[oldTopic]--;
        pDoc.m_wordTopic_stat[oldTopic][wid]--;
        if (!m_collectCorpusStats)
            word_topic_sstat[oldTopic][wid]--;

        // Build the unnormalized conditional distribution over topics.
        double totalMass = 0;
        for (int k = 0; k < number_of_topics; k++) {
            m_topicProbCache[k] = wordTopicProb(k, wid, pDoc) * topicInDocProb(k, pDoc);
            totalMass += m_topicProbCache[k];
        }

        // Inverse-CDF draw on the cached per-topic masses.
        double threshold = totalMass * m_rand.nextDouble();
        int newTopic = 0;
        while (newTopic < number_of_topics) {
            threshold -= m_topicProbCache[newTopic];
            if (threshold <= 0)
                break;
            newTopic++;
        }
        if (newTopic == number_of_topics)
            newTopic--; // guard against floating-point round-off overshoot

        // Record the freshly sampled assignment in every counter.
        w.setTopic(newTopic);
        pDoc.m_sstat[newTopic]++;
        pDoc.m_topic_stat[newTopic]++;
        pDoc.m_wordTopic_stat[newTopic][wid]++;
        if (!m_collectCorpusStats)
            word_topic_sstat[newTopic][wid]++;
    }
}
Use of structures._Word in project IR_Base by Linda-sunshine: class DCMLDA4AC, method sampleInChildDoc.
/**
 * Resamples the topic assignment of every word in a child document via
 * collapsed Gibbs sampling, updating the child's own counts and the shared
 * parent-document statistics.
 *
 * Bug fix: when the sampled topic was re-added, the corpus-level counter
 * word_topic_sstat was DECREMENTED instead of incremented (asymmetric with
 * both the removal step above and with sampleInParentDoc), leaking two
 * counts per word on every sweep. It now increments, mirroring the
 * decrement performed at removal.
 *
 * @param d the child document to sample; its m_parentDoc must be a _ParentDoc4DCM
 */
protected void sampleInChildDoc(_ChildDoc d) {
    int wid, tid;
    double normalizedProb;
    _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d.m_parentDoc;

    for (_Word w : d.getWords()) {
        tid = w.getTopic();
        wid = w.getIndex();

        // Remove the current assignment from parent-, child- and corpus-level stats.
        pDoc.m_wordTopic_stat[tid][wid]--;
        pDoc.m_topic_stat[tid]--;
        d.m_sstat[tid]--;
        if (!m_collectCorpusStats)
            word_topic_sstat[tid][wid]--;

        // Accumulate the unnormalized conditional probability of each topic.
        normalizedProb = 0;
        for (tid = 0; tid < number_of_topics; tid++) {
            double pWordTopic = wordTopicProb(tid, wid, pDoc);
            double pTopic = topicInDocProb(tid, d);
            m_topicProbCache[tid] = pWordTopic * pTopic;
            normalizedProb += m_topicProbCache[tid];
        }

        // Inverse-CDF draw; bound by number_of_topics for consistency with
        // sampleInParentDoc (the cache is indexed by topic in both methods).
        normalizedProb *= m_rand.nextDouble();
        for (tid = 0; tid < number_of_topics; tid++) {
            normalizedProb -= m_topicProbCache[tid];
            if (normalizedProb <= 0)
                break;
        }
        if (tid == number_of_topics)
            tid--; // guard against floating-point round-off overshoot

        // Re-add the sampled assignment to every counter.
        w.setTopic(tid);
        d.m_sstat[tid]++;
        pDoc.m_topic_stat[tid]++;
        pDoc.m_wordTopic_stat[tid][wid]++;
        if (!m_collectCorpusStats)
            word_topic_sstat[tid][wid]++; // was "--": counter leak fixed
    }
}
Use of structures._Word in project IR_Base by Linda-sunshine: class DCMLDA4AC, method cal_logLikelihood_partial4Parent.
/**
 * Computes the held-out log-likelihood of a parent document by summing, over
 * its words, the log of the word's marginal probability under the document's
 * topic mixture and per-document topic-word distributions.
 *
 * @param d the parent document whose words are scored
 * @return the sum of per-word log-likelihoods
 */
protected double cal_logLikelihood_partial4Parent(_ParentDoc4DCM d) {
    double logLikelihood = 0;
    for (_Word word : d.getWords()) {
        int widx = word.getIndex();
        // Marginalize the word's probability over all topics.
        double wordProb = 0;
        for (int topic = 0; topic < number_of_topics; topic++)
            wordProb += d.m_topics[topic] * d.m_wordTopic_prob[topic][widx];
        logLikelihood += Math.log(wordProb);
    }
    return logLikelihood;
}
Use of structures._Word in project IR_Base by Linda-sunshine: class DCMLDA, method cal_logLikelihood4Partial.
/**
 * Computes the log-likelihood of a document's held-out test words by
 * marginalizing each word's probability over the document's topic mixture
 * and per-document topic-word distributions.
 *
 * @param d the document to score; assumed to be a _Doc4DCMLDA
 * @return the sum of per-test-word log-likelihoods
 */
protected double cal_logLikelihood4Partial(_Doc d) {
    _Doc4DCMLDA dcmDoc = (_Doc4DCMLDA) d;
    double total = 0;
    for (_Word word : dcmDoc.getTestWords()) {
        int widx = word.getIndex();
        // p(w | doc) = sum_k p(k | doc) * p(w | k, doc)
        double marginal = 0;
        for (int topic = 0; topic < number_of_topics; topic++)
            marginal += dcmDoc.m_topics[topic] * dcmDoc.m_wordTopic_prob[topic][widx];
        total += Math.log(marginal);
    }
    return total;
}
Use of structures._Word in project IR_Base by Linda-sunshine: class sparseClusterDCMLDA, method initialize_probability.
/**
 * Allocates and initializes all cluster-, topic- and word-level statistics,
 * assigns each document an initial cluster and topic configuration, and
 * seeds the count tables from those initial assignments.
 *
 * Improvement: removed nine redundant Arrays.fill(..., 0) calls — Java
 * guarantees freshly allocated numeric arrays are zero-initialized
 * (JLS 4.12.5), so filling them with 0 immediately after "new" is dead work.
 *
 * @param collection the training documents to initialize
 */
protected void initialize_probability(Collection<_Doc> collection) {
    // All "new double[...]" arrays below start zeroed by the JVM.
    m_clusterStats = new double[m_clusterNum];
    m_clusterProb = new double[m_clusterNum];
    m_clusterSamplingCache = new double[m_clusterNum];

    m_alpha = new double[number_of_topics];
    m_beta = new double[number_of_topics][vocabulary_size];
    m_totalAlpha = 0;
    m_totalBeta = new double[number_of_topics];
    m_alphaAuxilary = new double[number_of_topics];

    m_clusterTopicWordProb = new double[m_clusterNum][number_of_topics][vocabulary_size];
    m_clusterTopicWordStats = new double[m_clusterNum][number_of_topics][vocabulary_size];
    m_clusterTopicProb = new double[m_clusterNum][number_of_topics];
    m_clusterTopicStats = new double[m_clusterNum][number_of_topics];

    initialAlphaBeta();

    // Give each document an initial cluster + topic assignment, then
    // accumulate the corresponding counts.
    for (_Doc d : collection) {
        ((_Doc4SparseDCMLDA) d).setTopics4GibbsCluster(number_of_topics, m_alpha, m_clusterNum, vocabulary_size);
        int cID = ((_Doc4SparseDCMLDA) d).m_clusterIndicator;
        m_clusterStats[cID]++;
        for (_Word w : d.getWords()) {
            int tid = w.getTopic();
            int wid = w.getIndex();
            m_clusterTopicWordStats[cID][tid][wid]++;
            m_clusterTopicStats[cID][tid]++;
            word_topic_sstat[tid][wid]++;
        }
    }

    imposePrior();
}
Aggregations