use of structures._Word in project IR_Base by Linda-sunshine.
the class corrLDA_Gibbs method sampleInParentDoc.
/**
 * Resamples the topic of every word in a parent document, one word at a time.
 *
 * For each word the current assignment is removed from the document counts
 * (and from the corpus-level counts when {@code m_collectCorpusStats} is set),
 * an unnormalized weight is computed for every topic as the product of
 * {@code parentWordByTopicProb}, {@code parentTopicInDocProb} and
 * {@code parentChildInfluenceProb}, a topic is drawn proportionally to those
 * weights, and the counts are restored for the drawn topic.
 *
 * @param d the document to resample; it is cast to {@code _ParentDoc}
 */
protected void sampleInParentDoc(_Doc d) {
    _ParentDoc parent = (_ParentDoc) d;

    for (_Word word : parent.getWords()) {
        final int wordId = word.getIndex();
        final int previousTopic = word.getTopic();

        // Take the word's current assignment out of the sufficient statistics.
        parent.m_sstat[previousTopic]--;
        if (m_collectCorpusStats) {
            word_topic_sstat[previousTopic][wordId]--;
            m_sstat[previousTopic]--;
        }

        // Fill the per-topic weight cache and accumulate the total mass.
        double totalMass = 0;
        for (int k = 0; k < number_of_topics; k++) {
            double wordGivenTopic = parentWordByTopicProb(k, wordId);
            double topicGivenParent = parentTopicInDocProb(k, parent);
            double childInfluence = parentChildInfluenceProb(k, parent);
            m_topicProbCache[k] = wordGivenTopic * topicGivenParent * childInfluence;
            totalMass += m_topicProbCache[k];
        }

        // Walk the cumulative weights until the random threshold is used up;
        // if it never drops below zero the last topic is kept as the choice.
        double threshold = totalMass * m_rand.nextDouble();
        int sampledTopic = number_of_topics - 1;
        for (int k = 0; k < number_of_topics; k++) {
            threshold -= m_topicProbCache[k];
            if (threshold < 0) {
                sampledTopic = k;
                break;
            }
        }

        // Record the new assignment.
        word.setTopic(sampledTopic);
        parent.m_sstat[sampledTopic]++;
        if (m_collectCorpusStats) {
            word_topic_sstat[sampledTopic][wordId]++;
            m_sstat[sampledTopic]++;
        }
    }
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class DCMLDA method initTestDoc.
/**
 * Prepares a test document for sampling.
 *
 * Zeroes every row of the document's {@code m_wordTopic_prob} table for the
 * first {@code number_of_topics} topics, calls {@code setTopics4GibbsTest}
 * with a test length derived from {@code m_testWord4PerplexityProportion},
 * and then increments {@code m_wordTopic_stat} once per word returned by
 * {@code getWords()} using that word's topic and index.
 *
 * @param d the document to initialize; it is cast to {@code _Doc4DCMLDA}
 */
@Override
public void initTestDoc(_Doc d) {
    _Doc4DCMLDA testDoc = (_Doc4DCMLDA) d;

    int topic = 0;
    while (topic < number_of_topics) {
        Arrays.fill(testDoc.m_wordTopic_prob[topic], 0);
        topic++;
    }

    // The product is truncated toward zero by the int cast.
    double scaledLength = m_testWord4PerplexityProportion * testDoc.getTotalDocLength();
    int heldOutLength = (int) scaledLength;
    testDoc.setTopics4GibbsTest(number_of_topics, 0, heldOutLength);

    // Count the (topic, word) pairs of the words exposed by the document.
    for (_Word word : testDoc.getWords()) {
        int wordId = word.getIndex();
        int assignedTopic = word.getTopic();
        testDoc.m_wordTopic_stat[assignedTopic][wordId]++;
    }
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class DCMLDA method calculate_E_step.
/**
 * Runs one Gibbs sampling sweep over the words of a document.
 *
 * The document's word order is permuted first. Then, for each word, its
 * current topic assignment is removed from the document-level counts (and
 * from {@code word_topic_sstat} when {@code m_collectCorpusStats} is set),
 * an unnormalized weight {@code topicInDocProb * wordTopicProb} is computed
 * for every topic, a new topic is drawn proportionally to those weights, and
 * the counts are incremented for the drawn topic.
 *
 * @param d the document to resample; it is cast to {@code _Doc4DCMLDA}
 * @return always 0
 */
@Override
public double calculate_E_step(_Doc d) {
    _Doc4DCMLDA DCMDoc = (_Doc4DCMLDA) d;
    DCMDoc.permutation();
    double p;
    int wid, tid;
    for (_Word w : DCMDoc.getWords()) {
        wid = w.getIndex();
        tid = w.getTopic();

        // remove the word's topic assignment
        DCMDoc.m_sstat[tid]--;
        DCMDoc.m_wordTopic_stat[tid][wid]--;
        if (m_collectCorpusStats) {
            word_topic_sstat[tid][wid]--;
        }

        // compute the unnormalized weight of every topic
        p = 0;
        for (tid = 0; tid < number_of_topics; tid++) {
            m_topicProbCache[tid] = topicInDocProb(tid, DCMDoc) * wordTopicProb(tid, wid, DCMDoc);
            p += m_topicProbCache[tid];
        }

        // perform random sampling: pick the topic whose weight drives the
        // threshold below zero. The index must not be advanced after the
        // subtraction that crosses zero, otherwise every draw is shifted one
        // topic to the right and topic 0 is almost never selected.
        p *= m_rand.nextDouble();
        for (tid = 0; tid < number_of_topics; tid++) {
            p -= m_topicProbCache[tid];
            if (p < 0) {
                break;
            }
        }
        // rounding can leave the threshold non-negative; fall back to the last topic
        if (tid == number_of_topics) {
            tid--;
        }

        // assign the selected topic to word
        w.setTopic(tid);
        DCMDoc.m_sstat[tid]++;
        DCMDoc.m_wordTopic_stat[tid][wid]++;
        if (m_collectCorpusStats) {
            word_topic_sstat[tid][wid]++;
        }
    }
    return 0;
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class DCMLDA method calculate_log_likelihood4Perplexity.
/**
 * Computes the log-likelihood of a document's words for perplexity evaluation.
 *
 * For every word returned by {@code getWords()}, the per-word likelihood is
 * the sum over topics of {@code m_topics[k] * m_wordTopic_prob[k][wid]};
 * the natural logs of these per-word values are added up.
 * NOTE(review): presumably {@code m_topics} holds the document's topic
 * proportions and {@code m_wordTopic_prob} the word-given-topic
 * probabilities - confirm against {@code _Doc4DCMLDA}.
 *
 * @param d the document to score; it is cast to {@code _Doc4DCMLDA}
 * @return the summed log-likelihood over the document's words
 */
protected double calculate_log_likelihood4Perplexity(_Doc d) {
    double logLikelihood = 0;
    _Doc4DCMLDA doc = (_Doc4DCMLDA) d;

    for (_Word word : doc.getWords()) {
        int wordId = word.getIndex();

        // Mix the per-topic word probabilities with the document's topic weights.
        double mixture = 0;
        int topic = 0;
        while (topic < number_of_topics) {
            mixture += doc.m_topics[topic] * doc.m_wordTopic_prob[topic][wordId];
            topic++;
        }

        logLikelihood += Math.log(mixture);
    }

    return logLikelihood;
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class DCMLDA_test method printParentTopicAssignment.
/**
 * Writes the topic assignment of every word in a document to a text file.
 *
 * The file is named {@code <document name>.txt} and is created inside
 * {@code topicFolder}. Each word is written as {@code feature:topic}
 * followed by a tab, all on a single line. If the file cannot be opened the
 * stack trace is printed and nothing is written.
 *
 * @param d the document whose word-topic assignments are written
 * @param topicFolder the directory that receives the output file
 */
protected void printParentTopicAssignment(_Doc d, File topicFolder) {
    String topicAssignmentFile = d.getName() + ".txt";
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(topicFolder, topicAssignmentFile));
        for (_Word w : d.getWords()) {
            int index = w.getIndex();
            int topic = w.getTopic();
            String featureName = m_corpus.getFeature(index);
            pw.print(featureName + ":" + topic + "\t");
        }
        pw.flush();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the file handle is released even when the loop
        // above fails with an unchecked exception.
        if (pw != null) {
            pw.close();
        }
    }
}
Aggregations