Use of structures._Doc4DCMLDA in project IR_Base by Linda-sunshine.
From the class DCMLDA, method calculate_log_likelihood4Perplexity.
/**
 * Computes the log-likelihood of one document, as used for perplexity.
 *
 * For every word returned by the document's getWords(), the products
 * m_topics[t] * m_wordTopic_prob[t][wordIndex] are summed over all
 * number_of_topics topics, and the natural log of that sum is added to the
 * running total.
 *
 * NOTE(review): the m_wordTopic_prob entries are used directly as
 * multiplicative weights here; confirm they are not stored in log space.
 *
 * @param d the document to score; it is cast to _Doc4DCMLDA
 * @return the sum over the document's words of log(per-word topic mixture)
 */
protected double calculate_log_likelihood4Perplexity(_Doc d) {
    _Doc4DCMLDA doc = (_Doc4DCMLDA) d;
    double logLikelihoodSum = 0;

    for (_Word word : doc.getWords()) {
        int wordIndex = word.getIndex();

        // Mixture of the per-topic word weights, weighted by the document's topic values.
        double mixture = 0;
        int topic = 0;
        while (topic < number_of_topics) {
            mixture += doc.m_topics[topic] * doc.m_wordTopic_prob[topic][wordIndex];
            topic++;
        }

        logLikelihoodSum += Math.log(mixture);
    }

    return logLikelihoodSum;
}
Use of structures._Doc4DCMLDA in project IR_Base by Linda-sunshine.
From the class DCMLDA_test, method printWordTopicDistribution.
/**
 * Writes the document-specific word-topic values of one document to
 * "&lt;document name&gt;.txt" inside the given folder.
 *
 * One line is written per topic: the topic index and the document's
 * m_topics value for it, followed by the (feature name, value) items held
 * by a MyPriorityQueue of capacity k that was fed every vocabulary entry.
 * When m_logSpace is set, each item value is exponentiated before printing.
 *
 * The writer is closed in a finally block so the file handle is released
 * even if an unchecked exception is thrown while writing.
 *
 * @param d the document to dump; it is cast to _Doc4DCMLDA
 * @param wordTopicDistributionFolder directory in which the output file is created
 * @param k capacity handed to the MyPriorityQueue (presumably the number of
 *          top-ranked words kept per topic; verify against MyPriorityQueue)
 */
protected void printWordTopicDistribution(_Doc d, File wordTopicDistributionFolder, int k) {
    String wordTopicDistributionFile = d.getName() + ".txt";
    // Cast before opening the file so a wrong document type fails without
    // leaving an empty output file behind.
    _Doc4DCMLDA DCMDoc = (_Doc4DCMLDA) d;

    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(wordTopicDistributionFolder, wordTopicDistributionFile));

        for (int i = 0; i < number_of_topics; i++) {
            // Rank every vocabulary entry by its value under topic i.
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int v = 0; v < vocabulary_size; v++) {
                String featureName = m_corpus.getFeature(v);
                double wordProb = DCMDoc.m_wordTopic_prob[i][v];
                fVector.add(new _RankItem(featureName, wordProb));
            }

            pw.format("Topic %d(%.5f):\t", i, d.m_topics[i]);
            for (_RankItem it : fVector) {
                pw.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            }
            pw.write("\n");
        }

        pw.flush();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        // Release the file handle on every path, including unchecked
        // exceptions raised while building or writing the rankings.
        if (pw != null) {
            pw.close();
        }
    }
}
Aggregations