Usage example of structures._RankItem in the project IR_Base by Linda-sunshine: the printTopWords method of the class CLRWithHDP.
// Prints the top-ranked words of one cluster in three views: features with the
// largest positive weights, the largest negative weights, and the highest
// language-model frequency. Word scores are model weights scaled by term frequency.
void printTopWords(_HDPThetaStar cluster) {
    MyPriorityQueue<_RankItem> ranker = new MyPriorityQueue<_RankItem>(30);
    double[] lmStat = cluster.getLMStat();
    double[] phi = cluster.getModel();
    int[] tfs = m_tf_count[cluster.getIndex()];

    System.out.format("Cluster %d (%d)\n[positive]: ", cluster.getIndex(), cluster.getMemSize());
    // n starts at 1 so the bias term is skipped; tfs is offset by one accordingly
    for (int n = 1; n < phi.length; n++) {
        double termFreq = (tfs[n - 1] == 0) ? 0.1 : tfs[n - 1]; // smooth zero counts
        ranker.add(new _RankItem(n, phi[n] * termFreq));
    }
    for (_RankItem it : ranker)
        System.out.format("%s:%.3f\t", m_features[it.m_index], phi[it.m_index]);

    ranker.clear();
    System.out.format("\n[negative]: ");
    for (int n = 1; n < phi.length; n++) {
        double termFreq = (tfs[n - 1] == 0) ? 0.1 : tfs[n - 1];
        // negate the score so the most negative weights rank highest
        ranker.add(new _RankItem(n, -phi[n] * termFreq));
    }
    for (_RankItem it : ranker)
        System.out.format("%s:%.3f\t", m_features[it.m_index], phi[it.m_index]);

    ranker.clear();
    System.out.format("\n[popular]: ");
    // most frequent words under the cluster's language model
    for (int n = 0; n < lmStat.length; n++)
        ranker.add(new _RankItem(n, lmStat[n]));
    for (_RankItem it : ranker)
        System.out.format("%s:%.1f\t", m_lmFeatures.get(it.m_index), lmStat[it.m_index]);
    System.out.println();
}
Usage example of structures._RankItem in the project IR_Base by Linda-sunshine: the printTopBeta method of the class sparseClusterDCMLDA_test.
/**
 * Writes the top-k beta-weighted words of every topic to a file.
 * Topic proportions (m_sstat) are aggregated over the training set and
 * L1-normalized, and each topic line is prefixed with its relative weight.
 *
 * @param k           number of top words to report per topic
 * @param topBetaFile destination file path
 */
protected void printTopBeta(int k, String topBetaFile) {
    System.out.println("TopWord FilePath:" + topBetaFile);
    Arrays.fill(m_sstat, 0);
    for (_Doc d : m_trainSet) {
        // accumulate topic proportions; exponentiate if stored in log space
        for (int i = 0; i < number_of_topics; i++)
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
    }
    Utils.L1Normalization(m_sstat);
    // try-with-resources: the writer is closed even if a write fails mid-loop
    // (the original leaked the PrintWriter on any exception)
    try (PrintWriter topWordWriter = new PrintWriter(new File(topBetaFile))) {
        for (int i = 0; i < m_beta.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++)
                fVector.add(new _RankItem(m_corpus.getFeature(j), m_beta[i][j]));
            topWordWriter.format("Topic %d(%.5f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector)
                topWordWriter.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            topWordWriter.write("\n");
        }
    } catch (Exception ex) {
        // report the actual failure instead of a fixed, possibly wrong message
        System.err.println("Failed to write top beta file " + topBetaFile + ": " + ex);
    }
}
Usage example of structures._RankItem in the project IR_Base by Linda-sunshine: the printTopWord method of the class sparseClusterDCMLDA_test.
/**
 * Writes the top-k words of every topic (ranked by topic-term probability)
 * to a file. Topic proportions (m_sstat) are aggregated over the training
 * set and L1-normalized, and each topic line is prefixed with its weight.
 *
 * @param k           number of top words to report per topic
 * @param topWordFile destination file path
 */
protected void printTopWord(int k, String topWordFile) {
    System.out.println("TopWord FilePath:" + topWordFile);
    Arrays.fill(m_sstat, 0);
    for (_Doc d : m_trainSet) {
        // accumulate topic proportions; exponentiate if stored in log space
        for (int i = 0; i < number_of_topics; i++)
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
    }
    Utils.L1Normalization(m_sstat);
    // try-with-resources: the writer is closed even if a write fails mid-loop
    // (the original leaked the PrintWriter on any exception)
    try (PrintWriter topWordWriter = new PrintWriter(new File(topWordFile))) {
        for (int i = 0; i < topic_term_probabilty.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++)
                fVector.add(new _RankItem(m_corpus.getFeature(j), topic_term_probabilty[i][j]));
            topWordWriter.format("Topic %d(%.5f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector)
                topWordWriter.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            topWordWriter.write("\n");
        }
    } catch (Exception ex) {
        // report the actual failure instead of a fixed, possibly wrong message
        System.err.println("Failed to write top word file " + topWordFile + ": " + ex);
    }
}
Usage example of structures._RankItem in the project IR_Base by Linda-sunshine: the printTopBeta method of the class sparseDCMLDA_test.
/**
 * Writes the top-k beta-weighted words of every topic to a file.
 * Topic proportions (m_sstat) are aggregated over the training set and
 * L1-normalized, and each topic line is prefixed with its relative weight.
 *
 * @param k           number of top words to report per topic
 * @param topBetaFile destination file path
 */
protected void printTopBeta(int k, String topBetaFile) {
    System.out.println("TopWord FilePath:" + topBetaFile);
    Arrays.fill(m_sstat, 0);
    for (_Doc d : m_trainSet) {
        // accumulate topic proportions; exponentiate if stored in log space
        for (int i = 0; i < number_of_topics; i++)
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
    }
    Utils.L1Normalization(m_sstat);
    // try-with-resources: the writer is closed even if a write fails mid-loop
    // (the original leaked the PrintWriter on any exception)
    try (PrintWriter topWordWriter = new PrintWriter(new File(topBetaFile))) {
        for (int i = 0; i < m_beta.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++)
                fVector.add(new _RankItem(m_corpus.getFeature(j), m_beta[i][j]));
            topWordWriter.format("Topic %d(%.5f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector)
                topWordWriter.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            topWordWriter.write("\n");
        }
    } catch (Exception ex) {
        // report the actual failure instead of a fixed, possibly wrong message
        System.err.println("Failed to write top beta file " + topBetaFile + ": " + ex);
    }
}
Usage example of structures._RankItem in the project IR_Base by Linda-sunshine: the printTopWords method of the class LDA_Gibbs_test.
/**
 * Prints the final log-likelihood, dumps per-document debug output, and
 * writes the top-k words of every topic (ranked by topic-term probability)
 * to betaFile, echoing the same lines to stdout.
 *
 * @param k        number of top words to report per topic
 * @param betaFile output file path; its prefix (minus "topWords.txt") is
 *                 reused as the directory prefix for debug output
 */
public void printTopWords(int k, String betaFile) {
    double loglikelihood = calculate_log_likelihood();
    System.out.format("Final Log Likelihood %.3f\t", loglikelihood);
    String filePrefix = betaFile.replace("topWords.txt", "");
    debugOutput(filePrefix);
    Arrays.fill(m_sstat, 0);
    System.out.println("print top words");
    for (_Doc d : m_trainSet) {
        // accumulate topic proportions; exponentiate if stored in log space
        for (int i = 0; i < number_of_topics; i++)
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
    }
    Utils.L1Normalization(m_sstat);
    System.out.println("beta file");
    // try-with-resources: the writer is closed (and flushed) even if a write
    // fails mid-loop; the original leaked the PrintWriter on any exception
    // and its explicit flush() before close() was redundant
    try (PrintWriter betaOut = new PrintWriter(new File(betaFile))) {
        for (int i = 0; i < topic_term_probabilty.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++)
                fVector.add(new _RankItem(m_corpus.getFeature(j), topic_term_probabilty[i][j]));
            betaOut.format("Topic %d(%.3f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                // hoist the shared value so it is computed once per item
                double value = m_logSpace ? Math.exp(it.m_value) : it.m_value;
                betaOut.format("%s(%.3f)\t", it.m_name, value);
                System.out.format("%s(%.3f)\t", it.m_name, value);
            }
            betaOut.println();
            System.out.println();
        }
    } catch (Exception ex) {
        // report the actual failure instead of a fixed, possibly wrong message
        System.err.println("Failed to write beta file " + betaFile + ": " + ex);
    }
}
Aggregations