use of structures._RankItem in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method printTopWordsDistribution.
/**
 * Writes the top words of every topic to {@code topWordFile} and echoes the same
 * word list to standard output.
 *
 * <p>Before printing, {@code m_sstat} is zeroed and refilled with each topic's value
 * summed over all documents in {@code m_trainSet} (exponentiated first when
 * {@code m_logSpace} is set), then passed through {@code Utils.L1Normalization}.
 *
 * <p>Failures are reported on standard error instead of being thrown, and the output
 * file is closed on every path.
 *
 * @param topK        capacity passed to each per-topic {@code MyPriorityQueue}
 *                    (presumably the number of words kept per topic; confirm against
 *                    {@code MyPriorityQueue})
 * @param topWordFile path of the file to write
 */
protected void printTopWordsDistribution(int topK, String topWordFile) {
    Arrays.fill(m_sstat, 0);
    System.out.println("print top words");
    for (_Doc d : m_trainSet) {
        for (int i = 0; i < number_of_topics; i++) {
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
        }
    }
    Utils.L1Normalization(m_sstat);

    PrintWriter betaOut = null;
    try {
        System.out.println("top word file");
        betaOut = new PrintWriter(new File(topWordFile));
        for (int i = 0; i < topic_term_probabilty.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(topK);
            for (int j = 0; j < vocabulary_size; j++) {
                fVector.add(new _RankItem(m_corpus.getFeature(j), topic_term_probabilty[i][j]));
            }

            betaOut.format("Topic %d(%.3f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                // Stored values are log-probabilities when m_logSpace is set.
                double value = m_logSpace ? Math.exp(it.m_value) : it.m_value;
                betaOut.format("%s(%.3f)\t", it.m_name, value);
                System.out.format("%s(%.3f)\t", it.m_name, value);
            }
            betaOut.println();
            System.out.println();
        }
        // PrintWriter never throws on write failures; checkError() flushes and reports them.
        if (betaOut.checkError()) {
            System.err.println("Write error while writing top words to " + topWordFile);
        }
    } catch (Exception ex) {
        // Kept as a catch-all so callers still never see an exception, but the real
        // cause is reported instead of an unconditional "File Not Found".
        System.err.println("Failed to write top words to " + topWordFile + ": " + ex);
    } finally {
        if (betaOut != null) {
            betaOut.close();
        }
    }
}
use of structures._RankItem in project IR_Base by Linda-sunshine.
the class DCMCorrLDA method printTopWords.
/**
 * Writes the top words of each topic in {@code m_beta} to {@code betaFile}, one topic
 * per line in the form {@code Topic i(m_sstat[i]):<TAB>word(value)<TAB>...}.
 *
 * <p>Word values are exponentiated before printing when {@code m_logSpace} is set.
 * Failures are reported on standard error instead of being thrown, and the output
 * file is closed on every path.
 *
 * @param k        capacity passed to each per-topic {@code MyPriorityQueue}
 *                 (presumably the number of words kept per topic; confirm against
 *                 {@code MyPriorityQueue})
 * @param betaFile path of the file to write
 */
public void printTopWords(int k, String betaFile) {
    PrintWriter topWordWriter = null;
    try {
        topWordWriter = new PrintWriter(new File(betaFile));
        for (int i = 0; i < m_beta.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++) {
                fVector.add(new _RankItem(m_corpus.getFeature(j), m_beta[i][j]));
            }

            topWordWriter.format("Topic %d(%.5f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                topWordWriter.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            }
            topWordWriter.write("\n");
        }
        // PrintWriter never throws on write failures; checkError() flushes and reports them.
        if (topWordWriter.checkError()) {
            System.err.println("Write error while writing top words to " + betaFile);
        }
    } catch (Exception ex) {
        // Kept as a catch-all so callers still never see an exception, but the real
        // cause is reported instead of an unconditional "File Not Found".
        System.err.println("Failed to write top words to " + betaFile + ": " + ex);
    } finally {
        if (topWordWriter != null) {
            topWordWriter.close();
        }
    }
}
use of structures._RankItem in project IR_Base by Linda-sunshine.
the class DCMCorrLDA_test method printWordTopicDistribution.
/**
 * Writes the per-topic top words of a single document to
 * {@code <wordTopicDistributionFolder>/<document name>.txt}.
 *
 * <p>Each output line has the form
 * {@code Topic i(pDoc.m_topics[i]):<TAB>word(value)<TAB>...}, where the values come
 * from {@code pDoc.m_wordTopic_prob[i]} and are exponentiated when {@code m_logSpace}
 * is set. A {@code FileNotFoundException} is printed via its stack trace; any other
 * exception propagates to the caller. The output file is closed on every path.
 *
 * @param d                           document to dump; it is cast to
 *                                    {@code _ParentDoc4DCM}, so any other type fails
 *                                    with a {@code ClassCastException}
 * @param wordTopicDistributionFolder directory that receives the output file
 * @param k                           capacity passed to each per-topic
 *                                    {@code MyPriorityQueue} (presumably the number of
 *                                    words kept per topic; confirm against
 *                                    {@code MyPriorityQueue})
 */
protected void printWordTopicDistribution(_Doc d, File wordTopicDistributionFolder, int k) {
    _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;
    String wordTopicDistributionFile = pDoc.getName() + ".txt";

    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(wordTopicDistributionFolder, wordTopicDistributionFile));
        for (int i = 0; i < number_of_topics; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int v = 0; v < vocabulary_size; v++) {
                String featureName = m_corpus.getFeature(v);
                double wordProb = pDoc.m_wordTopic_prob[i][v];
                fVector.add(new _RankItem(featureName, wordProb));
            }

            pw.format("Topic %d(%.5f):\t", i, pDoc.m_topics[i]);
            for (_RankItem it : fVector) {
                pw.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            }
            pw.write("\n");
        }
        // PrintWriter never throws on write failures; checkError() flushes and reports them.
        if (pw.checkError()) {
            System.err.println("Write error while writing " + wordTopicDistributionFile);
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        // Runs even when a runtime exception escapes the loop, so the file handle is not leaked.
        if (pw != null) {
            pw.close();
        }
    }
}
use of structures._RankItem in project IR_Base by Linda-sunshine.
the class DCMLDA4AC method printTopWords.
/**
 * Writes the top words of each topic in {@code m_beta} to {@code betaFile}, one topic
 * per line in the form {@code Topic i(m_sstat[i]):<TAB>word(value)<TAB>...}.
 *
 * <p>Word values are exponentiated before printing when {@code m_logSpace} is set.
 * Failures are reported on standard error instead of being thrown, and the output
 * file is closed on every path.
 *
 * @param k        capacity passed to each per-topic {@code MyPriorityQueue}
 *                 (presumably the number of words kept per topic; confirm against
 *                 {@code MyPriorityQueue})
 * @param betaFile path of the file to write
 */
public void printTopWords(int k, String betaFile) {
    PrintWriter topWordWriter = null;
    try {
        topWordWriter = new PrintWriter(new File(betaFile));
        for (int i = 0; i < m_beta.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++) {
                fVector.add(new _RankItem(m_corpus.getFeature(j), m_beta[i][j]));
            }

            topWordWriter.format("Topic %d(%.5f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                topWordWriter.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            }
            topWordWriter.write("\n");
        }
        // PrintWriter never throws on write failures; checkError() flushes and reports them.
        if (topWordWriter.checkError()) {
            System.err.println("Write error while writing top words to " + betaFile);
        }
    } catch (Exception ex) {
        // Kept as a catch-all so callers still never see an exception, but the real
        // cause is reported instead of an unconditional "File Not Found".
        System.err.println("Failed to write top words to " + betaFile + ": " + ex);
    } finally {
        if (topWordWriter != null) {
            topWordWriter.close();
        }
    }
}
use of structures._RankItem in project IR_Base by Linda-sunshine.
the class FeatureSelector method CHI.
// Feature Selection -- CHI.
/**
 * Chi-square feature selection.
 *
 * <p>Clears {@code m_selectedFeatures}, then, for every feature whose total document
 * frequency is strictly greater than {@code m_minDF} and strictly less than
 * {@code m_maxDF}, adds a {@code _RankItem} whose score is the average of the
 * per-class {@code Utils.ChiSquare} values weighted by class size.
 *
 * @param featureStat   per-feature statistics keyed by feature name
 * @param classMemberNo per-class member counts; their sum is the total {@code N}
 *                      passed to {@code Utils.ChiSquare}
 */
public void CHI(HashMap<String, _stat> featureStat, int[] classMemberNo) {
    m_selectedFeatures.clear();

    final int numClasses = classMemberNo.length;
    final int total = Utils.sumOfArray(classMemberNo);
    // Per-class statistics for the current feature, overwritten on every iteration.
    double[] perClassChi = new double[numClasses];

    for (String feature : featureStat.keySet()) {
        _stat stat = featureStat.get(feature);
        int totalDF = Utils.sumOfArray(stat.getDF());

        // Only features with m_minDF < total DF < m_maxDF are scored.
        if (!(totalDF > m_minDF && totalDF < m_maxDF)) {
            continue;
        }

        double weightedAvg = 0;
        for (int c = 0; c < numClasses; c++) {
            perClassChi[c] = Utils.ChiSquare(total, totalDF, stat.getDF()[c], classMemberNo[c]);
            weightedAvg += perClassChi[c] * classMemberNo[c] / total;
        }
        // Alternative scoring kept from the original as a note: the maximum of perClassChi
        // (via Utils.maxOfArrayValue) instead of the weighted average.
        m_selectedFeatures.add(new _RankItem(feature, weightedAvg));
    }
}
Aggregations