use of structures.MyPriorityQueue in project IR_Base by Linda-sunshine.
the class DCMLDA4AC method printTopWords.
/**
 * Writes one line per topic to {@code betaFile}, listing ranked words of that topic.
 *
 * <p>For each row of {@code m_beta}, every vocabulary entry is offered to a
 * {@code MyPriorityQueue} constructed with {@code k}; the queue's contents are then
 * written as {@code word(value)} pairs after a {@code Topic i(m_sstat[i])} header.
 * When {@code m_logSpace} is set, each value is exponentiated before printing.
 *
 * <p>Any exception is reported on stderr and not rethrown. The writer is closed on
 * every path so the file handle is released and buffered output is flushed even
 * when the loop fails part-way.
 *
 * @param k        size argument given to the ranking queue (presumably the number
 *                 of words kept per topic -- confirm against MyPriorityQueue)
 * @param betaFile path of the file to write
 */
public void printTopWords(int k, String betaFile) {
    PrintWriter topWordWriter = null;
    try {
        topWordWriter = new PrintWriter(new File(betaFile));
        for (int i = 0; i < m_beta.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++) {
                fVector.add(new _RankItem(m_corpus.getFeature(j), m_beta[i][j]));
            }

            topWordWriter.format("Topic %d(%.5f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                topWordWriter.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            }
            topWordWriter.write("\n");
        }
    } catch (Exception ex) {
        // Broad catch kept on purpose: callers rely on this method never throwing.
        System.err.print("File Not Found");
    } finally {
        // Previously the writer was closed only on success, leaking the handle on failure.
        if (topWordWriter != null) {
            topWordWriter.close();
        }
    }
}
use of structures.MyPriorityQueue in project IR_Base by Linda-sunshine.
the class PairwiseSimCalculator method constructNearestGraph.
/**
 * Adds neighbor edges to every node whose index lies in {@code [m_start, m_end)}.
 *
 * <p>For each such index the test document is compared against all {@code m_U} test
 * documents (skipping itself) and all {@code m_L} labeled documents. The candidates are
 * pushed through two reusable {@code MyPriorityQueue}s (constructed with {@code m_k} for
 * labeled and {@code m_kPrime} for unlabeled neighbors), and whatever the queues retain is
 * attached to the node as unlabeled / labeled edges. Labeled neighbors are addressed in
 * {@code m_nodeList} at offset {@code m_U + j}. A progress line is printed when done.
 */
void constructNearestGraph() {
    MyPriorityQueue<_RankItem> labeledNeighbors = new MyPriorityQueue<_RankItem>(m_GFObj.m_k);
    MyPriorityQueue<_RankItem> unlabeledNeighbors = new MyPriorityQueue<_RankItem>(m_GFObj.m_kPrime);

    for (int i = m_start; i < m_end; i++) {
        _Doc current = m_GFObj.getTestDoc(i);
        _Node currentNode = m_GFObj.m_nodeList[i];

        // Scan every unlabeled candidate: similarity is not assumed to be symmetric,
        // so results computed for other nodes cannot be reused here.
        for (int j = 0; j < m_GFObj.m_U; j++) {
            if (i != j) {
                _Doc candidate = m_GFObj.getTestDoc(j);
                double sim = m_GFObj.getSimilarity(current, candidate);
                unlabeledNeighbors.add(new _RankItem(j, sim));
            }
        }
        for (_RankItem ranked : unlabeledNeighbors) {
            currentNode.addUnlabeledEdge(m_GFObj.m_nodeList[ranked.m_index], ranked.m_value);
        }
        unlabeledNeighbors.clear();

        // Same procedure against the labeled documents.
        for (int j = 0; j < m_GFObj.m_L; j++) {
            _Doc candidate = m_GFObj.getLabeledDoc(j);
            double sim = m_GFObj.getSimilarity(current, candidate);
            labeledNeighbors.add(new _RankItem(m_GFObj.m_U + j, sim));
        }
        for (_RankItem ranked : labeledNeighbors) {
            currentNode.addLabeledEdge(m_GFObj.m_nodeList[ranked.m_index], ranked.m_value);
        }
        labeledNeighbors.clear();

        // Sorting the edges up front speeds up later debug output.
        currentNode.sortEdges();
    }

    System.out.format("[%d,%d) finished...\n", m_start, m_end);
}
use of structures.MyPriorityQueue in project IR_Base by Linda-sunshine.
the class KNN method predict.
/**
 * Predicts a label for {@code doc} by voting among its nearest neighbors.
 *
 * <p>Candidates are the whole training set when {@code m_l <= 0}, otherwise the bucket
 * selected by {@code getHashCode(doc)}. Each candidate is ranked by
 * {@code Utils.dotProduct} with {@code doc} through a {@code MyPriorityQueue} constructed
 * with {@code m_k}; every retained item adds one vote for its label in {@code m_cProbs}
 * (similarity values are not used as weights).
 *
 * @param doc the document to classify
 * @return {@code Utils.argmax(m_cProbs)}, or {@code -1} when the selected bucket is
 *         missing or holds fewer than {@code m_k} documents
 */
@Override
public int predict(_Doc doc) {
    Collection<_Doc> docs;
    if (m_l <= 0) {
        // no random projection
        docs = m_trainSet;
    } else {
        docs = m_buckets.get(getHashCode(doc));
        // The lookup can come back null if no bucket exists for this hash (e.g. a
        // map-backed m_buckets -- its type is not visible here); treat that like an
        // undersized bucket instead of failing with a NullPointerException.
        if (docs == null || docs.size() < m_k) {
            System.err.println("L is set too large, tune the parameter.");
            return -1;
        }
    }

    MyPriorityQueue<_RankItem> neighbors = new MyPriorityQueue<_RankItem>(m_k);
    for (_Doc d : docs) {
        neighbors.add(new _RankItem(d.getYLabel(), Utils.dotProduct(d, doc)));
    }

    Arrays.fill(m_cProbs, 0);
    // Open question from the original author: why don't we consider the similarity?
    for (_RankItem rt : neighbors) {
        m_cProbs[rt.m_index]++;
    }
    return Utils.argmax(m_cProbs);
}
use of structures.MyPriorityQueue in project IR_Base by Linda-sunshine.
the class KNN method score.
/**
 * Scores {@code label} for {@code doc} from the votes of its nearest neighbors.
 *
 * <p>Candidates are the whole training set when {@code m_l <= 0}, otherwise the bucket
 * selected by {@code getHashCode(doc)}. Candidates are ranked by {@code Utils.dotProduct}
 * with {@code doc} through a {@code MyPriorityQueue} constructed with {@code m_k}; each
 * retained item adds one vote for its label in {@code m_cProbs}.
 *
 * @param doc   the document to score
 * @param label index into {@code m_cProbs} of the label being scored
 * @return {@code m_cProbs[label] - m_k}, or {@code -1} when the selected bucket is
 *         missing or holds fewer than {@code m_k} documents (note that {@code -1} can
 *         also be a regular score)
 */
@Override
public double score(_Doc doc, int label) {
    Collection<_Doc> docs;
    if (m_l <= 0) {
        // no random projection
        docs = m_trainSet;
    } else {
        docs = m_buckets.get(getHashCode(doc));
        // The lookup can come back null if no bucket exists for this hash (e.g. a
        // map-backed m_buckets -- its type is not visible here); treat that like an
        // undersized bucket instead of failing with a NullPointerException.
        if (docs == null || docs.size() < m_k) {
            System.err.println("L is set too large, tune the parameter.");
            return -1;
        }
    }

    MyPriorityQueue<_RankItem> neighbors = new MyPriorityQueue<_RankItem>(m_k);
    for (_Doc d : docs) {
        neighbors.add(new _RankItem(d.getYLabel(), Utils.dotProduct(d, doc)));
    }

    Arrays.fill(m_cProbs, 0);
    for (_RankItem rt : neighbors) {
        m_cProbs[rt.m_index]++;
    }

    // to be consistent with the predict function
    return m_cProbs[label] - m_k;
}
use of structures.MyPriorityQueue in project IR_Base by Linda-sunshine.
the class LDAGibbs4AC method printTopWords.
/**
 * Prints the final log likelihood and writes ranked words of every topic to
 * {@code betaFile} and to stdout.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>print {@code calculate_log_likelihood()} to stdout;</li>
 *   <li>reset {@code m_sstat}, accumulate every training document's {@code m_topics}
 *       into it (exponentiated when {@code m_logSpace} is set) and pass it to
 *       {@code Utils.L1Normalization};</li>
 *   <li>for each row of {@code topic_term_probabilty}, offer every vocabulary entry to a
 *       {@code MyPriorityQueue} constructed with {@code k} and emit the retained items as
 *       {@code word(value)} pairs after a {@code Topic i(m_sstat[i])} header.</li>
 * </ol>
 *
 * <p>Any exception raised while writing is reported on stderr and not rethrown. The
 * writer is closed on every path so the file handle is released and buffered output is
 * flushed even when the loop fails part-way.
 *
 * @param k        size argument given to the ranking queue (presumably the number
 *                 of words kept per topic -- confirm against MyPriorityQueue)
 * @param betaFile path of the file to write
 */
public void printTopWords(int k, String betaFile) {
    double loglikelihood = calculate_log_likelihood();
    System.out.format("Final Log Likelihood %.3f\t", loglikelihood);

    Arrays.fill(m_sstat, 0);

    System.out.println("print top words");
    for (_Doc d : m_trainSet) {
        for (int i = 0; i < number_of_topics; i++) {
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
        }
    }
    Utils.L1Normalization(m_sstat);

    PrintWriter betaOut = null;
    try {
        System.out.println("beta file");
        betaOut = new PrintWriter(new File(betaFile));
        for (int i = 0; i < topic_term_probabilty.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++) {
                fVector.add(new _RankItem(m_corpus.getFeature(j), topic_term_probabilty[i][j]));
            }

            betaOut.format("Topic %d(%.3f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                // Compute the displayed value once; it goes to both the file and stdout.
                double shown = m_logSpace ? Math.exp(it.m_value) : it.m_value;
                betaOut.format("%s(%.3f)\t", it.m_name, shown);
                System.out.format("%s(%.3f)\t", it.m_name, shown);
            }
            betaOut.println();
            System.out.println();
        }
    } catch (Exception ex) {
        // Broad catch kept on purpose: callers rely on this method never throwing here.
        System.err.print("File Not Found");
    } finally {
        // Previously the writer was closed only on success, leaking the handle on failure.
        // close() also flushes, so no separate flush() is needed.
        if (betaOut != null) {
            betaOut.close();
        }
    }
}
Aggregations