Use of structures._Stn in project IR_Base by Linda-sunshine.
Class DCMCorrLDA_multi_E_test, method rankChild4StnByLikelihood.
/**
 * Scores every child document of {@code pDoc} against a single sentence.
 *
 * <p>For each word of the sentence, the per-word value is the sum over all
 * topics of {@code cDoc.m_topics[k] * pDoc.m_wordTopic_prob[k][wordIndex]}.
 * The per-word values are added together as-is; no logarithm is applied.
 *
 * @param stnObj sentence whose words are scored
 * @param pDoc   parent document providing the child documents and the
 *               {@code m_wordTopic_prob} table
 * @return map from child document name to its accumulated score; if two
 *         children share a name, the later one overwrites the earlier one
 */
protected HashMap<String, Double> rankChild4StnByLikelihood(_Stn stnObj, _ParentDoc4DCM pDoc) {
    HashMap<String, Double> scoreByChild = new HashMap<String, Double>();

    for (_ChildDoc child : pDoc.m_childDocs) {
        double sentenceScore = 0;

        for (_Word word : stnObj.getWords()) {
            int wordIndex = word.getIndex();

            // Mix the child's topic weights with the parent's word-topic table.
            double mixture = 0;
            int topic = 0;
            while (topic < number_of_topics) {
                mixture += child.m_topics[topic] * pDoc.m_wordTopic_prob[topic][wordIndex];
                topic++;
            }

            sentenceScore += mixture;
        }

        scoreByChild.put(child.getName(), sentenceScore);
    }

    return scoreByChild;
}
Use of structures._Stn in project IR_Base by Linda-sunshine.
Class DCMCorrLDA_test, method printTopKChild4Stn.
/**
 * Writes the child-document scores of every sentence of every
 * {@code _ParentDoc4DCM} in {@code m_trainSet} to
 * {@code filePrefix + "topChild4Stn.txt"}.
 *
 * <p>Output layout: one header line per parent document
 * ({@code name<TAB>sentenceSize}), followed by one line per sentence
 * ({@code sentenceIndex+1<TAB>childName:score<TAB>...}). Children are written
 * in the iteration order of the returned {@code HashMap}, i.e. unsorted.
 *
 * <p>Any exception is reported via {@code printStackTrace()} and otherwise
 * ignored; the writer is always closed so buffered output reaches the file.
 *
 * @param filePrefix prefix (directory and/or name stem) of the output file
 * @param topK       currently unused: every child is printed, not only the
 *                   top {@code topK}
 */
protected void printTopKChild4Stn(String filePrefix, int topK) {
    String topKChild4StnFile = filePrefix + "topChild4Stn.txt";
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(topKChild4StnFile));
        for (_Doc d : m_trainSet) {
            if (!(d instanceof _ParentDoc4DCM)) {
                continue;
            }
            _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;
            pw.println(pDoc.getName() + "\t" + pDoc.getSenetenceSize());
            for (_Stn stnObj : pDoc.getSentences()) {
                HashMap<String, Double> likelihoodMap = rankChild4StnByLikelihood(stnObj, pDoc);
                pw.print((stnObj.getIndex() + 1) + "\t");
                for (String e : likelihoodMap.keySet()) {
                    pw.print(e);
                    pw.print(":" + likelihoodMap.get(e));
                    pw.print("\t");
                }
                pw.println();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Closing flushes the buffer; without it the file could end up
        // truncated or empty and the file handle would leak.
        if (pw != null) {
            pw.close();
        }
    }
}
Use of structures._Stn in project IR_Base by Linda-sunshine.
Class DCMCorrLDA_test, method rankChild4StnByLikelihood.
/**
 * Scores every child document of {@code pDoc} against a single sentence.
 *
 * <p>For each word of the sentence, the per-word value is the sum over all
 * topics of {@code childTopicInDocProb(k, cDoc, pDoc) *
 * childWordByTopicProb(k, wordIndex, pDoc)}. The per-word values are added
 * together as-is; no logarithm is applied.
 *
 * @param stnObj sentence whose words are scored
 * @param pDoc   parent document providing the child documents
 * @return map from child document name to its accumulated score; if two
 *         children share a name, the later one overwrites the earlier one
 */
protected HashMap<String, Double> rankChild4StnByLikelihood(_Stn stnObj, _ParentDoc4DCM pDoc) {
    HashMap<String, Double> scoreByChild = new HashMap<String, Double>();

    for (_ChildDoc child : pDoc.m_childDocs) {
        double sentenceScore = 0;

        for (_Word word : stnObj.getWords()) {
            int wordIndex = word.getIndex();

            // Accumulate the topic mixture for this word, topic by topic.
            double mixture = 0;
            int topic = 0;
            while (topic < number_of_topics) {
                double topicGivenDoc = childTopicInDocProb(topic, child, pDoc);
                double wordGivenTopic = childWordByTopicProb(topic, wordIndex, pDoc);
                mixture += topicGivenDoc * wordGivenTopic;
                topic++;
            }

            sentenceScore += mixture;
        }

        scoreByChild.put(child.getName(), sentenceScore);
    }

    return scoreByChild;
}
Use of structures._Stn in project IR_Base by Linda-sunshine.
Class DCMLDA4AC, method initTestDoc.
/**
 * Prepares a parent document and all of its child documents for test-time
 * sampling and appends them to {@code sampleTestSet} (parent first, then
 * each child in {@code m_childDocs} order).
 *
 * <p>Steps performed:
 * <ol>
 *   <li>every sentence of the parent gets a topic vector of size
 *       {@code number_of_topics};</li>
 *   <li>the parent is initialised via {@code setTopics4GibbsTest} with a
 *       test length of 0;</li>
 *   <li>each child is initialised with a test length of
 *       {@code m_testWord4PerplexityProportion * totalDocLength} (truncated
 *       to int), and the topic assignments of the child's words are added
 *       to the parent's {@code m_wordTopic_stat} / {@code m_topic_stat}.</li>
 * </ol>
 *
 * @param sampleTestSet list receiving the initialised parent and children
 * @param d             document to initialise; must be a
 *                      {@code _ParentDoc4DCM}
 * @throws ClassCastException if {@code d} is not a {@code _ParentDoc4DCM}
 */
public void initTestDoc(ArrayList<_Doc> sampleTestSet, _Doc d) {
    _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;
    for (_Stn stnObj : pDoc.getSentences()) {
        stnObj.setTopicsVct(number_of_topics);
    }

    int parentTestLength = 0;
    pDoc.setTopics4GibbsTest(number_of_topics, 0, parentTestLength, vocabulary_size);
    sampleTestSet.add(pDoc);

    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        int testLength = (int) (m_testWord4PerplexityProportion * cDoc.getTotalDocLength());
        cDoc.setTopics4GibbsTest(number_of_topics, d_alpha, testLength);

        // Fold this child's word/topic assignments into the parent's counts.
        // The previous code iterated d.getWords() (the parent's words) here,
        // which re-added the same parent counts once per child and never
        // counted the child's own words.
        // NOTE(review): assumes cDoc.getWords() returns the words that
        // setTopics4GibbsTest just assigned topics to; confirm.
        for (_Word w : cDoc.getWords()) {
            int wid = w.getIndex();
            int tid = w.getTopic();
            pDoc.m_wordTopic_stat[tid][wid]++;
            pDoc.m_topic_stat[tid]++;
        }

        sampleTestSet.add(cDoc);
        cDoc.createSparseVct4Infer();
    }
}
Use of structures._Stn in project IR_Base by Linda-sunshine.
Class AspectAnalyzer, method collectStats.
/**
 * Updates the aspect statistics with the annotated sentences of {@code d}.
 *
 * <p>A sentence counts as annotated when {@code getTopic()} returns a value
 * greater than -1. For each such sentence, every feature in its feature
 * vector has {@code addOneDF(aspect)} called on its entry in
 * {@code m_featureStat} (looked up through {@code m_featureNames}), and
 * {@code m_aspectDist[aspect]} is then incremented once.
 *
 * @param d document whose sentences are scanned
 */
void collectStats(_Doc d) {
    for (_Stn sentence : d.getSentences()) {
        int aspect = sentence.getTopic();
        if (aspect <= -1) {
            // Not annotated: nothing to record for this sentence.
            continue;
        }

        for (_SparseFeature feature : sentence.getFv()) {
            int featureIndex = feature.getIndex();
            m_featureStat.get(m_featureNames.get(featureIndex)).addOneDF(aspect);
        }

        m_aspectDist[aspect]++;
    }
}
Aggregations