Example use of structures._ParentDoc in the IR_Base project by Linda-sunshine.
Class corrLDA_Gibbs, method sampleInParentDoc.
/**
 * Re-draws the topic assignment of every word in the given parent document.
 *
 * <p>For each word, the current assignment is first removed from the document-level counts
 * (and from the corpus-level counts when {@code m_collectCorpusStats} is set). A new topic is
 * then drawn with probability proportional to the product of
 * {@code parentWordByTopicProb}, {@code parentTopicInDocProb} and
 * {@code parentChildInfluenceProb}, and the counts are incremented for the drawn topic.
 *
 * @param d the document to sample; it is cast to {@code _ParentDoc} without a type check
 */
protected void sampleInParentDoc(_Doc d) {
    _ParentDoc parent = (_ParentDoc) d;

    for (_Word word : parent.getWords()) {
        int wordId = word.getIndex();
        int oldTopic = word.getTopic();

        // Exclude this word's current assignment from the counts before scoring topics.
        parent.m_sstat[oldTopic]--;
        if (m_collectCorpusStats) {
            word_topic_sstat[oldTopic][wordId]--;
            m_sstat[oldTopic]--;
        }

        // Unnormalized weight of each topic; the running sum is the normalizer.
        double totalMass = 0;
        for (int k = 0; k < number_of_topics; k++) {
            double pWordTopic = parentWordByTopicProb(k, wordId);
            double pTopicPDoc = parentTopicInDocProb(k, parent);
            double pTopicCDoc = parentChildInfluenceProb(k, parent);
            m_topicProbCache[k] = pWordTopic * pTopicPDoc * pTopicCDoc;
            totalMass += m_topicProbCache[k];
        }

        // Pick a point in [0, totalMass) and walk the weights until it is passed.
        double remaining = totalMass * m_rand.nextDouble();
        int newTopic = 0;
        while (newTopic < number_of_topics) {
            remaining -= m_topicProbCache[newTopic];
            if (remaining < 0) {
                break;
            }
            newTopic++;
        }
        // The walk can run off the end (e.g. through rounding); fall back to the last topic.
        if (newTopic == number_of_topics) {
            newTopic--;
        }

        word.setTopic(newTopic);
        parent.m_sstat[newTopic]++;
        if (m_collectCorpusStats) {
            word_topic_sstat[newTopic][wordId]++;
            m_sstat[newTopic]++;
        }
    }
}
Example use of structures._ParentDoc in the IR_Base project by Linda-sunshine.
Class languageModelBaseLine, method printTopChild4Stn.
/**
 * Writes the scored child documents of every parent-document sentence to
 * {@code filePrefix + "/topChild4Stn.txt"}.
 *
 * <p>Output layout: for each {@code _ParentDoc} in the corpus, one header line
 * {@code name<TAB>sentenceCount}, followed by one line per sentence of the form
 * {@code (sentenceIndex + 1)<TAB>childName:score<TAB>...}. Scores come from
 * {@code rankChild4StnByLanguageModel} and are emitted in the order returned by
 * {@code sortHashMap4String(likelihoodMap, true)}. All children are written; there is no
 * top-K cut-off.
 *
 * <p>Failures are reported with {@code printStackTrace()} and not rethrown. The writer is
 * closed in a {@code finally} block so the file handle is released even when ranking or
 * writing throws part-way through.
 *
 * @param filePrefix directory (or path prefix) under which the output file is created
 */
protected void printTopChild4Stn(String filePrefix) {
    String topChild4StnFile = filePrefix + "/topChild4Stn.txt";
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(topChild4StnFile));
        for (_Doc d : m_corpus.getCollection()) {
            // Only parent documents carry sentences with attached child documents.
            if (!(d instanceof _ParentDoc)) {
                continue;
            }
            _ParentDoc pDoc = (_ParentDoc) d;
            pw.println(pDoc.getName() + "\t" + pDoc.getSenetenceSize());
            for (_Stn stnObj : pDoc.getSentences()) {
                HashMap<String, Double> likelihoodMap = rankChild4StnByLanguageModel(stnObj, pDoc);
                // Sentence indices are written 1-based.
                pw.print((stnObj.getIndex() + 1) + "\t");
                for (Map.Entry<String, Double> e : sortHashMap4String(likelihoodMap, true)) {
                    pw.print(e.getKey());
                    pw.print(":" + e.getValue());
                    pw.print("\t");
                }
                pw.println();
            }
        }
        pw.flush();
    } catch (Exception e) {
        // Best-effort export: report the failure and keep the caller running.
        e.printStackTrace();
    } finally {
        // Previously close() was skipped whenever an exception escaped the loop above.
        if (pw != null) {
            pw.close();
        }
    }
}
Example use of structures._ParentDoc in the IR_Base project by Linda-sunshine.
Class languageModelBaseLine, method rankChild4StnByLikelihood.
/**
 * Scores every child document of {@code pDoc} against one sentence and returns the scores
 * keyed by child-document name.
 *
 * <p>For each child, the score sums, over sentence features that also occur in the child,
 * {@code sentenceValue * log(smoothedProb / (alphaDoc * ref))}, and then adds
 * {@code stnObj.getLength() * log(alphaDoc)}, where
 * {@code alphaDoc = m_smoothingMu / (m_smoothingMu + childLength)},
 * {@code smoothedProb = (childValue + m_smoothingMu * ref) / (m_smoothingMu + childLength)}
 * and {@code ref = m_wordSstat.get(wid)}. Sentence features absent from the child add nothing
 * to the sum.
 * NOTE(review): this has the shape of a Dirichlet-prior smoothed log-likelihood with
 * {@code m_wordSstat} as the reference word probability; confirm against where
 * {@code m_wordSstat} is populated.
 *
 * @param stnObj the sentence to score against
 * @param pDoc   the parent document whose {@code m_childDocs} are scored
 * @return a new map from child-document name to its score for this sentence
 */
protected HashMap<String, Double> rankChild4StnByLikelihood(_Stn stnObj, _ParentDoc pDoc) {
    HashMap<String, Double> scoreByChild = new HashMap<String, Double>();

    for (_ChildDoc child : pDoc.m_childDocs) {
        int childLen = child.getTotalDocLength();
        _SparseFeature[] childFeatures = child.getSparse();
        double score = 0;
        double alphaDoc = m_smoothingMu / (m_smoothingMu + childLen);

        _SparseFeature[] stnFeatures = stnObj.getFv();
        for (int i = 0; i < stnFeatures.length; i++) {
            _SparseFeature stnFeature = stnFeatures[i];
            int wid = stnFeature.getIndex();
            double stnVal = stnFeature.getValue();

            int pos = Utils.indexOf(childFeatures, wid);
            if (pos != -1) {
                // Only words shared by sentence and child contribute a per-word term.
                double docVal = childFeatures[pos].getValue();
                double smoothedProb = docVal / (m_smoothingMu + childLen)
                        + m_smoothingMu * m_wordSstat.get(wid) / (m_smoothingMu + childLen);
                double wordTerm = Math.log(smoothedProb / (alphaDoc * m_wordSstat.get(wid)));
                score += stnVal * wordTerm;
            }
        }

        // Length-dependent term applied once per child, independent of word overlap.
        score += stnObj.getLength() * Math.log(alphaDoc);
        scoreByChild.put(child.getName(), score);
    }

    return scoreByChild;
}
Aggregations