use of structures._ChildDoc in project IR_Base by Linda-sunshine.
the class DCMCorrLDA method computeTestMu4Doc.
/**
 * Assigns the mu value used for a child document at test time.
 *
 * <p>The cosine between the child's and its parent's
 * {@code getSparseVct4Infer()} vectors is still evaluated, but the result is
 * not used: mu is always set to the fixed value {@code 0.05}.
 *
 * @param d child document whose mu is set; its {@code m_parentDoc} is dereferenced
 */
protected void computeTestMu4Doc(_ChildDoc d) {
    final _ParentDoc parent = d.m_parentDoc;

    // Evaluated exactly as before; the returned similarity is discarded.
    Utils.cosine(d.getSparseVct4Infer(), parent.getSparseVct4Infer());

    d.setMu(0.05);
}
use of structures._ChildDoc in project IR_Base by Linda-sunshine.
the class DCMCorrLDA method computeMu4Doc.
/**
 * Assigns the mu value for a child document.
 *
 * <p>The cosine between the parent's and the child's {@code getSparse()}
 * vectors is still evaluated, but the result is not used: mu is always set
 * to the fixed value {@code 0.5}.
 *
 * @param d child document whose mu is set; its {@code m_parentDoc} is dereferenced
 */
protected void computeMu4Doc(_ChildDoc d) {
    final _ParentDoc parent = d.m_parentDoc;

    // Evaluated exactly as before; the returned similarity is discarded.
    Utils.cosine(parent.getSparse(), d.getSparse());

    d.setMu(0.5);
}
use of structures._ChildDoc in project IR_Base by Linda-sunshine.
the class DCMCorrLDA method sampleInChildDoc.
/**
 * Re-draws the topic of every word in a child document.
 *
 * <p>For each word the method:
 * <ol>
 *   <li>removes the word's current topic from the parent's word-topic and
 *       topic counts and from the child's {@code m_sstat};</li>
 *   <li>fills {@code m_topicProbCache} with, per topic, the product of
 *       {@code childWordByTopicProb} and {@code childTopicInDocProb};</li>
 *   <li>scales the sum of those weights by {@code m_rand.nextDouble()} and
 *       walks the cache until the running remainder drops to zero or below;</li>
 *   <li>stores the selected topic on the word and adds it back to the same
 *       three counters.</li>
 * </ol>
 *
 * @param d child document to resample; its {@code m_parentDoc} is cast to
 *          {@code _ParentDoc4DCM}
 */
protected void sampleInChildDoc(_ChildDoc d) {
    final _ParentDoc4DCM parent = (_ParentDoc4DCM) d.m_parentDoc;

    for (_Word word : d.getWords()) {
        final int oldTopic = word.getTopic();
        final int wordId = word.getIndex();

        // Take the word's current assignment out of the counts.
        parent.m_wordTopic_stat[oldTopic][wordId]--;
        parent.m_topic_stat[oldTopic]--;
        d.m_sstat[oldTopic]--;

        // Unnormalised weight of each topic for this word.
        double total = 0;
        for (int k = 0; k < number_of_topics; k++) {
            double wordGivenTopic = childWordByTopicProb(k, wordId, parent);
            double topicGivenDoc = childTopicInDocProb(k, d, parent);
            m_topicProbCache[k] = wordGivenTopic * topicGivenDoc;
            total += m_topicProbCache[k];
        }

        // Pick the first topic at which the scaled remainder is used up.
        double remaining = total * m_rand.nextDouble();
        int newTopic = 0;
        while (newTopic < m_topicProbCache.length) {
            remaining -= m_topicProbCache[newTopic];
            if (remaining <= 0) {
                break;
            }
            newTopic++;
        }
        // Ran off the end without reaching zero: fall back to the last topic.
        if (newTopic == m_topicProbCache.length) {
            newTopic--;
        }

        // Record the new assignment.
        word.setTopic(newTopic);
        d.m_sstat[newTopic]++;
        parent.m_topic_stat[newTopic]++;
        parent.m_wordTopic_stat[newTopic][wordId]++;
    }
}
use of structures._ChildDoc in project IR_Base by Linda-sunshine.
the class DCMCorrLDA_multi_E_test method printParameter.
/**
 * Writes the topic proportions of parent documents and of their child
 * documents to two tab-separated text files.
 *
 * <p>Only entries of {@code docList} that are {@code _ParentDoc} instances
 * are processed. Each one produces a line in the parent file
 * ({@code name \t "topicProportion" \t m_topics[0] \t ...}); each of its
 * {@code m_childDocs} produces a line of the same layout in the child file.
 *
 * <p>Any exception is printed to standard error and not rethrown. Both
 * writers are closed in a {@code finally} block, so they are released (and
 * buffered output flushed) even when opening the second file or writing a
 * line fails.
 *
 * @param parentParameterFile path of the output file for parent documents
 * @param childParameterFile  path of the output file for child documents
 * @param docList             documents to scan for {@code _ParentDoc} entries
 */
protected void printParameter(String parentParameterFile, String childParameterFile, ArrayList<_Doc> docList) {
    System.out.println("printing parameter");

    PrintWriter parentParaOut = null;
    PrintWriter childParaOut = null;
    try {
        System.out.println(parentParameterFile);
        System.out.println(childParameterFile);

        parentParaOut = new PrintWriter(new File(parentParameterFile));
        childParaOut = new PrintWriter(new File(childParameterFile));

        for (_Doc d : docList) {
            if (d instanceof _ParentDoc) {
                parentParaOut.print(d.getName() + "\t");
                parentParaOut.print("topicProportion\t");
                for (int k = 0; k < number_of_topics; k++) {
                    parentParaOut.print(d.m_topics[k] + "\t");
                }
                parentParaOut.println();

                for (_ChildDoc cDoc : ((_ParentDoc) d).m_childDocs) {
                    childParaOut.print(cDoc.getName() + "\t");
                    childParaOut.print("topicProportion\t");
                    for (int k = 0; k < number_of_topics; k++) {
                        childParaOut.print(cDoc.m_topics[k] + "\t");
                    }
                    childParaOut.println();
                }
            }
        }
    } catch (Exception e) {
        // Best-effort output: report the failure and continue, as before.
        e.printStackTrace();
    } finally {
        // PrintWriter.close() flushes and does not throw.
        if (parentParaOut != null) {
            parentParaOut.close();
        }
        if (childParaOut != null) {
            childParaOut.close();
        }
    }
}
use of structures._ChildDoc in project IR_Base by Linda-sunshine.
the class DCMLDA4AC method sampleInChildDoc.
/**
 * Re-draws the topic of every word in a child document.
 *
 * <p>For each word the method removes the word's current topic from the
 * parent's word-topic and topic counts and from the child's {@code m_sstat},
 * fills {@code m_topicProbCache} with the per-topic product of
 * {@code wordTopicProb} and {@code topicInDocProb}, draws a topic by scaling
 * the sum of those weights with {@code m_rand.nextDouble()} and walking the
 * cache, then adds the drawn topic back to the same counters.
 *
 * <p>When {@code m_collectCorpusStats} is {@code false},
 * {@code word_topic_sstat} is updated alongside: decremented for the old
 * topic and incremented for the new one. (The increment previously was a
 * second decrement, so that table lost a count on every call.)
 *
 * @param d child document to resample; its {@code m_parentDoc} is cast to
 *          {@code _ParentDoc4DCM}
 */
protected void sampleInChildDoc(_ChildDoc d) {
    int wid, tid;
    double normalizedProb;
    _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d.m_parentDoc;

    for (_Word w : d.getWords()) {
        tid = w.getTopic();
        wid = w.getIndex();

        // Take the word's current assignment out of the counts.
        pDoc.m_wordTopic_stat[tid][wid]--;
        pDoc.m_topic_stat[tid]--;
        d.m_sstat[tid]--;
        if (!m_collectCorpusStats) {
            word_topic_sstat[tid][wid]--;
        }

        // Unnormalised weight of each topic for this word.
        normalizedProb = 0;
        for (tid = 0; tid < number_of_topics; tid++) {
            double pWordTopic = wordTopicProb(tid, wid, pDoc);
            double pTopic = topicInDocProb(tid, d);
            m_topicProbCache[tid] = pWordTopic * pTopic;
            normalizedProb += m_topicProbCache[tid];
        }

        // Pick the first topic at which the scaled remainder is used up.
        normalizedProb *= m_rand.nextDouble();
        for (tid = 0; tid < m_topicProbCache.length; tid++) {
            normalizedProb -= m_topicProbCache[tid];
            if (normalizedProb <= 0) {
                break;
            }
        }
        // Ran off the end without reaching zero: fall back to the last topic.
        if (tid == m_topicProbCache.length) {
            tid--;
        }

        // Record the new assignment in every counter that was decremented above.
        w.setTopic(tid);
        d.m_sstat[tid]++;
        pDoc.m_topic_stat[tid]++;
        pDoc.m_wordTopic_stat[tid][wid]++;
        if (!m_collectCorpusStats) {
            word_topic_sstat[tid][wid]++;
        }
    }
}
Aggregations