use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class corrLDA_Gibbs method childTopicInDocProb.
/**
 * Smoothed weight of topic {@code tid} taken from the parent document of {@code d}.
 *
 * <p>The value is
 * {@code (parent.m_sstat[tid] + m_smoothingParam) /
 * (sum(parent.m_sstat) + m_smoothingParam * number_of_topics)}.
 *
 * @param tid index into the parent's {@code m_sstat} array
 * @param d   child document; its {@code m_parentDoc} is cast to {@code _ParentDoc}
 * @return the smoothed ratio described above
 */
protected double childTopicInDocProb(int tid, _ChildDoc d) {
    final _ParentDoc parent = (_ParentDoc) d.m_parentDoc;

    // Total of the parent's per-topic statistics, used as the normaliser.
    final double parentTotal = Utils.sumOfArray(parent.m_sstat);

    final double numerator = parent.m_sstat[tid] + m_smoothingParam;
    final double denominator = parentTotal + m_smoothingParam * number_of_topics;
    return numerator / denominator;
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class corrLDA_Gibbs method initialize_probability.
/**
 * Resets the sufficient statistics and assigns initial topics to every document.
 *
 * <p>Steps, in order: allocate storage via {@code createSpace()}, fill every row of
 * {@code word_topic_sstat} with {@code d_beta} and {@code m_sstat} with
 * {@code d_beta * vocabulary_size}, initialise each document's topic assignments,
 * add one count per word to the word/topic and topic totals, then call
 * {@code imposePrior()} and mark the statistics as not normalised.
 *
 * @param collection documents to initialise; parent and child documents are set up
 *                   through different initialisers, other types only contribute counts
 */
@Override
protected void initialize_probability(Collection<_Doc> collection) {
    createSpace();

    // Start every word/topic cell at d_beta and every topic total at d_beta * V.
    for (int topic = 0; topic < number_of_topics; topic++) {
        Arrays.fill(word_topic_sstat[topic], d_beta);
    }
    Arrays.fill(m_sstat, d_beta * vocabulary_size);

    for (_Doc doc : collection) {
        if (doc instanceof _ParentDoc) {
            // Parent documents also need a per-sentence topic vector.
            for (_Stn sentence : doc.getSentences()) {
                sentence.setTopicsVct(number_of_topics);
            }
            doc.setTopics4Gibbs(number_of_topics, 0);
        } else if (doc instanceof _ChildDoc) {
            ((_ChildDoc) doc).setTopics4Gibbs_LDA(number_of_topics, 0);
        }

        // Fold the freshly assigned topics into the global counts.
        for (_Word word : doc.getWords()) {
            final int assignedTopic = word.getTopic();
            final int wordIndex = word.getIndex();
            word_topic_sstat[assignedTopic][wordIndex]++;
            m_sstat[assignedTopic]++;
        }
    }

    imposePrior();
    m_statisticsNormalized = false;
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class corrLDA_Gibbs method calculate_log_likelihood4Parent.
/**
 * Log-likelihood of a parent document under the current topic statistics.
 *
 * <p>For every sparse feature the per-topic products
 * {@code parentWordByTopicProb(k, wid) * parentTopicInDocProb(k, pDoc)
 * / (d_alpha * number_of_topics + sum(pDoc.m_sstat))} are summed over all topics;
 * the logarithm of that sum is weighted by the feature value and accumulated.
 *
 * @param d document to score; it is cast to {@code _ParentDoc}
 * @return sum over features of {@code value * log(mixture likelihood)}
 */
protected double calculate_log_likelihood4Parent(_Doc d) {
    final _ParentDoc parent = (_ParentDoc) d;
    final _SparseFeature[] features = parent.getSparse();

    // Denominator shared by every (word, topic) term of this document.
    final double topicMass = Utils.sumOfArray(parent.m_sstat);
    final double normaliser = d_alpha * number_of_topics + topicMass;

    double total = 0;
    for (_SparseFeature feature : features) {
        final int wid = feature.getIndex();
        final double weight = feature.getValue();

        // Mixture over topics of the word likelihood.
        double mixture = 0;
        for (int topic = 0; topic < number_of_topics; topic++) {
            mixture += parentWordByTopicProb(topic, wid) * parentTopicInDocProb(topic, parent) / normaliser;
        }

        // Keep log() away from (near) zero; the value is reported before being nudged.
        if (Math.abs(mixture) < 1e-10) {
            System.out.println("wordLogLikelihood\t" + mixture);
            mixture += 1e-10;
        }

        total += weight * Math.log(mixture);
    }
    return total;
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class corrLDA_Gibbs method initTest.
/**
 * Prepares a parent document and all of its child documents for test-time sampling
 * and appends them to {@code sampleTestSet} (parent first, then children in the
 * order of {@code m_childDocs}).
 *
 * <p>For each document the length passed to {@code setTopics4GibbsTest} is
 * {@code m_testWord4PerplexityProportion * getTotalDocLength()} truncated to an int.
 *
 * @param sampleTestSet list that receives the initialised documents
 * @param d             document to prepare; it is cast to {@code _ParentDoc}
 */
protected void initTest(ArrayList<_Doc> sampleTestSet, _Doc d) {
    final _ParentDoc parent = (_ParentDoc) d;

    // Each sentence of the parent gets its own topic vector.
    for (_Stn sentence : parent.getSentences()) {
        sentence.setTopicsVct(number_of_topics);
    }

    final int parentTestLength = (int) (m_testWord4PerplexityProportion * parent.getTotalDocLength());
    parent.setTopics4GibbsTest(number_of_topics, 0, parentTestLength);
    sampleTestSet.add(parent);

    for (_ChildDoc child : parent.m_childDocs) {
        final int childTestLength = (int) (m_testWord4PerplexityProportion * child.getTotalDocLength());
        child.setTopics4GibbsTest(number_of_topics, 0, childTestLength);
        sampleTestSet.add(child);
    }
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class languageModelBaseLine method generateReferenceModelWithXVal.
/**
 * Builds the corpus-wide reference word distribution in {@code m_wordSstat}.
 *
 * <p>Parent documents contribute the value of each sparse feature. Every other
 * document is treated as a {@code _ChildDoc}: each word occurrence contributes the
 * entry stored for its index in the child's {@code m_wordXStat} (0 when absent), and
 * the per-document sum of those contributions is stored through
 * {@code setChildDocLenWithXVal}. Finally every accumulated weight is divided by the
 * grand total {@code m_allWordFrequencyWithXVal}.
 *
 * <p>NOTE(review): {@code m_wordSstat} is not cleared here, and a zero grand total
 * would yield NaN entries — confirm both are acceptable to callers.
 */
protected void generateReferenceModelWithXVal() {
    m_allWordFrequencyWithXVal = 0;

    for (_Doc doc : m_corpus.getCollection()) {
        if (!(doc instanceof _ParentDoc)) {
            // Anything that is not a parent is handled as a child document.
            final _ChildDoc child = (_ChildDoc) doc;
            double weightedLength = 0;

            for (_Word word : doc.getWords()) {
                final int wid = word.getIndex();
                final double weight = child.m_wordXStat.containsKey(wid) ? child.m_wordXStat.get(wid) : 0;

                weightedLength += weight;
                m_allWordFrequencyWithXVal += weight;

                final double updated = m_wordSstat.containsKey(wid) ? m_wordSstat.get(wid) + weight : weight;
                m_wordSstat.put(wid, updated);
            }

            child.setChildDocLenWithXVal(weightedLength);
            continue;
        }

        // Parent document: every sparse feature counts with its full value.
        for (_SparseFeature feature : doc.getSparse()) {
            final int wid = feature.getIndex();
            final double weight = feature.getValue();

            m_allWordFrequencyWithXVal += weight;

            final double updated = m_wordSstat.containsKey(wid) ? m_wordSstat.get(wid) + weight : weight;
            m_wordSstat.put(wid, updated);
        }
    }

    // Turn accumulated weights into proportions of the grand total. Overwriting the
    // value of an existing key does not structurally modify the map being iterated.
    for (int wid : m_wordSstat.keySet()) {
        m_wordSstat.put(wid, m_wordSstat.get(wid) / m_allWordFrequencyWithXVal);
    }
}
Aggregations