use of structures._Word in project IR_Base by Linda-sunshine.
the class sparseLDA method initialize_probability.
/**
 * Resets the sufficient statistics and rebuilds them from the topic
 * assignments of every document in {@code collection}.
 *
 * <p>Each topic row of {@code word_topic_sstat} starts at {@code d_beta} and
 * each entry of {@code m_sstat} starts at {@code d_beta * vocabulary_size};
 * {@code m_topicProbCache} is zeroed. Every document is then given its
 * Gibbs topic storage and each of its words adds one count to its
 * (topic, word) cell and to its topic total. {@code imposePrior()} runs last.
 *
 * @param collection documents to initialize; each element is cast to
 *                   {@code _Doc4SparseDCMLDA}
 */
@Override
protected void initialize_probability(Collection<_Doc> collection) {
    for (int topic = 0; topic < number_of_topics; topic++) {
        Arrays.fill(word_topic_sstat[topic], d_beta);
    }
    Arrays.fill(m_sstat, d_beta * vocabulary_size);
    Arrays.fill(m_topicProbCache, 0);

    // initialize topic-word allocation, p(w|z)
    for (_Doc doc : collection) {
        _Doc4SparseDCMLDA sparseDoc = (_Doc4SparseDCMLDA) doc;

        // allocate memory and randomize it
        sparseDoc.setTopics4Gibbs(number_of_topics, d_alpha);

        for (_Word word : doc.getWords()) {
            int assignedTopic = word.getTopic();
            int wordIndex = word.getIndex();
            word_topic_sstat[assignedTopic][wordIndex]++;
            m_sstat[assignedTopic]++;
        }
    }

    imposePrior();
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class sparseLDA_test method printParentTopicAssignment.
/**
 * Writes the per-word topic assignment of {@code d} to a text file named
 * {@code <document name>.txt} inside {@code topicFolder}.
 *
 * <p>Each word is emitted as {@code featureName:topic} followed by a tab,
 * where the feature name is looked up in {@code m_corpus} by word index.
 *
 * <p>The writer is managed by try-with-resources so the file handle is
 * released even when the loop body throws (for example from the feature
 * lookup); closing the writer also flushes it. A
 * {@link FileNotFoundException} on open is reported via
 * {@code printStackTrace()} and the method returns without writing.
 *
 * @param d           document whose word/topic pairs are written
 * @param topicFolder directory that receives the output file
 */
protected void printParentTopicAssignment(_Doc d, File topicFolder) {
    String topicAssignmentFile = d.getName() + ".txt";
    try (PrintWriter pw = new PrintWriter(new File(topicFolder, topicAssignmentFile))) {
        for (_Word w : d.getWords()) {
            int index = w.getIndex();
            int topic = w.getTopic();
            String featureName = m_corpus.getFeature(index);
            pw.print(featureName + ":" + topic + "\t");
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class ACCTM method sampleInParentDoc.
/**
 * Resamples the topic of every word in a parent document.
 *
 * <p>For each word the current assignment is removed from the document
 * counts (and from the corpus counts when {@code m_collectCorpusStats} is
 * set), an unnormalized weight is computed for every topic as the product
 * of {@code parentWordByTopicProb}, {@code parentTopicInDocProb} and
 * {@code parentChildInfluenceProb}, and a new topic is drawn in proportion
 * to those weights. The new assignment is then added back to the same
 * counters.
 *
 * @param d document to resample; cast to {@code _ParentDoc}
 */
protected void sampleInParentDoc(_Doc d) {
    _ParentDoc parent = (_ParentDoc) d;

    for (_Word word : parent.getWords()) {
        final int wordId = word.getIndex();
        final int previousTopic = word.getTopic();

        // Take the word's current assignment out of the counts.
        parent.m_sstat[previousTopic]--;
        if (m_collectCorpusStats) {
            word_topic_sstat[previousTopic][wordId]--;
            m_sstat[previousTopic]--;
        }

        // Unnormalized weight of each topic, cached for the draw below.
        double totalWeight = 0;
        for (int k = 0; k < number_of_topics; k++) {
            double wordGivenTopic = parentWordByTopicProb(k, wordId);
            double topicGivenParent = parentTopicInDocProb(k, parent);
            double topicGivenChildren = parentChildInfluenceProb(k, parent);
            m_topicProbCache[k] = wordGivenTopic * topicGivenParent * topicGivenChildren;
            totalWeight += m_topicProbCache[k];
        }

        // Scale the total by a random draw and walk the weights until the
        // remainder is used up.
        double remaining = totalWeight * m_rand.nextDouble();
        int sampledTopic = 0;
        while (sampledTopic < number_of_topics) {
            remaining -= m_topicProbCache[sampledTopic];
            if (remaining <= 0) {
                break;
            }
            sampledTopic++;
        }
        // If the remainder never dropped to zero or below, use the last topic.
        if (sampledTopic == number_of_topics) {
            sampledTopic--;
        }

        // Record the new assignment.
        word.setTopic(sampledTopic);
        parent.m_sstat[sampledTopic]++;
        if (m_collectCorpusStats) {
            word_topic_sstat[sampledTopic][wordId]++;
            m_sstat[sampledTopic]++;
        }
    }
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class ACCTM method initialize_probability.
/**
 * Allocates model storage and rebuilds the sufficient statistics from the
 * initial topic assignments of {@code collection}.
 *
 * <p>After {@code createSpace()}, every topic row of
 * {@code word_topic_sstat} is filled with {@code d_beta} and every entry of
 * {@code m_sstat} with {@code d_beta * vocabulary_size}. Parent documents
 * get their Gibbs topic storage plus a topic vector per sentence; child
 * documents get their LDA-style topic storage and have
 * {@code computeMu4Doc} applied. Every word of every document then adds one
 * count to its (topic, word) cell and to its topic total. Finally
 * {@code imposePrior()} is called and {@code m_statisticsNormalized} is
 * cleared.
 *
 * @param collection documents to initialize
 */
protected void initialize_probability(Collection<_Doc> collection) {
    createSpace();

    for (int topic = 0; topic < number_of_topics; topic++) {
        Arrays.fill(word_topic_sstat[topic], d_beta);
    }
    // avoid adding such prior later on
    Arrays.fill(m_sstat, d_beta * vocabulary_size);

    for (_Doc doc : collection) {
        if (doc instanceof _ParentDoc) {
            doc.setTopics4Gibbs(number_of_topics, 0);
            for (_Stn sentence : doc.getSentences()) {
                sentence.setTopicsVct(number_of_topics);
            }
        } else if (doc instanceof _ChildDoc) {
            _ChildDoc child = (_ChildDoc) doc;
            child.setTopics4Gibbs_LDA(number_of_topics, 0);
            computeMu4Doc(child);
        }

        for (_Word word : doc.getWords()) {
            int assignedTopic = word.getTopic();
            int wordIndex = word.getIndex();
            word_topic_sstat[assignedTopic][wordIndex]++;
            m_sstat[assignedTopic]++;
        }
    }

    imposePrior();
    m_statisticsNormalized = false;
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class ACCTM_C method collectChildStats.
/**
 * Accumulates the current sampling statistics of a child document into its
 * running totals.
 *
 * <p>Three accumulators on the child are updated: row 0 of
 * {@code m_xTopics} (per topic: current count, {@code d_alpha}, and the
 * parent's share of that topic scaled by the child's mu),
 * {@code m_xProportion} (per entry of {@code m_gamma}: prior plus current
 * count) and row 1 of {@code m_xTopics} (per vocabulary entry: current
 * count). Each word then collects its own statistics via
 * {@code collectXStats()}.
 *
 * @param d document to collect from; cast to {@code _ChildDoc4BaseWithPhi}
 */
@Override
protected void collectChildStats(_Doc d) {
    _ChildDoc4BaseWithPhi child = (_ChildDoc4BaseWithPhi) d;
    _ParentDoc parent = child.m_parentDoc;

    // NOTE(review): a zero sum here would make the division below
    // non-finite; confirm parents always carry topic counts.
    double parentTopicTotal = Utils.sumOfArray(parent.m_sstat);

    for (int topic = 0; topic < this.number_of_topics; topic++) {
        child.m_xTopics[0][topic] += child.m_xTopicSstat[0][topic] + d_alpha
                + child.getMu() * parent.m_sstat[topic] / parentTopicTotal;
    }

    for (int indicator = 0; indicator < m_gamma.length; indicator++) {
        child.m_xProportion[indicator] += m_gamma[indicator] + child.m_xSstat[indicator];
    }

    for (int term = 0; term < vocabulary_size; term++) {
        child.m_xTopics[1][term] += child.m_xTopicSstat[1][term];
    }

    for (_Word word : d.getWords()) {
        word.collectXStats();
    }
}
Aggregations