use of structures._Word in project IR_Base by Linda-sunshine.
the class DCMLDA4AC_test method printChildTopicAssignment.
/**
 * Writes the topic assignment of every word in a document to
 * {@code <topicFolder>/<document name>.txt}.
 *
 * <p>Each word is emitted as {@code featureName:topicId} followed by a tab,
 * all on a single line. The feature name is looked up in {@code m_corpus}
 * by the word's index.
 *
 * <p>If the output file cannot be opened, the stack trace is printed and
 * nothing is written.
 *
 * @param d           document whose word/topic assignments are dumped
 * @param topicFolder directory in which the output file is created
 */
protected void printChildTopicAssignment(_Doc d, File topicFolder) {
    String topicAssignmentFile = d.getName() + ".txt";

    // try-with-resources guarantees the writer is closed (and therefore
    // flushed) even if a runtime exception escapes from the loop body,
    // e.g. from the feature lookup.
    try (PrintWriter pw = new PrintWriter(new File(topicFolder, topicAssignmentFile))) {
        for (_Word w : d.getWords()) {
            int wid = w.getIndex();
            int tid = w.getTopic();
            String featureName = m_corpus.getFeature(wid);
            pw.print(featureName + ":" + tid + "\t");
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class LDAGibbs4AC method initialize_probability.
/**
 * Prepares the sufficient statistics for Gibbs sampling over the given
 * documents.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>{@code createSpace()} is invoked.</li>
 *   <li>Every row of {@code topic_term_probabilty} is zeroed and every row
 *       of {@code word_topic_sstat} is filled with {@code d_beta};
 *       {@code m_sstat} is filled with {@code d_beta * vocabulary_size}.</li>
 *   <li>Each document has its per-document topic state set up (parent and
 *       child documents use different setters), after which every word's
 *       current topic is counted into {@code word_topic_sstat} and
 *       {@code m_sstat}.</li>
 *   <li>{@code imposePrior()} is invoked.</li>
 * </ol>
 *
 * @param collection documents to initialize
 */
protected void initialize_probability(Collection<_Doc> collection) {
    createSpace();

    // Start every topic row from the smoothing prior.
    for (int topic = 0; topic < number_of_topics; topic++) {
        Arrays.fill(topic_term_probabilty[topic], 0);
        Arrays.fill(word_topic_sstat[topic], d_beta);
    }
    Arrays.fill(m_sstat, d_beta * vocabulary_size);

    for (_Doc doc : collection) {
        // Parent docs also get a per-sentence topic vector; child docs use
        // the LDA-specific setter. Other document kinds get no setup here.
        if (doc instanceof _ParentDoc) {
            for (_Stn sentence : doc.getSentences()) {
                sentence.setTopicsVct(number_of_topics);
            }
            doc.setTopics4Gibbs(number_of_topics, d_alpha);
        } else if (doc instanceof _ChildDoc) {
            _ChildDoc child = (_ChildDoc) doc;
            child.setTopics4Gibbs_LDA(number_of_topics, d_alpha);
        }

        // Fold the word-level topic assignments into the corpus counts.
        for (_Word word : doc.getWords()) {
            int assigned = word.getTopic();
            word_topic_sstat[assigned][word.getIndex()]++;
            m_sstat[assigned]++;
        }
    }

    imposePrior();
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class corrLDA_Gibbs method initialize_probability.
/**
 * Prepares the sufficient statistics for Gibbs sampling over the given
 * documents.
 *
 * <p>After {@code createSpace()}, every row of {@code word_topic_sstat} is
 * filled with {@code d_beta} and {@code m_sstat} with
 * {@code d_beta * vocabulary_size}. Each document then has its topic state
 * set up with a second argument of {@code 0} (parent and child documents
 * use different setters), and every word's current topic is counted into
 * {@code word_topic_sstat} and {@code m_sstat}. Finally
 * {@code imposePrior()} is invoked and {@code m_statisticsNormalized} is
 * cleared.
 *
 * @param collection documents to initialize
 */
@Override
protected void initialize_probability(Collection<_Doc> collection) {
    createSpace();

    // Start every topic row from the smoothing prior.
    for (int topic = 0; topic < number_of_topics; topic++) {
        Arrays.fill(word_topic_sstat[topic], d_beta);
    }
    Arrays.fill(m_sstat, d_beta * vocabulary_size);

    for (_Doc doc : collection) {
        // Parent docs also get a per-sentence topic vector; child docs use
        // the LDA-specific setter. Other document kinds get no setup here.
        if (doc instanceof _ParentDoc) {
            for (_Stn sentence : doc.getSentences()) {
                sentence.setTopicsVct(number_of_topics);
            }
            doc.setTopics4Gibbs(number_of_topics, 0);
        } else if (doc instanceof _ChildDoc) {
            _ChildDoc child = (_ChildDoc) doc;
            child.setTopics4Gibbs_LDA(number_of_topics, 0);
        }

        // Fold the word-level topic assignments into the corpus counts.
        for (_Word word : doc.getWords()) {
            int assigned = word.getTopic();
            word_topic_sstat[assigned][word.getIndex()]++;
            m_sstat[assigned]++;
        }
    }

    imposePrior();
    m_statisticsNormalized = false;
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class corrLDA_Gibbs method sampleInChildDoc.
/**
 * Resamples the topic of every word in a child document.
 *
 * <p>For each word: its current topic is removed from the document counts
 * (and from the corpus counts when {@code m_collectCorpusStats} is set), an
 * unnormalized weight
 * {@code childWordByTopicProb(k, wid) * childTopicInDocProb(k, doc)} is
 * computed for every topic {@code k}, a topic is drawn in proportion to
 * those weights, and the new topic is written back to the word and added to
 * the same counts.
 *
 * @param d document to sample; it is cast to {@code _ChildDoc}
 */
protected void sampleInChildDoc(_Doc d) {
    _ChildDoc child = (_ChildDoc) d;

    for (_Word word : child.getWords()) {
        int term = word.getIndex();
        int oldTopic = word.getTopic();

        // Take the word's current assignment out of the counts before the
        // per-topic weights are evaluated.
        child.m_sstat[oldTopic]--;
        if (m_collectCorpusStats) {
            word_topic_sstat[oldTopic][term]--;
            m_sstat[oldTopic]--;
        }

        // Unnormalized weight of each topic, cached for the draw below.
        double remaining = 0;
        for (int k = 0; k < number_of_topics; k++) {
            double pWordTopic = childWordByTopicProb(k, term);
            double pTopicDoc = childTopicInDocProb(k, child);
            m_topicProbCache[k] = pWordTopic * pTopicDoc;
            remaining += m_topicProbCache[k];
        }

        // Scale the total mass by a uniform draw, then walk the cached
        // weights until the scaled mass is used up.
        remaining *= m_rand.nextDouble();
        int newTopic = 0;
        while (newTopic < number_of_topics) {
            remaining -= m_topicProbCache[newTopic];
            if (remaining < 0) {
                break;
            }
            newTopic++;
        }
        // The walk ran off the end without going negative: use the last topic.
        if (newTopic == number_of_topics) {
            newTopic--;
        }

        word.setTopic(newTopic);
        child.m_sstat[newTopic]++;
        if (m_collectCorpusStats) {
            word_topic_sstat[newTopic][term]++;
            m_sstat[newTopic]++;
        }
    }
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class languageModelBaseLine method generateReferenceModelWithXVal.
/**
 * Builds the corpus-wide reference word distribution in
 * {@code m_wordSstat}.
 *
 * <p>For parent documents each sparse feature contributes its value. For
 * every other document (cast to {@code _ChildDoc}) each word occurrence
 * contributes the value stored for its index in the document's
 * {@code m_wordXStat}, or 0 when there is none; the per-document sum of
 * those contributions is stored via {@code setChildDocLenWithXVal}. The
 * running grand total is kept in {@code m_allWordFrequencyWithXVal}, and
 * every entry of {@code m_wordSstat} is finally divided by it.
 */
protected void generateReferenceModelWithXVal() {
    m_allWordFrequencyWithXVal = 0;

    for (_Doc doc : m_corpus.getCollection()) {
        if (doc instanceof _ParentDoc) {
            for (_SparseFeature feature : doc.getSparse()) {
                double weight = feature.getValue();
                m_allWordFrequencyWithXVal += weight;
                accumulateWordSstat(feature.getIndex(), weight);
            }
            continue;
        }

        _ChildDoc child = (_ChildDoc) doc;
        double childLen = 0;
        for (_Word word : child.getWords()) {
            int term = word.getIndex();
            double weight = 0;
            if (child.m_wordXStat.containsKey(term)) {
                weight = child.m_wordXStat.get(term);
            }
            childLen += weight;
            m_allWordFrequencyWithXVal += weight;
            accumulateWordSstat(term, weight);
        }
        child.setChildDocLenWithXVal(childLen);
    }

    // Convert the accumulated masses into proportions of the grand total.
    // Only existing keys are overwritten, so the key set is not changed
    // while it is being iterated.
    for (int term : m_wordSstat.keySet()) {
        double mass = m_wordSstat.get(term);
        m_wordSstat.put(term, mass / m_allWordFrequencyWithXVal);
    }
}

/** Adds {@code val} to the entry for {@code wid} in {@code m_wordSstat}, creating the entry if absent. */
private void accumulateWordSstat(int wid, double val) {
    if (m_wordSstat.containsKey(wid)) {
        double previous = m_wordSstat.get(wid);
        m_wordSstat.put(wid, previous + val);
    } else {
        m_wordSstat.put(wid, val);
    }
}
Aggregations