Use of structures._Doc4DCMLDA in project IR_Base by Linda-sunshine.
Class sparseDCMLDA, method updateBeta.
@Override
protected void updateBeta(int tid) {
    double diff = 0;
    int iteration = 0;
    double smoothingBeta = 0.1; // declared but not used in this method

    do {
        diff = 0;
        double deltaBeta = 0;
        double wordNum4Tid = 0;
        double[] wordNum4Tid4V = new double[vocabulary_size];
        double totalBetaDenominator = 0;
        double[] totalBetaNumerator = new double[vocabulary_size];
        Arrays.fill(totalBetaNumerator, 0);
        Arrays.fill(wordNum4Tid4V, 0);

        m_totalBeta[tid] = Utils.sumOfArray(m_beta[tid]);
        double digBeta4Tid = Utils.digamma(m_mu * m_totalBeta[tid]);

        // accumulate the per-word numerators and the shared denominator of the
        // fixed-point update over all training documents
        for (_Doc d : m_trainSet) {
            _Doc4DCMLDA DCMDoc = (_Doc4DCMLDA) d;
            totalBetaDenominator += Utils.digamma(m_mu * m_totalBeta[tid] + DCMDoc.m_sstat[tid]) - digBeta4Tid;
            for (int v = 0; v < vocabulary_size; v++) {
                wordNum4Tid += DCMDoc.m_wordTopic_stat[tid][v];
                wordNum4Tid4V[v] += DCMDoc.m_wordTopic_stat[tid][v];
                totalBetaNumerator[v] += Utils.digamma(m_mu * m_beta[tid][v] + DCMDoc.m_wordTopic_stat[tid][v]);
                totalBetaNumerator[v] -= Utils.digamma(m_mu * m_beta[tid][v]);
            }
        }

        // multiplicative update of beta for every word, tracking the largest change
        for (int v = 0; v < vocabulary_size; v++) {
            if (wordNum4Tid == 0)
                break; // no word assigned to this topic at all
            if (wordNum4Tid4V[v] == 0) {
                deltaBeta = 0;
            } else {
                deltaBeta = totalBetaNumerator[v] / totalBetaDenominator;
            }

            double newBeta = m_beta[tid][v] * deltaBeta + d_beta;
            double t_diff = Math.abs(m_beta[tid][v] - newBeta);
            if (t_diff > diff)
                diff = t_diff;

            m_beta[tid][v] = newBeta;
        }

        iteration++;
    } while ((diff > m_newtonConverge) && (iteration < m_newtonIter));

    System.out.println("iteration\t" + iteration);
}
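Reading the loop above, the per-word update it computes can be written as follows (a reconstruction from the code, not a formula stated in the source), with \Psi the digamma function, n_{d,t,v} the count DCMDoc.m_wordTopic_stat[tid][v], n_{d,t} the count DCMDoc.m_sstat[tid], \beta_{t,\cdot} = \sum_{v'} \beta_{t,v'}, \mu the concentration m_mu, and \beta_0 the smoothing constant d_beta:

\beta_{t,v}^{\text{new}} \;=\; \beta_{t,v}\,\frac{\sum_{d}\big[\Psi(\mu\,\beta_{t,v} + n_{d,t,v}) - \Psi(\mu\,\beta_{t,v})\big]}{\sum_{d}\big[\Psi(\mu\,\beta_{t,\cdot} + n_{d,t}) - \Psi(\mu\,\beta_{t,\cdot})\big]} \;+\; \beta_0

The iteration stops once the largest per-entry change drops below m_newtonConverge or m_newtonIter passes have been made.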
Use of structures._Doc4DCMLDA in project IR_Base by Linda-sunshine.
Class DCMLDA, method estThetaInDoc.
@Override
protected void estThetaInDoc(_Doc d) {
    _Doc4DCMLDA DCMDoc = (_Doc4DCMLDA) d;
    // normalize each topic's word distribution, then the document's topic proportions
    for (int i = 0; i < number_of_topics; i++)
        Utils.L1Normalization(DCMDoc.m_wordTopic_prob[i]);
    Utils.L1Normalization(d.m_topics);
}
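Utils.L1Normalization is a project helper. For orientation, here is a minimal sketch of what such an in-place L1 normalization presumably does (an assumption about its behavior, not the project's actual implementation):

// Hypothetical stand-in for Utils.L1Normalization: rescale the array in place
// so its entries sum to 1; an all-zero array is left unchanged.
static void l1Normalize(double[] values) {
    double sum = 0;
    for (double v : values)
        sum += v;
    if (sum == 0)
        return;
    for (int i = 0; i < values.length; i++)
        values[i] /= sum;
}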
Use of structures._Doc4DCMLDA in project IR_Base by Linda-sunshine.
Class DCMLDA, method cal_logLikelihood4Partial.
protected double cal_logLikelihood4Partial(_Doc d) {
    double docLogLikelihood = 0;
    _Doc4DCMLDA DCMDoc = (_Doc4DCMLDA) d;

    // sum the log of the per-word mixture likelihood over the held-out words
    for (_Word w : DCMDoc.getTestWords()) {
        int wid = w.getIndex();

        double wordLogLikelihood = 0;
        for (int k = 0; k < number_of_topics; k++) {
            double wordPerTopicLikelihood = DCMDoc.m_topics[k] * DCMDoc.m_wordTopic_prob[k][wid];
            wordLogLikelihood += wordPerTopicLikelihood;
        }
        docLogLikelihood += Math.log(wordLogLikelihood);
    }

    return docLogLikelihood;
}
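A held-out log-likelihood of this form is usually aggregated into a corpus-level perplexity. A minimal sketch of that aggregation, assuming a collection of test documents m_testSet and a hypothetical countTestWords helper (neither appears in the snippet above):

// perplexity = exp(-(sum of held-out log-likelihoods) / (total held-out word count))
double totalLogLikelihood = 0;
long totalTestWords = 0;
for (_Doc d : m_testSet) {                   // assumed held-out document collection
    totalLogLikelihood += cal_logLikelihood4Partial(d);
    totalTestWords += countTestWords(d);     // hypothetical helper: size of getTestWords()
}
double perplexity = Math.exp(-totalLogLikelihood / totalTestWords);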
Use of structures._Doc4DCMLDA in project IR_Base by Linda-sunshine.
Class DCMLDA, method updateBeta.
protected void updateBeta(int tid) {
    double diff = 0;
    int iteration = 0;

    do {
        double deltaBeta = 0;
        double wordNum4Tid = 0;
        double[] wordNum4Tid4V = new double[vocabulary_size];
        double totalBetaDenominator = 0;
        double[] totalBetaNumerator = new double[vocabulary_size];

        m_totalBeta[tid] = Utils.sumOfArray(m_beta[tid]);
        double digBeta4Tid = Utils.digamma(m_totalBeta[tid]);

        // accumulate the per-word numerators and the shared denominator over the training set
        for (_Doc d : m_trainSet) {
            _Doc4DCMLDA DCMDoc = (_Doc4DCMLDA) d;
            totalBetaDenominator += Utils.digamma(m_totalBeta[tid] + DCMDoc.m_sstat[tid]) - digBeta4Tid;
            for (int v = 0; v < vocabulary_size; v++) {
                wordNum4Tid += DCMDoc.m_wordTopic_stat[tid][v];
                wordNum4Tid4V[v] += DCMDoc.m_wordTopic_stat[tid][v];
                totalBetaNumerator[v] += Utils.digamma(m_beta[tid][v] + DCMDoc.m_wordTopic_stat[tid][v]) - Utils.digamma(m_beta[tid][v]);
            }
        }

        diff = 0;
        for (int v = 0; v < vocabulary_size; v++) {
            if (wordNum4Tid == 0)
                break; // this is almost impossible: no word assigned to this topic at all
            else if (wordNum4Tid4V[v] == 0)
                deltaBeta = 0; // word v was never assigned to this topic
            else
                deltaBeta = totalBetaNumerator[v] / totalBetaDenominator;

            double newBeta = m_beta[tid][v] * deltaBeta + d_beta;
            double t_diff = Math.abs(m_beta[tid][v] - newBeta);
            if (t_diff > diff)
                diff = t_diff;

            m_beta[tid][v] = newBeta;
        }

        iteration++;
    } while ((diff > m_newtonConverge) && (iteration < m_newtonIter));

    System.out.format("[Info]beta update finishes at iteration %d, diff: %.3f", iteration, diff);
}
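This variant is the same multiplicative fixed-point iteration as in sparseDCMLDA, only without the m_mu scaling. Below is a minimal self-contained sketch of that iteration on toy counts, so the update can be run outside IR_Base; the class name, digamma approximation, and toy data are illustrative assumptions, not project code.

// Self-contained sketch of the digamma-based fixed-point update on toy counts.
public class BetaFixedPointSketch {

    // digamma via the recurrence psi(x) = psi(x + 1) - 1/x plus an asymptotic expansion
    static double digamma(double x) {
        double result = 0;
        while (x < 6) {            // shift the argument up until the expansion is accurate
            result -= 1.0 / x;
            x += 1;
        }
        double r = 1.0 / x;
        result += Math.log(x) - 0.5 * r;
        double r2 = r * r;
        result -= r2 * (1.0 / 12 - r2 * (1.0 / 120 - r2 / 252));
        return result;
    }

    public static void main(String[] args) {
        // counts[d][v]: how often word v is assigned to the topic in document d (toy data)
        double[][] counts = { { 3, 0, 1 }, { 2, 5, 0 } };
        int V = 3;
        double[] beta = { 0.1, 0.1, 0.1 };
        double smoothing = 1e-3;   // plays the role of d_beta

        for (int iter = 0; iter < 50; iter++) {
            double totalBeta = 0;
            for (double b : beta) totalBeta += b;

            double denominator = 0;
            double[] numerator = new double[V];
            for (double[] doc : counts) {
                double docTotal = 0;
                for (double c : doc) docTotal += c;
                denominator += digamma(totalBeta + docTotal) - digamma(totalBeta);
                for (int v = 0; v < V; v++)
                    numerator[v] += digamma(beta[v] + doc[v]) - digamma(beta[v]);
            }

            // multiplicative update, mirroring m_beta[tid][v] * deltaBeta + d_beta
            for (int v = 0; v < V; v++)
                beta[v] = beta[v] * (numerator[v] / denominator) + smoothing;
        }
        System.out.println(java.util.Arrays.toString(beta));
    }
}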
Use of structures._Doc4DCMLDA in project IR_Base by Linda-sunshine.
Class sparseDCMLDA_test, method printWordTopicDistribution.
protected void printWordTopicDistribution(_Doc d, File wordTopicDistributionFolder, int k) {
    String wordTopicDistributionFile = d.getName() + ".txt";

    try {
        PrintWriter pw = new PrintWriter(new File(wordTopicDistributionFolder, wordTopicDistributionFile));
        _Doc4DCMLDA DCMDoc = (_Doc4DCMLDA) d;

        for (int i = 0; i < number_of_topics; i++) {
            // collect the top-k words of topic i by probability
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int v = 0; v < vocabulary_size; v++) {
                String featureName = m_corpus.getFeature(v);
                double wordProb = DCMDoc.m_wordTopic_prob[i][v];
                _RankItem ri = new _RankItem(featureName, wordProb);
                fVector.add(ri);
            }

            // one line per topic: topic proportion, then the top-k words with probabilities
            pw.format("Topic %d(%.5f):\t", i, d.m_topics[i]);
            for (_RankItem it : fVector)
                pw.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            pw.write("\n");
        }

        pw.flush();
        pw.close();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
}
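MyPriorityQueue and _RankItem are project classes that appear to keep only the k best-ranked items. As an illustration of the same top-k selection using only standard library types (an assumed equivalent, not the project's implementation):

import java.util.Comparator;
import java.util.PriorityQueue;

// Keep the k highest-probability (word index, probability) pairs with a
// size-bounded min-heap: evict the current minimum when a better candidate arrives.
class TopKWords {
    static PriorityQueue<double[]> topK(double[] wordProb, int k) {
        PriorityQueue<double[]> heap =
                new PriorityQueue<>(k, Comparator.comparingDouble((double[] e) -> e[1]));
        for (int v = 0; v < wordProb.length; v++) {
            if (heap.size() < k) {
                heap.offer(new double[] { v, wordProb[v] });
            } else if (wordProb[v] > heap.peek()[1]) {
                heap.poll();
                heap.offer(new double[] { v, wordProb[v] });
            }
        }
        return heap; // entries: {word index, probability}, in heap order
    }
}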