Use of structures._ChildDoc in project IR_Base by Linda-sunshine.
From the class weightedCorrespondenceModel, the method initialize_probability:
@Override
protected void initialize_probability(Collection<_Doc> collection) {
    init();
    for (_Doc d : collection) {
        if (d instanceof _ParentDoc4DCM) {
            int totalWords = 0;
            double totalLambda = 0;
            m_parentDocNum += 1;
            _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;
            pDoc.setTopics4Variational(number_of_topics, d_alpha, vocabulary_size, d_beta);
            totalWords += pDoc.getTotalDocLength();
            for (_Stn stnObj : pDoc.getSentences())
                stnObj.setTopicsVct(number_of_topics);
            for (_ChildDoc cDoc : pDoc.m_childDocs) {
                totalWords += cDoc.getTotalDocLength();
                m_childDocNum += 1;
                cDoc.setTopics4Variational(number_of_topics, d_alpha);
                // update the article-thread sufficient statistics: accumulate the
                // child's expected word-topic counts into the parent's lambda
                for (int n = 0; n < cDoc.getSparse().length; n++) {
                    _SparseFeature fv = cDoc.getSparse()[n];
                    int wID = fv.getIndex();
                    double wVal = fv.getValue();
                    for (int k = 0; k < number_of_topics; k++) {
                        pDoc.m_lambda_stat[k][wID] += cDoc.m_phi[n][k] * wVal;
                    }
                }
            }
            // per-topic totals of the lambda sufficient statistics
            for (int k = 0; k < number_of_topics; k++) {
                pDoc.m_lambda_topicStat[k] = Utils.sumOfArray(pDoc.m_lambda_stat[k]);
                totalLambda += pDoc.m_lambda_topicStat[k];
            }
            // System.out.println("totalWords\t" + totalWords + "\t" + totalLambda);
        }
    }
    imposePrior();
}
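To make the accumulation step concrete, here is a minimal standalone sketch of the same pattern. All names here (LambdaAccumulationSketch, numTopics, phi, wordIds, wordCounts) are hypothetical illustrations, not IR_Base API: lambdaStat[k][w] collects the expected count of word w under topic k, summed over the positions of a child document, exactly as pDoc.m_lambda_stat does above.

import java.util.Arrays;

public class LambdaAccumulationSketch {
    // Accumulate expected word-topic counts: lambdaStat[k][w] += phi[n][k] * count(n).
    // phi[n][k] is the variational probability that position n belongs to topic k.
    static double[][] accumulate(double[][] phi, int[] wordIds, double[] wordCounts,
                                 int numTopics, int vocabSize) {
        double[][] lambdaStat = new double[numTopics][vocabSize];
        for (int n = 0; n < wordIds.length; n++)
            for (int k = 0; k < numTopics; k++)
                lambdaStat[k][wordIds[n]] += phi[n][k] * wordCounts[n];
        return lambdaStat;
    }

    public static void main(String[] args) {
        // one child document with two distinct words, two topics
        double[][] phi = { {0.7, 0.3}, {0.2, 0.8} };
        int[] wordIds = {0, 2};
        double[] wordCounts = {3.0, 1.0};
        double[][] lambdaStat = accumulate(phi, wordIds, wordCounts, 2, 3);
        System.out.println(Arrays.deepToString(lambdaStat));
        // prints [[2.1, 0.0, 0.2], [0.9, 0.0, 0.8]]
    }
}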
Use of structures._ChildDoc in project IR_Base by Linda-sunshine.
From the class weightedCorrespondenceModel, the method calculate_log_likelihood:
@Override
public double calculate_log_likelihood(_Doc d) {
    _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;
    double logLikelihood = 0;
    double gammaSum = Utils.sumOfArray(pDoc.m_sstat);
    double alphaSum = Utils.sumOfArray(m_alpha);

    // Dirichlet prior over the parent's topic proportions, plus the entropy
    // of the variational Dirichlet q(theta | gamma), where gamma = pDoc.m_sstat
    logLikelihood += Utils.lgamma(alphaSum);
    logLikelihood -= Utils.lgamma(gammaSum);
    for (int k = 0; k < number_of_topics; k++) {
        logLikelihood += -Utils.lgamma(m_alpha[k]) + (m_alpha[k] - 1) * (Utils.digamma(pDoc.m_sstat[k]) - Utils.digamma(gammaSum));
        logLikelihood += Utils.lgamma(pDoc.m_sstat[k]);
        logLikelihood -= (pDoc.m_sstat[k] - 1) * (Utils.digamma(pDoc.m_sstat[k]) - Utils.digamma(gammaSum));
    }

    // parent words: E_q[log p(w, z)] minus the entropy of q(z | phi)
    _SparseFeature[] fvs = pDoc.getSparse();
    for (int n = 0; n < fvs.length; n++) {
        int wID = fvs[n].getIndex();
        double wVal = fvs[n].getValue();
        for (int k = 0; k < number_of_topics; k++) {
            double updateLikelihood = 0;
            updateLikelihood -= pDoc.m_phi[n][k] * Math.log(pDoc.m_phi[n][k]);
            updateLikelihood += pDoc.m_phi[n][k] * (Utils.digamma(pDoc.m_sstat[k]) - Utils.digamma(gammaSum));
            updateLikelihood += pDoc.m_phi[n][k] * wVal * (Utils.digamma(pDoc.m_lambda_stat[k][wID]) - Utils.digamma(pDoc.m_lambda_topicStat[k]));
            logLikelihood += updateLikelihood;
            // diagnostic output for numerically degenerate terms
            if (Double.isNaN(updateLikelihood) || Double.isInfinite(updateLikelihood)) {
                System.out.println("\nlikelihood\t" + updateLikelihood + "\t" + logLikelihood);
                System.out.println("wVal\t" + wVal);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_sstat[k]\t" + pDoc.m_sstat[k]);
                System.out.println("gammaSum\t" + gammaSum);
                System.out.println("pDoc.m_lambda_stat[k][wID]\t" + pDoc.m_lambda_stat[k][wID]);
                System.out.println("pDoc.m_lambda_topicStat[k]\t" + pDoc.m_lambda_topicStat[k]);
            }
        }
    }

    // child documents: Dirichlet prior and entropy for q(pi), then the word terms
    double alphaCSum = Utils.sumOfArray(m_alpha_c);
    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        logLikelihood += Utils.lgamma(alphaCSum);
        double piSum = Utils.sumOfArray(cDoc.m_sstat);
        logLikelihood -= Utils.lgamma(piSum);
        for (int k = 0; k < number_of_topics; k++) {
            logLikelihood -= Utils.lgamma(m_alpha_c[k]);
            logLikelihood += (m_alpha_c[k] - 1) * (Utils.digamma(cDoc.m_sstat[k]) - Utils.digamma(piSum));
            logLikelihood += Utils.lgamma(cDoc.m_sstat[k]);
            logLikelihood -= (cDoc.m_sstat[k] - 1) * (Utils.digamma(cDoc.m_sstat[k]) - Utils.digamma(piSum));
        }
        _SparseFeature[] cDocFvs = cDoc.getSparse();
        for (int n = 0; n < cDocFvs.length; n++) {
            int wID = cDocFvs[n].getIndex();
            double wVal = cDocFvs[n].getValue();
            for (int k = 0; k < number_of_topics; k++) {
                logLikelihood += cDoc.m_phi[n][k] * (Utils.digamma(pDoc.m_sstat[k]) - Utils.digamma(gammaSum) + Utils.digamma(cDoc.m_sstat[k]) - Utils.digamma(piSum));
                // m_zeta is the auxiliary variable of the Taylor bound on the normalizer
                logLikelihood -= cDoc.m_phi[n][k] * (Utils.dotProduct(cDoc.m_sstat, pDoc.m_sstat) / (piSum * gammaSum * cDoc.m_zeta) + Math.log(cDoc.m_zeta) - 1);
                logLikelihood += wVal * cDoc.m_phi[n][k] * (Utils.digamma(pDoc.m_lambda_stat[k][wID]) - Utils.digamma(pDoc.m_lambda_topicStat[k]));
                logLikelihood -= cDoc.m_phi[n][k] * Math.log(cDoc.m_phi[n][k]);
                if (Double.isInfinite(logLikelihood)) {
                    System.out.println("\ncDoc likelihood\t" + logLikelihood);
                    System.out.println("cDoc.m_phi[n][k]\t" + cDoc.m_phi[n][k]);
                    System.out.println("pDoc.m_lambda_topicStat[k]\t" + pDoc.m_lambda_topicStat[k]);
                    System.out.println("cDoc.m_sstat[k]\t" + cDoc.m_sstat[k]);
                    System.out.println("piSum\t" + piSum);
                    System.out.println("cDoc zeta\t" + cDoc.m_zeta);
                }
            }
        }
    }

    // Dirichlet prior and entropy for the topic-word distributions q(beta | lambda)
    for (int k = 0; k < number_of_topics; k++) {
        double betaSum = Utils.sumOfArray(m_beta[k]);
        logLikelihood += Utils.lgamma(betaSum);
        logLikelihood -= Utils.lgamma(pDoc.m_lambda_topicStat[k]);
        for (int v = 0; v < vocabulary_size; v++) {
            logLikelihood -= Utils.lgamma(m_beta[k][v]);
            logLikelihood += (m_beta[k][v] - 1) * (Utils.digamma(pDoc.m_lambda_stat[k][v]) - Utils.digamma(pDoc.m_lambda_topicStat[k]));
            logLikelihood += Utils.lgamma(pDoc.m_lambda_stat[k][v]);
            logLikelihood -= (pDoc.m_lambda_stat[k][v] - 1) * (Utils.digamma(pDoc.m_lambda_stat[k][v]) - Utils.digamma(pDoc.m_lambda_topicStat[k]));
        }
    }
    // System.out.println("doc\t" + pDoc.getName() + "\tlikelihood\t" + logLikelihood);
    return logLikelihood;
}
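The same Dirichlet pattern appears three times above (for gamma, pi, and each row of lambda): the expected log prior lgamma(sum alpha) - sum_k lgamma(alpha_k) + sum_k (alpha_k - 1) E[log theta_k], plus the entropy of the variational Dirichlet, where E[log theta_k] = digamma(g_k) - digamma(sum g). Here is a hedged standalone sketch of that per-Dirichlet ELBO contribution, substituting Apache Commons Math for IR_Base's Utils.lgamma/Utils.digamma; class and method names are illustrative only.

import org.apache.commons.math3.special.Gamma;

public class DirichletElboTerm {
    // E_q[log p(theta | alpha)] + H(q(theta | gamma)) for q = Dirichlet(gamma),
    // the term repeated three times in calculate_log_likelihood above.
    static double dirichletTerm(double[] alpha, double[] gamma) {
        double alphaSum = 0, gammaSum = 0;
        for (double a : alpha) alphaSum += a;
        for (double g : gamma) gammaSum += g;
        double term = Gamma.logGamma(alphaSum) - Gamma.logGamma(gammaSum);
        for (int k = 0; k < alpha.length; k++) {
            double eLogTheta = Gamma.digamma(gamma[k]) - Gamma.digamma(gammaSum);
            term += -Gamma.logGamma(alpha[k]) + (alpha[k] - 1) * eLogTheta; // prior
            term += Gamma.logGamma(gamma[k]) - (gamma[k] - 1) * eLogTheta;  // entropy
        }
        return term;
    }

    public static void main(String[] args) {
        // symmetric prior, slightly updated variational parameters
        System.out.println(dirichletTerm(new double[]{1.0, 1.0, 1.0},
                                         new double[]{1.5, 2.0, 1.0}));
    }
}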
Use of structures._ChildDoc in project IR_Base by Linda-sunshine.
From the class languageModelBaseLine, the method rankChild4StnByLikelihood:
protected HashMap<String, Double> rankChild4StnByLikelihood(_Stn stnObj, _ParentDoc pDoc) {
    HashMap<String, Double> childLikelihoodMap = new HashMap<String, Double>();
    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        int cDocLen = cDoc.getTotalDocLength();
        _SparseFeature[] fv = cDoc.getSparse();
        double stnLogLikelihood = 0;
        // alphaDoc is the weight of the collection model under Dirichlet smoothing
        double alphaDoc = m_smoothingMu / (m_smoothingMu + cDocLen);
        _SparseFeature[] sv = stnObj.getFv();
        for (_SparseFeature svWord : sv) {
            int wid = svWord.getIndex();
            double stnVal = svWord.getValue();
            int featureIndex = Utils.indexOf(fv, wid);
            if (featureIndex == -1)
                continue; // word absent from the child: covered by the alphaDoc term below
            double docVal = fv[featureIndex].getValue();
            // Dirichlet-smoothed probability: (c(w,d) + mu * p(w|C)) / (|d| + mu)
            double smoothingProb = docVal / (m_smoothingMu + cDocLen);
            smoothingProb += m_smoothingMu * m_wordSstat.get(wid) / (m_smoothingMu + cDocLen);
            double featureLikelihood = Math.log(smoothingProb / (alphaDoc * m_wordSstat.get(wid)));
            stnLogLikelihood += stnVal * featureLikelihood;
        }
        stnLogLikelihood += stnObj.getLength() * Math.log(alphaDoc);
        childLikelihoodMap.put(cDoc.getName(), stnLogLikelihood);
    }
    return childLikelihoodMap;
}
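This is the standard rank-equivalent decomposition for Dirichlet smoothing: since p(w|d) = (c(w,d) + mu * p(w|C)) / (|d| + mu), a word absent from the child collapses to alphaDoc * p(w|C), so only the matched words need a per-document term and the document-independent sum of log p(w|C) can be dropped without changing the ranking. A minimal sketch of the smoothed estimate itself, with all names hypothetical:

import java.util.HashMap;
import java.util.Map;

public class DirichletSmoothingSketch {
    // Dirichlet-smoothed unigram probability, the model used above:
    //   p(w|d) = (c(w,d) + mu * p(w|C)) / (|d| + mu)
    static double smoothedProb(double countInDoc, int docLen,
                               double collectionProb, double mu) {
        return (countInDoc + mu * collectionProb) / (docLen + mu);
    }

    public static void main(String[] args) {
        double mu = 2000;                 // illustrative smoothing parameter
        Map<Integer, Double> doc = new HashMap<>();
        doc.put(7, 3.0);                  // word id 7 occurs 3 times in the document
        int docLen = 150;
        double pC = 1e-4;                 // collection probability of word 7
        double seen = smoothedProb(doc.getOrDefault(7, 0.0), docLen, pC, mu);
        double unseen = smoothedProb(0.0, docLen, pC, mu);
        // for unseen words this reduces to alphaDoc * p(w|C), which is why
        // rankChild4StnByLikelihood only iterates over matched words
        double alphaDoc = mu / (mu + docLen);
        System.out.println(seen + "\t" + unseen + "\t" + alphaDoc * pC);
    }
}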
Use of structures._ChildDoc in project IR_Base by Linda-sunshine.
From the class DCMLDA, the method initialize_probability:
@Override
protected void initialize_probability(Collection<_Doc> collection) {
    m_alpha = new double[number_of_topics];
    m_beta = new double[number_of_topics][vocabulary_size];
    m_totalAlpha = 0;
    m_totalBeta = new double[number_of_topics];
    m_alphaAuxilary = new double[number_of_topics];
    for (_Doc d : collection) {
        // allocate memory and randomize it
        ((_Doc4DCMLDA) d).setTopics4Gibbs(number_of_topics, 0, vocabulary_size);
        // ((_ChildDoc) d).setTopics4Gibbs_LDA(number_of_topics, 0);
    }
    initialAlphaBeta();
    imposePrior();
}
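setTopics4Gibbs allocates the sampler's count tables and randomizes the initial topic assignments. As a generic illustration of that contract (not IR_Base's actual implementation), a Gibbs-style random initialization typically looks like the following sketch, where every name is hypothetical:

import java.util.Random;

public class GibbsInitSketch {
    // Assign each token a uniformly random topic and build the count tables
    // a collapsed Gibbs sampler needs before its first sweep.
    static int[] randomInit(int[] tokenWordIds, int numTopics,
                            int[][] topicWordCount, int[] topicCount, Random rng) {
        int[] z = new int[tokenWordIds.length];
        for (int n = 0; n < tokenWordIds.length; n++) {
            z[n] = rng.nextInt(numTopics);
            topicWordCount[z[n]][tokenWordIds[n]]++;
            topicCount[z[n]]++;
        }
        return z;
    }

    public static void main(String[] args) {
        int numTopics = 2, vocabSize = 5;
        int[][] topicWordCount = new int[numTopics][vocabSize];
        int[] topicCount = new int[numTopics];
        int[] z = randomInit(new int[]{0, 3, 3, 4}, numTopics,
                             topicWordCount, topicCount, new Random(42));
        System.out.println(java.util.Arrays.toString(z));
    }
}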