Search in sources:

Example 91 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class weightedCorrespondenceModel method initialize_probability.

/**
 * Calls {@code init()}, sets up the variational topic structures of every
 * {@code _ParentDoc4DCM} in {@code collection} and of its child documents, and finally
 * calls {@code imposePrior()}.
 *
 * <p>Elements that are not {@code _ParentDoc4DCM} instances are skipped. For each parent,
 * every child's {@code m_phi} entry, multiplied by the matching feature value, is added into
 * the parent's {@code m_lambda_stat}; the per-topic row sums are then stored in
 * {@code m_lambda_topicStat}.
 *
 * @param collection documents to initialize
 */
@Override
protected void initialize_probability(Collection<_Doc> collection) {
    init();

    for (_Doc doc : collection) {
        if (!(doc instanceof _ParentDoc4DCM)) {
            continue;
        }

        // wordCount and lambdaMass are only read by the disabled debug trace below.
        int wordCount = 0;
        double lambdaMass = 0;

        m_parentDocNum += 1;
        _ParentDoc4DCM parent = (_ParentDoc4DCM) doc;
        parent.setTopics4Variational(number_of_topics, d_alpha, vocabulary_size, d_beta);
        wordCount += parent.getTotalDocLength();

        for (_Stn sentence : parent.getSentences()) {
            sentence.setTopicsVct(number_of_topics);
        }

        for (_ChildDoc child : parent.m_childDocs) {
            wordCount += child.getTotalDocLength();
            m_childDocNum += 1;
            child.setTopics4Variational(number_of_topics, d_alpha);

            // add the child's phi, weighted by feature value, to the parent's lambda statistics
            _SparseFeature[] features = child.getSparse();
            for (int pos = 0; pos < features.length; pos++) {
                int wordId = features[pos].getIndex();
                double weight = features[pos].getValue();
                for (int topic = 0; topic < number_of_topics; topic++) {
                    parent.m_lambda_stat[topic][wordId] += child.m_phi[pos][topic] * weight;
                }
            }
        }

        // cache the per-topic totals of the lambda statistics
        for (int topic = 0; topic < number_of_topics; topic++) {
            parent.m_lambda_topicStat[topic] = Utils.sumOfArray(parent.m_lambda_stat[topic]);
            lambdaMass += parent.m_lambda_topicStat[topic];
        }
        // System.out.println("totalWords\t" + wordCount + "\t" + lambdaMass);
    }

    imposePrior();
}
Also used : structures._Stn(structures._Stn) structures._ChildDoc(structures._ChildDoc) structures._Doc(structures._Doc) structures._ParentDoc4DCM(structures._ParentDoc4DCM) structures._SparseFeature(structures._SparseFeature)

Example 92 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class weightedCorrespondenceModel method calculate_log_likelihood.

/**
 * Accumulates a log-likelihood value for one parent document together with all of its
 * child documents.
 *
 * <p>The total is built from four groups of {@code lgamma}/{@code digamma} terms, added in
 * this order: (1) terms pairing {@code m_alpha} with the parent's {@code m_sstat};
 * (2) per-word, per-topic terms over the parent's sparse features; (3) for each child,
 * terms pairing {@code m_alpha_c} with the child's {@code m_sstat} followed by per-word,
 * per-topic terms over the child's sparse features; (4) per-topic terms pairing
 * {@code m_beta} with the parent's {@code m_lambda_stat} over the whole vocabulary.
 *
 * <p>NOTE(review): the term structure looks like a variational lower bound with
 * Dirichlet-style priors -- confirm against the model derivation.
 *
 * <p>Whenever an intermediate value becomes infinite or NaN, diagnostic lines are written
 * to {@code System.out}; the computation is not interrupted.
 *
 * @param d document to score; it is cast to {@code _ParentDoc4DCM} without a type check
 * @return the accumulated log-likelihood value
 */
@Override
public double calculate_log_likelihood(_Doc d) {
    _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;
    double logLikelihood = 0;
    // Sums reused by every digamma difference involving the parent's m_sstat / m_alpha.
    double gammaSum = Utils.sumOfArray(pDoc.m_sstat);
    double alphaSum = Utils.sumOfArray(m_alpha);
    logLikelihood += Utils.lgamma(alphaSum);
    logLikelihood -= Utils.lgamma(gammaSum);
    // Group 1: per-topic terms pairing m_alpha[k] with the parent's m_sstat[k].
    for (int k = 0; k < number_of_topics; k++) {
        logLikelihood += -Utils.lgamma(m_alpha[k]) + (m_alpha[k] - 1) * (Utils.digamma(pDoc.m_sstat[k]) - Utils.digamma(gammaSum));
        logLikelihood += Utils.lgamma(pDoc.m_sstat[k]);
        logLikelihood -= (pDoc.m_sstat[k] - 1) * (Utils.digamma(pDoc.m_sstat[k]) - Utils.digamma(gammaSum));
    }
    // Group 2: per-word, per-topic terms over the parent's sparse features.
    _SparseFeature[] fvs = pDoc.getSparse();
    for (int n = 0; n < fvs.length; n++) {
        int wID = fvs[n].getIndex();
        double wVal = fvs[n].getValue();
        for (int k = 0; k < number_of_topics; k++) {
            // updateLikelihood collects this (n, k) contribution before it is added to the total.
            double updateLikelihood = 0;
            // NOTE(review): if m_phi[n][k] is exactly 0, 0 * log(0) evaluates to NaN in Java,
            // which the isInfinite checks below do not catch (only the isNaN check further down does).
            updateLikelihood -= pDoc.m_phi[n][k] * (Math.log(pDoc.m_phi[n][k]));
            if (Double.isInfinite(updateLikelihood)) {
                System.out.println("\nlikelihood\t" + updateLikelihood + "\t" + logLikelihood);
            }
            updateLikelihood += pDoc.m_phi[n][k] * (Utils.digamma(pDoc.m_sstat[k]) - Utils.digamma(gammaSum));
            if (Double.isInfinite(updateLikelihood)) {
                System.out.println("\nlikelihood\t" + updateLikelihood + "\t" + logLikelihood);
            }
            updateLikelihood += pDoc.m_phi[n][k] * wVal * (Utils.digamma(pDoc.m_lambda_stat[k][wID]) - Utils.digamma(pDoc.m_lambda_topicStat[k]));
            // Diagnostics printed BEFORE the contribution is added: logLikelihood shown here
            // is the total without this (n, k) term.
            // NOTE(review): the m_phi line is printed twice in each diagnostic dump of this loop.
            if (Double.isInfinite(updateLikelihood)) {
                System.out.println("\nlikelihood\t" + updateLikelihood + "\t" + logLikelihood);
                System.out.println("wVal\t" + wVal);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_sstat[k]\t" + pDoc.m_sstat[k]);
                System.out.println("gammaSum\t" + gammaSum);
                System.out.println("pDoc.m_lambda_stat[k][wID]\t" + pDoc.m_lambda_stat[k][wID] + "\t" + Utils.digamma(pDoc.m_lambda_stat[k][wID]));
                System.out.println("pDoc.m_lambda_topicStat[k]\t" + pDoc.m_lambda_topicStat[k] + "\t" + Utils.digamma(pDoc.m_lambda_topicStat[k]));
            }
            logLikelihood += updateLikelihood;
            // Diagnostics printed AFTER the contribution is added: logLikelihood shown here
            // already includes this (n, k) term.
            if (Double.isNaN(updateLikelihood)) {
                System.out.println("\nlikelihood\t" + updateLikelihood + "\t" + logLikelihood);
                System.out.println("wVal\t" + wVal);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_sstat[k]\t" + pDoc.m_sstat[k]);
                System.out.println("gammaSum\t" + gammaSum);
                System.out.println("pDoc.m_lambda_stat[k][wID]\t" + pDoc.m_lambda_stat[k][wID]);
                System.out.println("pDoc.m_lambda_topicStat[k]\t" + pDoc.m_lambda_topicStat[k]);
            }
            // Same infinity condition as the dump above the update, so an infinite term is reported twice.
            if (Double.isInfinite(updateLikelihood)) {
                System.out.println("\nlikelihood\t" + updateLikelihood + "\t" + logLikelihood);
                System.out.println("wVal\t" + wVal);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_sstat[k]\t" + pDoc.m_sstat[k]);
                System.out.println("gammaSum\t" + gammaSum);
                System.out.println("pDoc.m_lambda_stat[k][wID]\t" + pDoc.m_lambda_stat[k][wID]);
                System.out.println("pDoc.m_lambda_topicStat[k]\t" + pDoc.m_lambda_topicStat[k]);
            }
        }
    }
    // Group 3: contributions of every child document attached to this parent.
    double alphaCSum = Utils.sumOfArray(m_alpha_c);
    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        logLikelihood += Utils.lgamma(alphaCSum);
        double piSum = Utils.sumOfArray(cDoc.m_sstat);
        logLikelihood -= Utils.lgamma(piSum);
        // Per-topic terms pairing m_alpha_c[k] with the child's m_sstat[k].
        for (int k = 0; k < number_of_topics; k++) {
            logLikelihood -= Utils.lgamma(m_alpha_c[k]);
            logLikelihood += (m_alpha_c[k] - 1) * (Utils.digamma(cDoc.m_sstat[k]) - Utils.digamma(piSum));
            logLikelihood += Utils.lgamma(cDoc.m_sstat[k]);
            logLikelihood -= (cDoc.m_sstat[k] - 1) * (Utils.digamma(cDoc.m_sstat[k]) - Utils.digamma(piSum));
        }
        // Per-word, per-topic terms over the child's sparse features; these combine the
        // child's m_phi with both the parent's and the child's m_sstat, and with m_zeta.
        _SparseFeature[] cDocFvs = cDoc.getSparse();
        for (int n = 0; n < cDocFvs.length; n++) {
            int wID = cDocFvs[n].getIndex();
            double wVal = cDocFvs[n].getValue();
            for (int k = 0; k < number_of_topics; k++) {
                logLikelihood += cDoc.m_phi[n][k] * (Utils.digamma(pDoc.m_sstat[k]) - Utils.digamma(gammaSum) + Utils.digamma(cDoc.m_sstat[k]) - Utils.digamma(piSum));
                // The dot product and the m_zeta terms do not depend on n or k, yet they are
                // re-evaluated for every (n, k) pair.
                logLikelihood -= cDoc.m_phi[n][k] * (Utils.dotProduct(cDoc.m_sstat, pDoc.m_sstat) / (piSum * gammaSum * cDoc.m_zeta) + Math.log(cDoc.m_zeta) - 1);
                logLikelihood += wVal * cDoc.m_phi[n][k] * (Utils.digamma(pDoc.m_lambda_stat[k][wID]) - Utils.digamma(pDoc.m_lambda_topicStat[k]));
                // NOTE(review): same 0 * log(0) = NaN hazard as in the parent loop; there is no
                // NaN check in this loop.
                logLikelihood -= cDoc.m_phi[n][k] * Math.log(cDoc.m_phi[n][k]);
                // This check tests the running total, so once it is infinite the dump repeats
                // for every remaining (n, k) pair.
                if (Double.isInfinite(logLikelihood)) {
                    System.out.println("\ncDoc likelihood\t" + "\t" + logLikelihood);
                    System.out.println("cDoc.m_phi[n][k]\t" + cDoc.m_phi[n][k]);
                    // System.out.println("pDoc.m_phi[n][k]\t"+pDoc.m_phi[n][k]);
                    // NOTE(review): the label says m_lambda_stat[][] but the value printed is m_lambda_topicStat[k].
                    System.out.println("pDoc.m_lambda_stat[][]\t" + pDoc.m_lambda_topicStat[k]);
                    System.out.println("cDoc.m_sstat[k]\t" + cDoc.m_sstat[k]);
                    System.out.println("piSum\t" + piSum);
                    // System.out.println("pDoc.m_lambda_stat[k][wID]\t" + pDoc.m_lambda_stat[k][wID]);
                    // System.out.println("pDoc.m_lambda_topicStat[k]\t" + pDoc.m_lambda_topicStat[k]);
                    System.out.println("cDoc zeta\t" + cDoc.m_zeta);
                }
            }
        }
    }
    // Group 4: per-topic terms pairing m_beta[k][v] with the parent's m_lambda_stat[k][v]
    // for every vocabulary entry v.
    for (int k = 0; k < number_of_topics; k++) {
        double betaSum = Utils.sumOfArray(m_beta[k]);
        logLikelihood += Utils.lgamma(betaSum);
        logLikelihood -= Utils.lgamma(pDoc.m_lambda_topicStat[k]);
        for (int v = 0; v < vocabulary_size; v++) {
            logLikelihood -= Utils.lgamma(m_beta[k][v]);
            logLikelihood += (m_beta[k][v] - 1) * (Utils.digamma(pDoc.m_lambda_stat[k][v]) - Utils.digamma(pDoc.m_lambda_topicStat[k]));
            logLikelihood += Utils.lgamma(pDoc.m_lambda_stat[k][v]);
            logLikelihood -= (pDoc.m_lambda_stat[k][v] - 1) * (Utils.digamma(pDoc.m_lambda_stat[k][v]) - Utils.digamma(pDoc.m_lambda_topicStat[k]));
        }
    }
    // System.out.println("doc \t"+pDoc.getName()+"\t likelihood \t"+logLikelihood);
    return logLikelihood;
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._ParentDoc4DCM(structures._ParentDoc4DCM) structures._SparseFeature(structures._SparseFeature)

Example 93 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class languageModelBaseLine method rankChild4StnByLikelihood.

/**
 * Scores every child document of {@code pDoc} against the sentence {@code stnObj}.
 *
 * <p>For each child, only sentence words that also occur in the child's sparse vector
 * contribute a per-word term (sentence value times the log of a smoothed ratio built from
 * {@code m_smoothingMu} and {@code m_wordSstat}). Afterwards
 * {@code stnObj.getLength() * log(alphaDoc)} is added once, where
 * {@code alphaDoc = m_smoothingMu / (m_smoothingMu + child length)}.
 *
 * @param stnObj sentence whose features are scored
 * @param pDoc parent document whose {@code m_childDocs} are scored
 * @return map from child document name to its score
 */
protected HashMap<String, Double> rankChild4StnByLikelihood(_Stn stnObj, _ParentDoc pDoc) {
    HashMap<String, Double> scoreByChild = new HashMap<String, Double>();

    for (_ChildDoc child : pDoc.m_childDocs) {
        int childLength = child.getTotalDocLength();
        _SparseFeature[] childFeatures = child.getSparse();
        double score = 0;
        double alphaDoc = m_smoothingMu / (m_smoothingMu + childLength);

        _SparseFeature[] stnFeatures = stnObj.getFv();
        for (int i = 0; i < stnFeatures.length; i++) {
            int wordId = stnFeatures[i].getIndex();
            double stnCount = stnFeatures[i].getValue();

            // words of the sentence that the child does not contain add no per-word term
            int match = Utils.indexOf(childFeatures, wordId);
            if (match != -1) {
                double childCount = childFeatures[match].getValue();
                double smoothed = childCount / (m_smoothingMu + childLength);
                smoothed += m_smoothingMu * m_wordSstat.get(wordId) / (m_smoothingMu + childLength);
                double wordLogRatio = Math.log(smoothed / (alphaDoc * m_wordSstat.get(wordId)));
                score += stnCount * wordLogRatio;
            }
        }

        // length-dependent term, added once per child
        score += stnObj.getLength() * Math.log(alphaDoc);
        scoreByChild.put(child.getName(), score);
    }

    return scoreByChild;
}
Also used : structures._ChildDoc(structures._ChildDoc) HashMap(java.util.HashMap) structures._SparseFeature(structures._SparseFeature)

Example 94 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class DCMLDA method initialize_probability.

/**
 * Allocates the model-level alpha/beta storage, calls {@code setTopics4Gibbs} on every
 * document, then calls {@code initialAlphaBeta()} followed by {@code imposePrior()}.
 *
 * @param collection documents to initialize; each element is cast to {@code _Doc4DCMLDA}
 */
@Override
protected void initialize_probability(Collection<_Doc> collection) {
    // fresh model-level parameter storage, sized by topic count and vocabulary size
    m_alpha = new double[number_of_topics];
    m_beta = new double[number_of_topics][vocabulary_size];
    m_totalAlpha = 0;
    m_totalBeta = new double[number_of_topics];
    m_alphaAuxilary = new double[number_of_topics];

    // per-document topic structures
    for (_Doc doc : collection) {
        _Doc4DCMLDA dcmDoc = (_Doc4DCMLDA) doc;
        dcmDoc.setTopics4Gibbs(number_of_topics, 0, vocabulary_size);
    }

    initialAlphaBeta();
    imposePrior();
}
Also used : structures._Doc4DCMLDA(structures._Doc4DCMLDA) structures._Doc(structures._Doc)

Aggregations

structures._ChildDoc (structures._ChildDoc)77 structures._ParentDoc (structures._ParentDoc)47 structures._Doc (structures._Doc)35 structures._Stn (structures._Stn)25 structures._Word (structures._Word)22 File (java.io.File)18 structures._ParentDoc4DCM (structures._ParentDoc4DCM)16 structures._SparseFeature (structures._SparseFeature)16 HashMap (java.util.HashMap)14 PrintWriter (java.io.PrintWriter)12 FileNotFoundException (java.io.FileNotFoundException)11 structures._ChildDoc4BaseWithPhi (structures._ChildDoc4BaseWithPhi)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)2 Feature (Classifier.supervised.liblinear.Feature)1 FeatureNode (Classifier.supervised.liblinear.FeatureNode)1 Model (Classifier.supervised.liblinear.Model)1 Parameter (Classifier.supervised.liblinear.Parameter)1 Problem (Classifier.supervised.liblinear.Problem)1 SolverType (Classifier.supervised.liblinear.SolverType)1