Search in sources :

Example 61 with structures._SparseFeature

use of structures._SparseFeature in project IR_Base by Linda-sunshine.

the class LDAGibbs4AC_test method rankChild4StnByHybridPro.

/**
 * Scores each child document of {@code pDoc} against the sentence {@code stnObj}.
 *
 * <p>For every word of the sentence, a smoothed language-model term and a topic-model term are
 * mixed with weight {@code m_tau}, logged, and weighted by the word's value in the sentence. The
 * resulting sum is then mixed (again with {@code m_tau}) with the similarity between the sentence
 * topics and the child topics.
 *
 * @param stnObj the sentence whose sparse features and topics are scored
 * @param pDoc the parent document whose children are ranked
 * @return a map from child document name to its hybrid score
 */
protected HashMap<String, Double> rankChild4StnByHybridPro(_Stn stnObj, _ParentDoc pDoc) {
    HashMap<String, Double> scoreByChild = new HashMap<String, Double>();
    double mu = m_LM.m_smoothingMu;

    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        double docLength = cDoc.getTotalDocLength();
        double backgroundWeight = mu / (mu + docLength);
        _SparseFeature[] docFeatures = cDoc.getSparse();
        _SparseFeature[] stnFeatures = stnObj.getFv();

        // Denominator of the topic-in-document term; it does not depend on the word or topic.
        double topicNormalizer = d_alpha * number_of_topics + docLength;

        double sentenceScore = 0;
        for (int i = 0; i < stnFeatures.length; i++) {
            _SparseFeature stnWord = stnFeatures[i];
            int wid = stnWord.getIndex();
            double stnWeight = stnWord.getValue();

            // A word absent from the child document contributes a zero count.
            int position = Utils.indexOf(docFeatures, wid);
            double docCount = (position != -1) ? docFeatures[position].getValue() : 0;

            // Language-model part: document estimate plus reference (background) estimate.
            double lmPart = (1 - backgroundWeight) * docCount / docLength
                    + backgroundWeight * m_LM.getReferenceProb(wid);

            // Topic-model part: sum over topics of word-given-topic times topic-in-document.
            double tmPart = 0;
            for (int k = 0; k < number_of_topics; k++) {
                tmPart += (word_topic_sstat[k][wid] / m_sstat[k]) * (topicInDocProb(k, cDoc) / topicNormalizer);
            }

            double mixed = m_tau * lmPart + (1 - m_tau) * tmPart;
            sentenceScore += stnWeight * Math.log(mixed);
        }

        double topicSimilarity = computeSimilarity(stnObj.m_topics, cDoc.m_topics);
        double finalScore = m_tau * sentenceScore + (1 - m_tau) * topicSimilarity;
        scoreByChild.put(cDoc.getName(), finalScore);
    }
    return scoreByChild;
}
Also used : structures._ChildDoc(structures._ChildDoc) HashMap(java.util.HashMap) structures._SparseFeature(structures._SparseFeature)

Example 62 with structures._SparseFeature

use of structures._SparseFeature in project IR_Base by Linda-sunshine.

the class corrLDA_Gibbs method calculate_log_likelihood4Child.

/**
 * Computes the log-likelihood of a child document under the current topic statistics.
 *
 * <p>For each sparse feature, the per-topic products of {@code childWordByTopicProb} and
 * {@code childTopicInDoc} are normalized and summed; sums below {@code 1e-10} are bumped by
 * {@code 1e-10} (and reported on stdout) before taking the logarithm.
 *
 * @param d the document to score; it is cast to {@code _ChildDoc} without a type check
 * @return the sum over features of feature value times log word likelihood
 */
protected double calculate_log_likelihood4Child(_Doc d) {
    _ChildDoc child = (_ChildDoc) d;
    _SparseFeature[] features = child.getSparse();

    double topicMass = Utils.sumOfArray(child.m_sstat);
    double priorMass = m_smoothingParam * number_of_topics;
    double normalizer = priorMass + topicMass;

    double total = 0;
    for (_SparseFeature feature : features) {
        int wid = feature.getIndex();
        double weight = feature.getValue();

        double wordLikelihood = 0;
        for (int k = 0; k < number_of_topics; k++) {
            wordLikelihood += childWordByTopicProb(k, wid) * childTopicInDoc(k, child) / normalizer;
        }

        // Keep the argument of the logarithm away from zero.
        if (wordLikelihood < 1e-10) {
            wordLikelihood += 1e-10;
            System.out.println("small likelihood in child");
        }

        total += weight * Math.log(wordLikelihood);
    }
    return total;
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._SparseFeature(structures._SparseFeature)

Example 63 with structures._SparseFeature

use of structures._SparseFeature in project IR_Base by Linda-sunshine.

the class weightedCorrespondenceModel method initialize_probability.

/**
 * Initializes variational state for every parent document in {@code collection} and its
 * children, then applies the prior.
 *
 * <p>Steps, in order: {@code init()} is called; for each element that is a
 * {@code _ParentDoc4DCM}, the parent counter is incremented, the parent's and its sentences'
 * topic structures are set up, each child's topic structures are set up and its
 * {@code m_phi[n][k] * value} contributions are added into the parent's {@code m_lambda_stat};
 * each {@code m_lambda_topicStat[k]} is then set to the sum of row {@code k} of
 * {@code m_lambda_stat}. Finally {@code imposePrior()} is called. Elements that are not
 * {@code _ParentDoc4DCM} are skipped.
 *
 * @param collection the documents to initialize
 */
@Override
protected void initialize_probability(Collection<_Doc> collection) {
    init();
    for (_Doc d : collection) {
        if (!(d instanceof _ParentDoc4DCM)) {
            continue;
        }

        m_parentDocNum += 1;
        _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;
        pDoc.setTopics4Variational(number_of_topics, d_alpha, vocabulary_size, d_beta);

        for (_Stn stnObj : pDoc.getSentences()) {
            stnObj.setTopicsVct(number_of_topics);
        }

        for (_ChildDoc cDoc : pDoc.m_childDocs) {
            m_childDocNum += 1;
            cDoc.setTopics4Variational(number_of_topics, d_alpha);

            // Update the article-thread sufficient statistics with this child's words.
            // The feature array is fetched once instead of on every loop test and iteration.
            _SparseFeature[] childFeatures = cDoc.getSparse();
            for (int n = 0; n < childFeatures.length; n++) {
                int wID = childFeatures[n].getIndex();
                double wVal = childFeatures[n].getValue();
                for (int k = 0; k < number_of_topics; k++) {
                    pDoc.m_lambda_stat[k][wID] += cDoc.m_phi[n][k] * wVal;
                }
            }
        }

        // Cache the per-topic row sums of the word statistics.
        for (int k = 0; k < number_of_topics; k++) {
            pDoc.m_lambda_topicStat[k] = Utils.sumOfArray(pDoc.m_lambda_stat[k]);
        }
    }
    imposePrior();
}
Also used : structures._Stn(structures._Stn) structures._ChildDoc(structures._ChildDoc) structures._Doc(structures._Doc) structures._ParentDoc4DCM(structures._ParentDoc4DCM) structures._SparseFeature(structures._SparseFeature)

Example 64 with structures._SparseFeature

use of structures._SparseFeature in project IR_Base by Linda-sunshine.

the class weightedCorrespondenceModel method calculate_log_likelihood.

/**
 * Computes a log-likelihood value for one parent document together with its child documents.
 *
 * <p>The argument is cast to {@code _ParentDoc4DCM} without a type check. The result is the sum
 * of four groups of terms, accumulated in this order:
 * <ol>
 *   <li>terms built from {@code m_alpha} and the parent's {@code m_sstat};</li>
 *   <li>per-word, per-topic terms built from the parent's {@code m_phi}, {@code m_sstat},
 *       {@code m_lambda_stat} and {@code m_lambda_topicStat};</li>
 *   <li>for every child document, terms built from {@code m_alpha_c}, the child's
 *       {@code m_sstat}, {@code m_phi} and {@code m_zeta};</li>
 *   <li>terms built from {@code m_beta} and the parent's {@code m_lambda_stat} over the whole
 *       vocabulary.</li>
 * </ol>
 * Diagnostic lines are written to stdout whenever an intermediate value is infinite or NaN; they
 * do not change the returned value.
 *
 * <p>NOTE(review): the lgamma/digamma structure and the {@code phi * log(phi)} terms look like a
 * variational lower bound — confirm against the model derivation.
 *
 * @param d the document to score; must be a {@code _ParentDoc4DCM}
 * @return the accumulated log-likelihood value
 */
@Override
public double calculate_log_likelihood(_Doc d) {
    _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;
    double logLikelihood = 0;
    // Totals of the parent's topic statistics and of the alpha vector.
    double gammaSum = Utils.sumOfArray(pDoc.m_sstat);
    double alphaSum = Utils.sumOfArray(m_alpha);
    logLikelihood += Utils.lgamma(alphaSum);
    logLikelihood -= Utils.lgamma(gammaSum);
    // Per-topic terms pairing m_alpha[k] with the parent's m_sstat[k].
    for (int k = 0; k < number_of_topics; k++) {
        logLikelihood += -Utils.lgamma(m_alpha[k]) + (m_alpha[k] - 1) * (Utils.digamma(pDoc.m_sstat[k]) - Utils.digamma(gammaSum));
        logLikelihood += Utils.lgamma(pDoc.m_sstat[k]);
        logLikelihood -= (pDoc.m_sstat[k] - 1) * (Utils.digamma(pDoc.m_sstat[k]) - Utils.digamma(gammaSum));
    }
    // Per-word, per-topic terms for the parent document's own words.
    _SparseFeature[] fvs = pDoc.getSparse();
    for (int n = 0; n < fvs.length; n++) {
        int wID = fvs[n].getIndex();
        double wVal = fvs[n].getValue();
        for (int k = 0; k < number_of_topics; k++) {
            double updateLikelihood = 0;
            // NOTE(review): if m_phi[n][k] is exactly 0, 0 * log(0) evaluates to NaN in Java,
            // which the isInfinite checks below do not catch (only the isNaN check does).
            updateLikelihood -= pDoc.m_phi[n][k] * (Math.log(pDoc.m_phi[n][k]));
            if (Double.isInfinite(updateLikelihood)) {
                System.out.println("\nlikelihood\t" + updateLikelihood + "\t" + logLikelihood);
            }
            updateLikelihood += pDoc.m_phi[n][k] * (Utils.digamma(pDoc.m_sstat[k]) - Utils.digamma(gammaSum));
            if (Double.isInfinite(updateLikelihood)) {
                System.out.println("\nlikelihood\t" + updateLikelihood + "\t" + logLikelihood);
            }
            updateLikelihood += pDoc.m_phi[n][k] * wVal * (Utils.digamma(pDoc.m_lambda_stat[k][wID]) - Utils.digamma(pDoc.m_lambda_topicStat[k]));
            // Diagnostic dump of every input of this term. NOTE(review): m_phi is printed twice.
            if (Double.isInfinite(updateLikelihood)) {
                System.out.println("\nlikelihood\t" + updateLikelihood + "\t" + logLikelihood);
                System.out.println("wVal\t" + wVal);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_sstat[k]\t" + pDoc.m_sstat[k]);
                System.out.println("gammaSum\t" + gammaSum);
                System.out.println("pDoc.m_lambda_stat[k][wID]\t" + pDoc.m_lambda_stat[k][wID] + "\t" + Utils.digamma(pDoc.m_lambda_stat[k][wID]));
                System.out.println("pDoc.m_lambda_topicStat[k]\t" + pDoc.m_lambda_topicStat[k] + "\t" + Utils.digamma(pDoc.m_lambda_topicStat[k]));
            }
            // The term is added before the checks below, so the logLikelihood they print
            // already includes it.
            logLikelihood += updateLikelihood;
            if (Double.isNaN(updateLikelihood)) {
                System.out.println("\nlikelihood\t" + updateLikelihood + "\t" + logLikelihood);
                System.out.println("wVal\t" + wVal);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_sstat[k]\t" + pDoc.m_sstat[k]);
                System.out.println("gammaSum\t" + gammaSum);
                System.out.println("pDoc.m_lambda_stat[k][wID]\t" + pDoc.m_lambda_stat[k][wID]);
                System.out.println("pDoc.m_lambda_topicStat[k]\t" + pDoc.m_lambda_topicStat[k]);
            }
            // NOTE(review): an infinite value reported above is reported a second time here,
            // now with the updated running total.
            if (Double.isInfinite(updateLikelihood)) {
                System.out.println("\nlikelihood\t" + updateLikelihood + "\t" + logLikelihood);
                System.out.println("wVal\t" + wVal);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_phi[n][k]\t" + pDoc.m_phi[n][k]);
                System.out.println("pDoc.m_sstat[k]\t" + pDoc.m_sstat[k]);
                System.out.println("gammaSum\t" + gammaSum);
                System.out.println("pDoc.m_lambda_stat[k][wID]\t" + pDoc.m_lambda_stat[k][wID]);
                System.out.println("pDoc.m_lambda_topicStat[k]\t" + pDoc.m_lambda_topicStat[k]);
            }
        }
    }
    // Child-document terms; lgamma(alphaCSum) is added once per child.
    double alphaCSum = Utils.sumOfArray(m_alpha_c);
    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        logLikelihood += Utils.lgamma(alphaCSum);
        double piSum = Utils.sumOfArray(cDoc.m_sstat);
        logLikelihood -= Utils.lgamma(piSum);
        // Per-topic terms pairing m_alpha_c[k] with the child's m_sstat[k].
        for (int k = 0; k < number_of_topics; k++) {
            logLikelihood -= Utils.lgamma(m_alpha_c[k]);
            logLikelihood += (m_alpha_c[k] - 1) * (Utils.digamma(cDoc.m_sstat[k]) - Utils.digamma(piSum));
            logLikelihood += Utils.lgamma(cDoc.m_sstat[k]);
            logLikelihood -= (cDoc.m_sstat[k] - 1) * (Utils.digamma(cDoc.m_sstat[k]) - Utils.digamma(piSum));
        }
        // Per-word, per-topic terms for the child's words; the word term reads the PARENT's
        // m_lambda_stat / m_lambda_topicStat.
        _SparseFeature[] cDocFvs = cDoc.getSparse();
        for (int n = 0; n < cDocFvs.length; n++) {
            int wID = cDocFvs[n].getIndex();
            double wVal = cDocFvs[n].getValue();
            for (int k = 0; k < number_of_topics; k++) {
                logLikelihood += cDoc.m_phi[n][k] * (Utils.digamma(pDoc.m_sstat[k]) - Utils.digamma(gammaSum) + Utils.digamma(cDoc.m_sstat[k]) - Utils.digamma(piSum));
                // The dot product does not depend on n or k but is recomputed on every iteration.
                logLikelihood -= cDoc.m_phi[n][k] * (Utils.dotProduct(cDoc.m_sstat, pDoc.m_sstat) / (piSum * gammaSum * cDoc.m_zeta) + Math.log(cDoc.m_zeta) - 1);
                logLikelihood += wVal * cDoc.m_phi[n][k] * (Utils.digamma(pDoc.m_lambda_stat[k][wID]) - Utils.digamma(pDoc.m_lambda_topicStat[k]));
                // NOTE(review): same 0 * log(0) = NaN hazard as for the parent's m_phi above.
                logLikelihood -= cDoc.m_phi[n][k] * Math.log(cDoc.m_phi[n][k]);
                // Checks the running total, so once it is infinite this fires on every later
                // iteration as well.
                if (Double.isInfinite(logLikelihood)) {
                    System.out.println("\ncDoc likelihood\t" + "\t" + logLikelihood);
                    System.out.println("cDoc.m_phi[n][k]\t" + cDoc.m_phi[n][k]);
                    // System.out.println("pDoc.m_phi[n][k]\t"+pDoc.m_phi[n][k]);
                    // NOTE(review): the label says m_lambda_stat but the value printed is
                    // m_lambda_topicStat[k].
                    System.out.println("pDoc.m_lambda_stat[][]\t" + pDoc.m_lambda_topicStat[k]);
                    System.out.println("cDoc.m_sstat[k]\t" + cDoc.m_sstat[k]);
                    System.out.println("piSum\t" + piSum);
                    // System.out.println("pDoc.m_lambda_stat[k][wID]\t" + pDoc.m_lambda_stat[k][wID]);
                    // System.out.println("pDoc.m_lambda_topicStat[k]\t" + pDoc.m_lambda_topicStat[k]);
                    System.out.println("cDoc zeta\t" + cDoc.m_zeta);
                }
            }
        }
    }
    // Per-topic, per-vocabulary-entry terms pairing m_beta[k][v] with the parent's m_lambda_stat[k][v].
    for (int k = 0; k < number_of_topics; k++) {
        double betaSum = Utils.sumOfArray(m_beta[k]);
        logLikelihood += Utils.lgamma(betaSum);
        logLikelihood -= Utils.lgamma(pDoc.m_lambda_topicStat[k]);
        for (int v = 0; v < vocabulary_size; v++) {
            logLikelihood -= Utils.lgamma(m_beta[k][v]);
            logLikelihood += (m_beta[k][v] - 1) * (Utils.digamma(pDoc.m_lambda_stat[k][v]) - Utils.digamma(pDoc.m_lambda_topicStat[k]));
            logLikelihood += Utils.lgamma(pDoc.m_lambda_stat[k][v]);
            logLikelihood -= (pDoc.m_lambda_stat[k][v] - 1) * (Utils.digamma(pDoc.m_lambda_stat[k][v]) - Utils.digamma(pDoc.m_lambda_topicStat[k]));
        }
    }
    // System.out.println("doc \t"+pDoc.getName()+"\t likelihood \t"+logLikelihood);
    return logLikelihood;
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._ParentDoc4DCM(structures._ParentDoc4DCM) structures._SparseFeature(structures._SparseFeature)

Example 65 with structures._SparseFeature

use of structures._SparseFeature in project IR_Base by Linda-sunshine.

the class languageModelBaseLine method rankChild4StnByLikelihood.

/**
 * Scores each child document of {@code pDoc} against the sentence {@code stnObj} with a smoothed
 * language model.
 *
 * <p>Only sentence words that also occur in the child document contribute a per-word term; words
 * missing from the child are skipped. A length-dependent term,
 * {@code stnObj.getLength() * log(alphaDoc)}, is added once per child.
 *
 * @param stnObj the sentence whose sparse features are scored
 * @param pDoc the parent document whose children are ranked
 * @return a map from child document name to its log-likelihood score
 */
protected HashMap<String, Double> rankChild4StnByLikelihood(_Stn stnObj, _ParentDoc pDoc) {
    HashMap<String, Double> scoreByChild = new HashMap<String, Double>();

    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        int cDocLen = cDoc.getTotalDocLength();
        _SparseFeature[] docFeatures = cDoc.getSparse();
        double alphaDoc = m_smoothingMu / (m_smoothingMu + cDocLen);
        _SparseFeature[] stnFeatures = stnObj.getFv();

        double score = 0;
        for (int i = 0; i < stnFeatures.length; i++) {
            int wid = stnFeatures[i].getIndex();
            double stnWeight = stnFeatures[i].getValue();

            int position = Utils.indexOf(docFeatures, wid);
            if (position != -1) {
                double docCount = docFeatures[position].getValue();
                double collectionStat = m_wordSstat.get(wid);

                // Smoothed estimate: document count plus mu-weighted collection statistic.
                double smoothed = docCount / (m_smoothingMu + cDocLen)
                        + m_smoothingMu * collectionStat / (m_smoothingMu + cDocLen);

                score += stnWeight * Math.log(smoothed / (alphaDoc * collectionStat));
            }
        }

        score += stnObj.getLength() * Math.log(alphaDoc);
        scoreByChild.put(cDoc.getName(), score);
    }
    return scoreByChild;
}
Also used : structures._ChildDoc(structures._ChildDoc) HashMap(java.util.HashMap) structures._SparseFeature(structures._SparseFeature)

Aggregations

structures._SparseFeature (structures._SparseFeature)94 structures._ChildDoc (structures._ChildDoc)14 structures._Doc (structures._Doc)14 structures._Review (structures._Review)14 HashMap (java.util.HashMap)7 structures._ParentDoc (structures._ParentDoc)7 structures._Stn (structures._Stn)7 Feature (Classifier.supervised.liblinear.Feature)6 FeatureNode (Classifier.supervised.liblinear.FeatureNode)6 structures._RankItem (structures._RankItem)5 File (java.io.File)3 PrintWriter (java.io.PrintWriter)3 Classifier.supervised.modelAdaptation._AdaptStruct (Classifier.supervised.modelAdaptation._AdaptStruct)2 FileNotFoundException (java.io.FileNotFoundException)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Map (java.util.Map)2 Entry (java.util.Map.Entry)2 structures._ChildDoc4BaseWithPhi (structures._ChildDoc4BaseWithPhi)2 structures._HDPThetaStar (structures._HDPThetaStar)2