Search in sources :

Example 46 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class ACCTM method calculate_log_likelihood4Child.

/**
 * Computes the log-likelihood of a child document from its sparse feature vector.
 *
 * <p>For every sparse feature the per-word probability is the sum over all topics of
 * {@code childWordByTopicProb(topic, wid) * childTopicInDocProb(topic, doc)}. The log of
 * that sum is weighted by the feature's value and accumulated.
 *
 * @param d the document to score; it is cast to {@code _ChildDoc}
 * @return the sum over features of {@code value * log(word probability)}
 */
protected double calculate_log_likelihood4Child(_Doc d) {
    _ChildDoc child = (_ChildDoc) d;
    double total = 0.0;

    for (_SparseFeature feature : child.getSparse()) {
        int wid = feature.getIndex();
        double weight = feature.getValue();

        // Mix the word probability over all topics.
        double wordProb = 0;
        for (int topic = 0; topic < number_of_topics; topic++) {
            wordProb += childWordByTopicProb(topic, wid) * childTopicInDocProb(topic, child);
        }

        // A (near-)zero probability is reported and nudged by 1e-10 before taking the log.
        if (Math.abs(wordProb) < 1e-10) {
            System.out.println("wordLoglikelihood\t" + wordProb);
            wordProb += 1e-10;
        }

        total += weight * Math.log(wordProb);
    }

    return total;
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._SparseFeature(structures._SparseFeature)

Example 47 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class ACCTM method childTopicInDocProb.

/**
 * Computes the proportion of topic {@code tid} in a child document, combining the child's
 * own topic counts with its parent's topic counts scaled by the child's mu.
 *
 * <p>The value returned is
 * {@code (d_alpha + mu * parent.m_sstat[tid] / sum(parent.m_sstat) + d.m_sstat[tid])
 * / (m_kAlpha + mu + sum(d.m_sstat))}.
 *
 * <p>NOTE(review): there is no guard for a parent whose {@code m_sstat} sums to zero;
 * confirm callers never reach this method in that state.
 *
 * @param tid topic index into the {@code m_sstat} arrays
 * @param d   the child document; its {@code m_parentDoc} supplies the parent counts
 * @return the ratio described above
 */
protected double childTopicInDocProb(int tid, _ChildDoc d) {
    _ParentDoc parent = (_ParentDoc) (d.m_parentDoc);
    double parentTotal = Utils.sumOfArray(parent.m_sstat);
    double childTotal = Utils.sumOfArray(d.m_sstat);

    // Parent's share of this topic, scaled by mu, plus the child's own count and the prior.
    double numerator = d_alpha + d.getMu() * parent.m_sstat[tid] / parentTotal + d.m_sstat[tid];
    double denominator = m_kAlpha + d.getMu() + childTotal;

    return numerator / denominator;
}
Also used : structures._ParentDoc(structures._ParentDoc)

Example 48 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class ACCTM method computeTestMu4Doc.

/**
 * Sets the mu value of a child document for testing.
 *
 * <p>The cosine between the child's and its parent's inference vectors is evaluated, but
 * its result is not used: mu is always set to the constant {@code 1e32}.
 *
 * <p>NOTE(review): the discarded cosine looks like either a leftover experiment or a
 * deliberate override; confirm which before removing the call or the constant.
 *
 * @param d the child document whose mu is set
 */
protected void computeTestMu4Doc(_ChildDoc d) {
    _ParentDoc parent = d.m_parentDoc;

    // Evaluated exactly as before; the similarity itself is thrown away.
    Utils.cosine(d.getSparseVct4Infer(), parent.getSparseVct4Infer());

    d.setMu(1e32);
}
Also used : structures._ParentDoc(structures._ParentDoc)

Example 49 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class ACCTM method initTest.

/**
 * Prepares a parent document and all of its child documents for testing and appends them
 * to {@code sampleTestSet}.
 *
 * <p>The parent is initialised with a test length of 0. Each child is initialised with a
 * test length of {@code m_testWord4PerplexityProportion * getTotalDocLength()} truncated
 * to an int. Every document gets its inference vector created, and each child also has
 * its mu set via {@code computeTestMu4Doc}.
 *
 * @param sampleTestSet list that receives the parent followed by each of its children
 * @param d             the document to prepare; it is cast to {@code _ParentDoc}
 */
protected void initTest(ArrayList<_Doc> sampleTestSet, _Doc d) {
    _ParentDoc parent = (_ParentDoc) d;

    // Give every sentence of the parent a topic vector before the document itself is set up.
    for (_Stn sentence : parent.getSentences()) {
        sentence.setTopicsVct(number_of_topics);
    }

    // The parent is set up with a test length of zero.
    parent.setTopics4GibbsTest(number_of_topics, 0, 0);
    sampleTestSet.add(parent);
    parent.createSparseVct4Infer();

    for (_ChildDoc child : parent.m_childDocs) {
        int heldOut = (int) (m_testWord4PerplexityProportion * child.getTotalDocLength());
        child.setTopics4GibbsTest(number_of_topics, 0, heldOut);
        sampleTestSet.add(child);
        child.createSparseVct4Infer();
        computeTestMu4Doc(child);
    }
}
Also used : structures._Stn(structures._Stn) structures._ChildDoc(structures._ChildDoc) structures._ParentDoc(structures._ParentDoc)

Example 50 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class ACCTM method sampleInChildDoc.

/**
 * Resamples the topic assignment of every word in a child document.
 *
 * <p>For each word the current assignment is removed from the document counts (and from
 * the corpus-level counts when {@code m_collectCorpusStats} is set). An unnormalised
 * weight {@code childWordByTopicProb * childTopicInDocProb} is cached per topic, and a
 * new topic is drawn in proportion to those weights. The new assignment is then stored
 * on the word and added back to the same counts.
 *
 * @param d the document to resample; it is cast to {@code _ChildDoc}
 */
protected void sampleInChildDoc(_Doc d) {
    _ChildDoc child = (_ChildDoc) d;

    for (_Word word : child.getWords()) {
        int wid = word.getIndex();
        int oldTopic = word.getTopic();

        // Take the word's current assignment out of the counts.
        child.m_sstat[oldTopic]--;
        if (m_collectCorpusStats) {
            word_topic_sstat[oldTopic][wid]--;
            m_sstat[oldTopic]--;
        }

        // Cache the unnormalised weight of each topic and accumulate their total.
        double remaining = 0;
        for (int k = 0; k < number_of_topics; k++) {
            double pWordTopic = childWordByTopicProb(k, wid);
            double pTopicCDoc = childTopicInDocProb(k, child);
            m_topicProbCache[k] = pWordTopic * pTopicCDoc;
            remaining += m_topicProbCache[k];
        }

        // Scale the total by a random draw, then walk the topics until it is used up.
        remaining *= m_rand.nextDouble();
        int newTopic = 0;
        while (newTopic < number_of_topics) {
            remaining -= m_topicProbCache[newTopic];
            if (remaining <= 0) {
                break;
            }
            newTopic++;
        }
        // If the walk ran past the end, fall back to the last topic.
        if (newTopic == number_of_topics) {
            newTopic--;
        }

        // Record the new assignment and put it back into the counts.
        word.setTopic(newTopic);
        child.m_sstat[newTopic]++;
        if (m_collectCorpusStats) {
            word_topic_sstat[newTopic][wid]++;
            m_sstat[newTopic]++;
        }
    }
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._Word(structures._Word)

Aggregations

structures._ChildDoc (structures._ChildDoc)77 structures._ParentDoc (structures._ParentDoc)47 structures._Doc (structures._Doc)35 structures._Stn (structures._Stn)25 structures._Word (structures._Word)22 File (java.io.File)18 structures._ParentDoc4DCM (structures._ParentDoc4DCM)16 structures._SparseFeature (structures._SparseFeature)16 HashMap (java.util.HashMap)14 PrintWriter (java.io.PrintWriter)12 FileNotFoundException (java.io.FileNotFoundException)11 structures._ChildDoc4BaseWithPhi (structures._ChildDoc4BaseWithPhi)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)2 Feature (Classifier.supervised.liblinear.Feature)1 FeatureNode (Classifier.supervised.liblinear.FeatureNode)1 Model (Classifier.supervised.liblinear.Model)1 Parameter (Classifier.supervised.liblinear.Parameter)1 Problem (Classifier.supervised.liblinear.Problem)1 SolverType (Classifier.supervised.liblinear.SolverType)1