Search in sources :

Example 11 with structures._ParentDoc4DCM

use of structures._ParentDoc4DCM in project IR_Base by Linda-sunshine.

the class DCMCorrLDA method updateBeta.

/**
 * Iteratively re-estimates the per-word prior {@code m_beta[tid]} of one topic.
 *
 * <p>Each pass recomputes {@code m_totalBeta[tid]} as the sum of {@code m_beta[tid]},
 * accumulates digamma differences over every parent document in {@code m_trainSet},
 * and then replaces each {@code m_beta[tid][v]} with
 * {@code m_beta[tid][v] * (numerator[v] / denominator) + d_beta}. Passes are repeated
 * until the largest absolute change of any entry is no greater than
 * {@code m_newtonConverge}.
 *
 * <p>Words that were never assigned to this topic in any parent document get a ratio
 * of zero and are therefore reset to {@code d_beta}. If the topic has no words at all,
 * {@code m_beta[tid]} is left untouched.
 *
 * @param tid index of the topic whose prior row is updated
 */
protected void updateBeta(int tid) {
    double diff;
    do {
        diff = 0;
        double wordNum4Tid = 0;
        // Freshly allocated double arrays are already zero-filled.
        double[] wordNum4Tid4V = new double[vocabulary_size];
        double totalBetaDenominator = 0;
        double[] totalBetaNumerator = new double[vocabulary_size];

        m_totalBeta[tid] = Utils.sumOfArray(m_beta[tid]);
        double digBeta4Tid = Utils.digamma(m_totalBeta[tid]);

        for (_Doc d : m_trainSet) {
            // NOTE(review): the guard tests _ParentDoc but the cast targets _ParentDoc4DCM;
            // a plain _ParentDoc in the training set would raise ClassCastException here.
            if (!(d instanceof _ParentDoc))
                continue;

            _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;
            totalBetaDenominator += Utils.digamma(m_totalBeta[tid] + pDoc.m_topic_stat[tid]) - digBeta4Tid;
            for (int v = 0; v < vocabulary_size; v++) {
                wordNum4Tid += pDoc.m_wordTopic_stat[tid][v];
                wordNum4Tid4V[v] += pDoc.m_wordTopic_stat[tid][v];
                totalBetaNumerator[v] += Utils.digamma(m_beta[tid][v] + pDoc.m_wordTopic_stat[tid][v]);
                totalBetaNumerator[v] -= Utils.digamma(m_beta[tid][v]);
            }
        }

        // Nothing was assigned to this topic: skip the update so diff stays 0.
        if (wordNum4Tid != 0) {
            for (int v = 0; v < vocabulary_size; v++) {
                double deltaBeta = 0;
                if (wordNum4Tid4V[v] != 0)
                    deltaBeta = totalBetaNumerator[v] / totalBetaDenominator;

                double newBeta = m_beta[tid][v] * deltaBeta + d_beta;
                double t_diff = Math.abs(m_beta[tid][v] - newBeta);
                if (t_diff > diff)
                    diff = t_diff;
                m_beta[tid][v] = newBeta;
            }
        }
    } while (diff > m_newtonConverge);
}
Also used : structures._Doc(structures._Doc) structures._ParentDoc(structures._ParentDoc) structures._ParentDoc4DCM(structures._ParentDoc4DCM)

Example 12 with structures._ParentDoc4DCM

use of structures._ParentDoc4DCM in project IR_Base by Linda-sunshine.

the class DCMCorrLDA method sampleInChildDoc.

/**
 * Resamples the topic of every word in a child document.
 *
 * <p>For each word the current assignment is removed from the child's {@code m_sstat}
 * and from the parent's {@code m_topic_stat} / {@code m_wordTopic_stat}, a new topic is
 * drawn proportionally to
 * {@code childWordByTopicProb(...) * childTopicInDocProb(...)}, and the counts are
 * restored for the drawn topic.
 *
 * @param d the child document to resample; its {@code m_parentDoc} is cast to
 *          {@code _ParentDoc4DCM}
 */
protected void sampleInChildDoc(_ChildDoc d) {
    _ParentDoc4DCM parent = (_ParentDoc4DCM) d.m_parentDoc;

    for (_Word word : d.getWords()) {
        int oldTopic = word.getTopic();
        int wordIndex = word.getIndex();

        // Take the word out of the counts under its current topic.
        parent.m_wordTopic_stat[oldTopic][wordIndex]--;
        parent.m_topic_stat[oldTopic]--;
        d.m_sstat[oldTopic]--;

        // Fill the cache with unnormalised topic weights and total them.
        double remaining = 0;
        for (int k = 0; k < number_of_topics; k++) {
            double pWordTopic = childWordByTopicProb(k, wordIndex, parent);
            double pTopic = childTopicInDocProb(k, d, parent);
            m_topicProbCache[k] = pWordTopic * pTopic;
            remaining += m_topicProbCache[k];
        }

        // Draw a point in [0, total) and walk the cache until it is used up.
        remaining *= m_rand.nextDouble();
        int cacheSize = m_topicProbCache.length;
        int newTopic = 0;
        while (newTopic < cacheSize) {
            remaining -= m_topicProbCache[newTopic];
            if (remaining <= 0)
                break;
            newTopic++;
        }
        // Ran off the end of the cache: fall back to the last topic.
        if (newTopic == cacheSize)
            newTopic--;

        word.setTopic(newTopic);
        d.m_sstat[newTopic]++;
        parent.m_topic_stat[newTopic]++;
        parent.m_wordTopic_stat[newTopic][wordIndex]++;
    }
}
Also used : structures._ParentDoc4DCM(structures._ParentDoc4DCM) structures._Word(structures._Word)

Example 13 with structures._ParentDoc4DCM

use of structures._ParentDoc4DCM in project IR_Base by Linda-sunshine.

the class DCMCorrLDA_multi_E_test method printWordTopicDistribution.

/**
 * Writes the per-topic word distribution of one parent document to
 * {@code <document name>.txt} inside the given folder.
 *
 * <p>For every topic one line is written: the topic index with the document's
 * {@code m_topics[i]} value, followed by the words held in a {@code MyPriorityQueue}
 * of size {@code k} together with their values from {@code m_wordTopic_prob}
 * (exponentiated first when {@code m_logSpace} is set).
 *
 * <p>A {@link FileNotFoundException} is reported via {@code printStackTrace()} and
 * otherwise ignored. The writer is closed in a {@code finally} clause so the file
 * handle is released even if formatting fails with a runtime exception.
 *
 * @param d                           document to print; cast to {@code _ParentDoc4DCM}
 * @param wordTopicDistributionFolder folder in which the output file is created
 * @param k                           size passed to the ranking queue (presumably the
 *                                    number of top words kept per topic; verify against
 *                                    {@code MyPriorityQueue})
 */
protected void printWordTopicDistribution(_Doc d, File wordTopicDistributionFolder, int k) {
    _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;
    String wordTopicDistributionFile = pDoc.getName() + ".txt";
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(wordTopicDistributionFolder, wordTopicDistributionFile));
        for (int i = 0; i < number_of_topics; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int v = 0; v < vocabulary_size; v++) {
                String featureName = m_corpus.getFeature(v);
                double wordProb = pDoc.m_wordTopic_prob[i][v];
                _RankItem ri = new _RankItem(featureName, wordProb);
                fVector.add(ri);
            }
            pw.format("Topic %d(%.5f):\t", i, pDoc.m_topics[i]);
            for (_RankItem it : fVector)
                pw.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            pw.write("\n");
        }
        pw.flush();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        // Release the file handle on every path, including unexpected runtime failures.
        if (pw != null)
            pw.close();
    }
}
Also used : structures._RankItem(structures._RankItem) MyPriorityQueue(structures.MyPriorityQueue) structures._ParentDoc4DCM(structures._ParentDoc4DCM) FileNotFoundException(java.io.FileNotFoundException) File(java.io.File) PrintWriter(java.io.PrintWriter)

Example 14 with structures._ParentDoc4DCM

use of structures._ParentDoc4DCM in project IR_Base by Linda-sunshine.

the class DCMLDA4AC method sampleInParentDoc.

/**
 * Resamples the topic of every word in a parent document.
 *
 * <p>For each word the current assignment is removed from the document's
 * {@code m_sstat}, {@code m_topic_stat} and {@code m_wordTopic_stat} (and from
 * {@code word_topic_sstat} when {@code m_collectCorpusStats} is false), a new topic is
 * drawn proportionally to {@code wordTopicProb(...) * topicInDocProb(...)}, and the
 * same counters are incremented for the drawn topic.
 *
 * @param d the document to resample; cast to {@code _ParentDoc4DCM}
 */
protected void sampleInParentDoc(_Doc d) {
    _ParentDoc4DCM parent = (_ParentDoc4DCM) d;

    for (_Word word : parent.getWords()) {
        int oldTopic = word.getTopic();
        int wordIndex = word.getIndex();

        // Take the word out of the counts under its current topic.
        parent.m_sstat[oldTopic]--;
        parent.m_topic_stat[oldTopic]--;
        parent.m_wordTopic_stat[oldTopic][wordIndex]--;
        if (!m_collectCorpusStats)
            word_topic_sstat[oldTopic][wordIndex]--;

        // Fill the cache with unnormalised topic weights and total them.
        double remaining = 0;
        for (int k = 0; k < number_of_topics; k++) {
            double pWordTopic = wordTopicProb(k, wordIndex, parent);
            double pTopicPDoc = topicInDocProb(k, parent);
            m_topicProbCache[k] = pWordTopic * pTopicPDoc;
            remaining += m_topicProbCache[k];
        }

        // Draw a point in [0, total) and walk the cache until it is used up.
        remaining *= m_rand.nextDouble();
        int newTopic = 0;
        while (newTopic < number_of_topics) {
            remaining -= m_topicProbCache[newTopic];
            if (remaining <= 0)
                break;
            newTopic++;
        }
        // Ran past the last topic: fall back to it.
        if (newTopic == number_of_topics)
            newTopic--;

        word.setTopic(newTopic);
        parent.m_sstat[newTopic]++;
        parent.m_topic_stat[newTopic]++;
        parent.m_wordTopic_stat[newTopic][wordIndex]++;
        if (!m_collectCorpusStats)
            word_topic_sstat[newTopic][wordIndex]++;
    }
}
Also used : structures._ParentDoc4DCM(structures._ParentDoc4DCM) structures._Word(structures._Word)

Example 15 with structures._ParentDoc4DCM

use of structures._ParentDoc4DCM in project IR_Base by Linda-sunshine.

the class DCMLDA4AC method sampleInChildDoc.

/**
 * Resamples the topic of every word in a child document.
 *
 * <p>For each word the current assignment is removed from the child's {@code m_sstat},
 * from the parent's {@code m_topic_stat} / {@code m_wordTopic_stat}, and (when
 * {@code m_collectCorpusStats} is false) from {@code word_topic_sstat}. A new topic is
 * then drawn proportionally to {@code wordTopicProb(...) * topicInDocProb(...)} and
 * every one of those counters is incremented again for the drawn topic, so the total
 * counts are unchanged by a resampling step.
 *
 * @param d the child document to resample; its {@code m_parentDoc} is cast to
 *          {@code _ParentDoc4DCM}
 */
protected void sampleInChildDoc(_ChildDoc d) {
    int wid, tid;
    double normalizedProb;
    _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d.m_parentDoc;
    for (_Word w : d.getWords()) {
        tid = w.getTopic();
        wid = w.getIndex();

        // Remove the word's current assignment from all counters.
        pDoc.m_wordTopic_stat[tid][wid]--;
        pDoc.m_topic_stat[tid]--;
        d.m_sstat[tid]--;
        if (!m_collectCorpusStats)
            word_topic_sstat[tid][wid]--;

        // Cache the unnormalised weight of each topic and accumulate their sum.
        normalizedProb = 0;
        for (tid = 0; tid < number_of_topics; tid++) {
            double pWordTopic = wordTopicProb(tid, wid, pDoc);
            double pTopic = topicInDocProb(tid, d);
            m_topicProbCache[tid] = pWordTopic * pTopic;
            normalizedProb += m_topicProbCache[tid];
        }

        // Draw a point in [0, sum) and find the topic whose weight interval contains it.
        normalizedProb *= m_rand.nextDouble();
        for (tid = 0; tid < m_topicProbCache.length; tid++) {
            normalizedProb -= m_topicProbCache[tid];
            if (normalizedProb <= 0)
                break;
        }
        // Ran off the end of the cache: fall back to the last topic.
        if (tid == m_topicProbCache.length)
            tid--;

        // Record the new assignment in all counters.
        w.setTopic(tid);
        d.m_sstat[tid]++;
        pDoc.m_topic_stat[tid]++;
        pDoc.m_wordTopic_stat[tid][wid]++;
        // Must mirror the decrement above; the original decremented a second time here.
        if (!m_collectCorpusStats)
            word_topic_sstat[tid][wid]++;
    }
}
Also used : structures._ParentDoc4DCM(structures._ParentDoc4DCM) structures._Word(structures._Word)

Aggregations

structures._ParentDoc4DCM (structures._ParentDoc4DCM): 28, structures._ChildDoc (structures._ChildDoc): 23, structures._Doc (structures._Doc): 14, structures._Word (structures._Word): 13, File (java.io.File): 7, structures._Stn (structures._Stn): 6, FileNotFoundException (java.io.FileNotFoundException): 5, PrintWriter (java.io.PrintWriter): 5, structures._SparseFeature (structures._SparseFeature): 5, structures._ParentDoc (structures._ParentDoc): 4, MyPriorityQueue (structures.MyPriorityQueue): 3, structures._RankItem (structures._RankItem): 3, HashMap (java.util.HashMap): 2, LBFGS (LBFGS.LBFGS): 1, JSONArray (json.JSONArray): 1, JSONException (json.JSONException): 1, JSONObject (json.JSONObject): 1