Example 11 with structures._Word

Use of structures._Word in project IR_Base by Linda-sunshine: class LDA_Gibbs, method initialize_probability.

@Override
protected void initialize_probability(Collection<_Doc> collection) {
    // seed the topic-word sufficient statistics with the symmetric Dirichlet prior,
    // so count ratios taken later are already smoothed estimates
    for (int i = 0; i < number_of_topics; i++)
        Arrays.fill(word_topic_sstat[i], d_beta);
    Arrays.fill(m_sstat, d_beta * vocabulary_size);
    Arrays.fill(m_topicProbCache, 0);
    // initialize topic-word allocation, p(w|z)
    for (_Doc d : collection) {
        // allocate memory and randomize the initial topic assignments
        d.setTopics4Gibbs(number_of_topics, d_alpha);
        // accumulate the corpus-level counts implied by those assignments
        for (_Word w : d.getWords()) {
            word_topic_sstat[w.getTopic()][w.getIndex()]++;
            m_sstat[w.getTopic()]++;
        }
    }
    imposePrior();
}
Also used: structures._Doc, structures._Word
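
Note that initialize_probability seeds word_topic_sstat with d_beta and m_sstat with d_beta * vocabulary_size, which folds the symmetric Dirichlet prior directly into the sufficient statistics. As a sketch of the implied smoothed estimate (assuming d_beta plays the role of the prior parameter beta and vocabulary_size is V):

    \hat{p}(w \mid z = t) = \frac{\mathtt{word\_topic\_sstat}[t][w]}{\mathtt{m\_sstat}[t]} = \frac{n_{t,w} + \beta}{n_t + V\beta}

where n_{t,w} is the number of tokens of word w currently assigned to topic t and n_t = \sum_w n_{t,w}.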

Example 12 with structures._Word

Use of structures._Word in project IR_Base by Linda-sunshine: class LDA_Gibbs, method calculate_log_likelihood.

@Override
protected double calculate_log_likelihood(_Doc d) {
    int tid, wid;
    double logLikelihood = docThetaLikelihood(d), docSum = Utils.sumOfArray(d.m_sstat);
    for (_Word w : d.getWords()) {
        wid = w.getIndex();
        tid = w.getTopic();
        // log of the smoothed point estimate p(z=tid|d) * p(w=wid|z=tid),
        // evaluated at the word's current topic assignment
        logLikelihood += Math.log(d.m_sstat[tid] / docSum * word_topic_sstat[tid][wid] / m_sstat[tid]);
    }
    return logLikelihood;
}
Also used: structures._Word
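
Each token is scored at its currently sampled topic rather than marginalized over topics. Assuming d.m_sstat is seeded with d_alpha by setTopics4Gibbs (as initialize_probability above suggests), each term in the loop is the log of a smoothed point estimate:

    \log p(w_i, z_i \mid d) \approx \log\!\left( \frac{n_{d,z_i} + \alpha}{\sum_t (n_{d,t} + \alpha)} \cdot \frac{n_{z_i,w_i} + \beta}{n_{z_i} + V\beta} \right)

The docThetaLikelihood(d) term adds a document-level contribution whose exact form is defined elsewhere in the class hierarchy.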

Example 13 with structures._Word

Use of structures._Word in project IR_Base by Linda-sunshine: class LDA_Gibbs, method calculate_E_step.

@Override
public double calculate_E_step(_Doc d) {
    d.permutation();
    double p;
    int wid, tid;
    for (_Word w : d.getWords()) {
        wid = w.getIndex();
        tid = w.getTopic();
        // remove the word's topic assignment
        d.m_sstat[tid]--;
        if (m_collectCorpusStats) {
            word_topic_sstat[tid][wid]--;
            m_sstat[tid]--;
        }
        // compute the unnormalized conditional p(z_i = t | z_{-i}, w) for every topic
        p = 0;
        for (tid = 0; tid < number_of_topics; tid++) {
            m_topicProbCache[tid] = topicInDocProb(tid, d) * wordByTopicProb(tid, wid);
            p += m_topicProbCache[tid];
        }
        // inverse-CDF draw: scale a uniform sample by the total mass,
        // then walk the cache until that mass is exhausted
        p *= m_rand.nextDouble();
        tid = -1;
        while (p > 0 && tid < number_of_topics - 1) {
            tid++;
            p -= m_topicProbCache[tid];
        }
        // assign the selected topic to word
        w.setTopic(tid);
        d.m_sstat[tid]++;
        if (m_collectCorpusStats) {
            word_topic_sstat[tid][wid]++;
            m_sstat[tid]++;
        }
    }
    return 0;
    // if (m_collectCorpusStats == false || m_converge > 0)
    //     return 0;
    // // return calculate_log_likelihood(d);
    // else
    //     return 0;
}
Also used: structures._Word
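
calculate_E_step is a standard collapsed Gibbs update: decrement the token's counts, build the unnormalized conditional over topics, draw a new topic by inverse-CDF search, then re-increment. The draw itself is a reusable pattern; below is a minimal self-contained sketch of just that step (MultinomialSampler and sampleIndex are illustrative names, not part of IR_Base):

import java.util.Random;

// Illustrative helper: draws index i with probability weights[i] / sum(weights).
public final class MultinomialSampler {

    public static int sampleIndex(double[] weights, Random rand) {
        double total = 0;
        for (double w : weights) total += w;
        // scale a uniform draw by the total unnormalized mass
        double p = total * rand.nextDouble();
        int i = -1;
        // subtract weights until the sampled mass is exhausted; the loop bound
        // guards against floating-point leftovers selecting an out-of-range index
        while (p > 0 && i < weights.length - 1) {
            i++;
            p -= weights[i];
        }
        return i;
    }
}

With such a helper, the body of the word loop would reduce to filling m_topicProbCache and calling sampleIndex(m_topicProbCache, m_rand).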

Example 14 with structures._Word

Use of structures._Word in project IR_Base by Linda-sunshine: class LDA_Gibbs_test, method cal_logLikelihood_partial.

protected double cal_logLikelihood_partial(_Doc d) {
    double docLogLikelihood = 0;
    for (_Word w : d.getTestWords()) {
        int wid = w.getIndex();
        double wordLogLikelihood = 0;
        // marginalize over topics: p(w|d) = sum_k p(k|d) * p(w|k)
        for (int k = 0; k < number_of_topics; k++) {
            double wordPerTopicLikelihood = d.m_topics[k] * topic_term_probabilty[k][wid];
            wordLogLikelihood += wordPerTopicLikelihood;
        }
        docLogLikelihood += Math.log(wordLogLikelihood);
    }
    return docLogLikelihood;
}
Also used: structures._Word
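
Unlike calculate_log_likelihood, this held-out score marginalizes over topics, combining the inferred per-document mixture d.m_topics with the topic-word distributions:

    \log p(\text{test}(d) \mid d) = \sum_{w \in \text{test}(d)} \log \sum_{k=1}^{K} \theta_{d,k} \, \phi_{k,w}

A common follow-up (not shown in this class) is to convert the corpus-level sum into perplexity, \exp(-\text{log-likelihood} / \text{number of test tokens}).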

Example 15 with structures._Word

Use of structures._Word in project IR_Base by Linda-sunshine: class sparseLDA, method sampleTopicAssignment.

protected void sampleTopicAssignment(_Doc4SparseDCMLDA DCMDoc) {
    int wid, tid;
    double p;
    for (_Word w : DCMDoc.getWords()) {
        wid = w.getIndex();
        tid = w.getTopic();
        // remove the word's current topic assignment
        DCMDoc.m_sstat[tid]--;
        if (m_collectCorpusStats) {
            word_topic_sstat[tid][wid]--;
            m_sstat[tid]--;
        }
        p = 0;
        // the denominator is constant across the topic loop (hence hoisted here),
        // but not across words: DCMDoc.m_sstat was just decremented above
        double denominator = DCMDoc.m_alphaDoc + Utils.sumOfArray(DCMDoc.m_sstat);
        for (tid = 0; tid < number_of_topics; tid++) {
            m_topicProbCache[tid] = 0;
            // inactive topics keep zero probability under the sparse indicator
            if (!DCMDoc.m_topicIndicator[tid])
                continue;
            m_topicProbCache[tid] = topicInDocProb(tid, denominator, DCMDoc) * wordByTopicProb(tid, wid);
            p += m_topicProbCache[tid];
        }
        p *= m_rand.nextDouble();
        // inverse-CDF draw; start at -1 as in LDA_Gibbs so topic 0 can be selected
        // (the original started at tid = 0 and incremented before testing p,
        // which shifted every draw one topic too high)
        tid = -1;
        while (p > 0 && tid < number_of_topics - 1) {
            tid++;
            p -= m_topicProbCache[tid];
        }
        w.setTopic(tid);
        DCMDoc.m_sstat[tid]++;
        if (m_collectCorpusStats) {
            word_topic_sstat[tid][wid]++;
            m_sstat[tid]++;
        }
    }
}
Also used: structures._Word
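
The structural difference from the dense LDA_Gibbs sampler above is the indicator gate: topics with m_topicIndicator[t] == false keep a zero entry in m_topicProbCache, restricting the draw to the document's active topic set. Schematically, using the method names from the snippet (their exact forms live elsewhere in sparseLDA):

    p(z_i = t \mid \cdot) \propto \mathbb{1}[\mathtt{m\_topicIndicator}[t]] \cdot \mathtt{topicInDocProb}(t, \cdot) \cdot \mathtt{wordByTopicProb}(t, w_i)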

Aggregations

structures._Word: 69
structures._ChildDoc: 18
java.io.File: 16
java.io.FileNotFoundException: 15
java.io.PrintWriter: 15
structures._ParentDoc: 14
structures._Doc: 12
structures._Stn: 11
structures._ParentDoc4DCM: 10
structures._ChildDoc4BaseWithPhi: 9
java.util.HashMap: 5
structures._Doc4DCMLDA: 4
structures._Doc4SparseDCMLDA: 4
structures._SparseFeature: 3
Classifier.supervised.liblinear.Feature: 1
Classifier.supervised.liblinear.FeatureNode: 1
Classifier.supervised.liblinear.Model: 1
Classifier.supervised.liblinear.Parameter: 1
Classifier.supervised.liblinear.Problem: 1
Classifier.supervised.liblinear.SolverType: 1