Search in sources :

Example 1 with structures._Doc4DCMLDA

use of structures._Doc4DCMLDA in project IR_Base by Linda-sunshine.

the class sparseDCMLDA method updateBeta.

/**
 * Iteratively re-estimates the word prior {@code m_beta[tid]} of topic {@code tid}
 * from the statistics of all documents in {@code m_trainSet}.
 *
 * <p>Each pass replaces every {@code m_beta[tid][v]} by
 * {@code m_beta[tid][v] * (numerator[v] / denominator) + d_beta}, where
 * <ul>
 *   <li>{@code numerator[v]} sums, over the documents,
 *       {@code digamma(m_mu * beta_v + n_dv) - digamma(m_mu * beta_v)} with
 *       {@code n_dv = m_wordTopic_stat[tid][v]};</li>
 *   <li>{@code denominator} sums, over the documents,
 *       {@code digamma(m_mu * sum(beta) + m_sstat[tid]) - digamma(m_mu * sum(beta))}.</li>
 * </ul>
 * Words that were never counted under this topic get a ratio of 0, so their new value is
 * exactly {@code d_beta}. Passes repeat until the largest absolute change is at most
 * {@code m_newtonConverge} or {@code m_newtonIter} passes have been made.
 *
 * <p>NOTE(review): the update has the shape of a fixed-point iteration for a
 * Dirichlet-multinomial prior - confirm against the model derivation.
 *
 * @param tid index of the topic whose row of {@code m_beta} is updated in place;
 *            {@code m_totalBeta[tid]} is refreshed at the start of every pass
 */
@Override
protected void updateBeta(int tid) {
    double diff = 0;
    int iteration = 0;
    do {
        diff = 0;
        double wordNum4Tid = 0;
        // Freshly allocated arrays are already zero-filled; no explicit fill is needed.
        double[] wordNum4Tid4V = new double[vocabulary_size];
        double[] totalBetaNumerator = new double[vocabulary_size];
        double totalBetaDenominator = 0;

        m_totalBeta[tid] = Utils.sumOfArray(m_beta[tid]);
        double digBeta4Tid = Utils.digamma(m_mu * m_totalBeta[tid]);

        // digamma(m_mu * beta_v) does not depend on the document, so evaluate it once per
        // word instead of once per (document, word) pair.
        // Assumes Utils.digamma is a deterministic function of its argument.
        double[] digBeta4V = new double[vocabulary_size];
        for (int v = 0; v < vocabulary_size; v++) {
            digBeta4V[v] = Utils.digamma(m_mu * m_beta[tid][v]);
        }

        for (_Doc d : m_trainSet) {
            _Doc4DCMLDA DCMDoc = (_Doc4DCMLDA) d;
            totalBetaDenominator += Utils.digamma(m_mu * m_totalBeta[tid] + DCMDoc.m_sstat[tid]) - digBeta4Tid;
            for (int v = 0; v < vocabulary_size; v++) {
                wordNum4Tid += DCMDoc.m_wordTopic_stat[tid][v];
                wordNum4Tid4V[v] += DCMDoc.m_wordTopic_stat[tid][v];
                // Kept as two separate updates so the floating-point result is unchanged.
                totalBetaNumerator[v] += Utils.digamma(m_mu * m_beta[tid][v] + DCMDoc.m_wordTopic_stat[tid][v]);
                totalBetaNumerator[v] -= digBeta4V[v];
            }
        }

        // If no word at all was counted under this topic, m_beta[tid] is left untouched and
        // diff stays 0, which ends the iteration after this pass.
        if (wordNum4Tid != 0) {
            for (int v = 0; v < vocabulary_size; v++) {
                double deltaBeta;
                if (wordNum4Tid4V[v] == 0) {
                    // Word never counted under this topic: the new value collapses to d_beta.
                    deltaBeta = 0;
                } else {
                    deltaBeta = totalBetaNumerator[v] / totalBetaDenominator;
                }
                double newBeta = m_beta[tid][v] * deltaBeta + d_beta;
                double t_diff = Math.abs(m_beta[tid][v] - newBeta);
                if (t_diff > diff) {
                    diff = t_diff;
                }
                m_beta[tid][v] = newBeta;
            }
        }
        iteration++;
    } while ((diff > m_newtonConverge) && (iteration < m_newtonIter));
    System.out.println("iteration\t" + iteration);
}
Also used : structures._Doc4DCMLDA(structures._Doc4DCMLDA) structures._Doc(structures._Doc)

Example 2 with structures._Doc4DCMLDA

use of structures._Doc4DCMLDA in project IR_Base by Linda-sunshine.

the class DCMLDA method estThetaInDoc.

/**
 * Applies {@code Utils.L1Normalization} to every per-topic row of the document's
 * {@code m_wordTopic_prob} table and then to its topic vector {@code m_topics}.
 *
 * <p>Presumably the normalisation happens in place so that each array sums to one -
 * confirm against {@code Utils.L1Normalization}.
 *
 * @param d the document to normalise; it is cast to {@code _Doc4DCMLDA}
 */
@Override
protected void estThetaInDoc(_Doc d) {
    _Doc4DCMLDA dcmDoc = (_Doc4DCMLDA) d;

    // One word distribution per topic.
    for (int topic = 0; topic < number_of_topics; topic++) {
        Utils.L1Normalization(dcmDoc.m_wordTopic_prob[topic]);
    }

    // The document's topic proportions.
    Utils.L1Normalization(d.m_topics);
}
Also used : structures._Doc4DCMLDA(structures._Doc4DCMLDA)

Example 3 with structures._Doc4DCMLDA

use of structures._Doc4DCMLDA in project IR_Base by Linda-sunshine.

the class DCMLDA method cal_logLikelihood4Partial.

/**
 * Computes the log-likelihood of a document's test words.
 *
 * <p>For every word returned by {@code getTestWords()} the likelihood is the sum over all
 * topics of {@code m_topics[k] * m_wordTopic_prob[k][wordIndex]}; the natural logs of these
 * per-word likelihoods are added up.
 *
 * @param d the document to score; it is cast to {@code _Doc4DCMLDA}
 * @return the sum of the per-word log-likelihoods ({@code 0} when there are no test words)
 */
protected double cal_logLikelihood4Partial(_Doc d) {
    _Doc4DCMLDA dcmDoc = (_Doc4DCMLDA) d;
    double logLikelihood = 0;

    for (_Word testWord : dcmDoc.getTestWords()) {
        int wordIndex = testWord.getIndex();

        // Likelihood of this word, accumulated over the topics in ascending order.
        double wordLikelihood = 0;
        for (int topic = 0; topic < number_of_topics; topic++) {
            wordLikelihood += dcmDoc.m_topics[topic] * dcmDoc.m_wordTopic_prob[topic][wordIndex];
        }

        logLikelihood += Math.log(wordLikelihood);
    }

    return logLikelihood;
}
Also used : structures._Doc4DCMLDA(structures._Doc4DCMLDA) structures._Word(structures._Word)

Example 4 with structures._Doc4DCMLDA

use of structures._Doc4DCMLDA in project IR_Base by Linda-sunshine.

the class DCMLDA method updateBeta.

/**
 * Iteratively re-estimates the word prior {@code m_beta[tid]} of topic {@code tid}
 * from the statistics of all documents in {@code m_trainSet}.
 *
 * <p>Each pass replaces every {@code m_beta[tid][v]} by
 * {@code m_beta[tid][v] * (numerator[v] / denominator) + d_beta}, where
 * <ul>
 *   <li>{@code numerator[v]} sums, over the documents,
 *       {@code digamma(beta_v + n_dv) - digamma(beta_v)} with
 *       {@code n_dv = m_wordTopic_stat[tid][v]};</li>
 *   <li>{@code denominator} sums, over the documents,
 *       {@code digamma(sum(beta) + m_sstat[tid]) - digamma(sum(beta))}.</li>
 * </ul>
 * Words that were never counted under this topic get a ratio of 0, so their new value is
 * exactly {@code d_beta}. Passes repeat until the largest absolute change is at most
 * {@code m_newtonConverge} or {@code m_newtonIter} passes have been made. A one-line
 * summary is then printed to standard output.
 *
 * <p>NOTE(review): the update has the shape of a fixed-point iteration for a
 * Dirichlet-multinomial prior - confirm against the model derivation.
 *
 * @param tid index of the topic whose row of {@code m_beta} is updated in place;
 *            {@code m_totalBeta[tid]} is refreshed at the start of every pass
 */
protected void updateBeta(int tid) {
    double diff = 0;
    int iteration = 0;
    do {
        double wordNum4Tid = 0;
        double[] wordNum4Tid4V = new double[vocabulary_size];
        double totalBetaDenominator = 0;
        double[] totalBetaNumerator = new double[vocabulary_size];

        m_totalBeta[tid] = Utils.sumOfArray(m_beta[tid]);
        double digBeta4Tid = Utils.digamma(m_totalBeta[tid]);

        // digamma(beta_v) does not depend on the document, so evaluate it once per word
        // instead of once per (document, word) pair.
        // Assumes Utils.digamma is a deterministic function of its argument.
        double[] digBeta4V = new double[vocabulary_size];
        for (int v = 0; v < vocabulary_size; v++) {
            digBeta4V[v] = Utils.digamma(m_beta[tid][v]);
        }

        for (_Doc d : m_trainSet) {
            _Doc4DCMLDA DCMDoc = (_Doc4DCMLDA) d;
            totalBetaDenominator += Utils.digamma(m_totalBeta[tid] + DCMDoc.m_sstat[tid]) - digBeta4Tid;
            for (int v = 0; v < vocabulary_size; v++) {
                wordNum4Tid += DCMDoc.m_wordTopic_stat[tid][v];
                wordNum4Tid4V[v] += DCMDoc.m_wordTopic_stat[tid][v];
                totalBetaNumerator[v] += Utils.digamma(m_beta[tid][v] + DCMDoc.m_wordTopic_stat[tid][v]) - digBeta4V[v];
            }
        }

        diff = 0;
        // If no word at all was counted under this topic, m_beta[tid] is left untouched and
        // diff stays 0, which ends the iteration after this pass.
        if (wordNum4Tid != 0) {
            for (int v = 0; v < vocabulary_size; v++) {
                double deltaBeta;
                if (wordNum4Tid4V[v] == 0) {
                    // Word never counted under this topic: the new value collapses to d_beta.
                    deltaBeta = 0;
                } else {
                    deltaBeta = totalBetaNumerator[v] / totalBetaDenominator;
                }
                double newBeta = m_beta[tid][v] * deltaBeta + d_beta;
                double t_diff = Math.abs(m_beta[tid][v] - newBeta);
                if (t_diff > diff) {
                    diff = t_diff;
                }
                m_beta[tid][v] = newBeta;
            }
        }
        iteration++;
    } while ((diff > m_newtonConverge) && (iteration < m_newtonIter));
    // Terminate the line so that messages from successive calls do not run together.
    System.out.format("[Info]beta update finishes at iteration %d, diff: %.3f%n", iteration, diff);
}
Also used : structures._Doc4DCMLDA(structures._Doc4DCMLDA) structures._Doc(structures._Doc)

Example 5 with structures._Doc4DCMLDA

use of structures._Doc4DCMLDA in project IR_Base by Linda-sunshine.

the class sparseDCMLDA_test method printWordTopicDistribution.

/**
 * Writes the per-topic word ranking of one document to the text file
 * {@code <d.getName()>.txt} inside {@code wordTopicDistributionFolder}.
 *
 * <p>One line is written per topic: the topic index and {@code d.m_topics[i]}, followed by
 * the entries of a {@code MyPriorityQueue} of capacity {@code k} that was fed every
 * vocabulary word together with its {@code m_wordTopic_prob[i][v]} value. Values are passed
 * through {@code Math.exp} first when {@code m_logSpace} is set.
 *
 * <p>If the file cannot be opened, the stack trace is printed and nothing is written.
 *
 * @param d                           the document to dump; it is cast to {@code _Doc4DCMLDA}
 * @param wordTopicDistributionFolder directory in which the output file is created
 * @param k                           capacity handed to the ranking queue (presumably the
 *                                    number of top words kept per topic - confirm against
 *                                    {@code MyPriorityQueue})
 */
protected void printWordTopicDistribution(_Doc d, File wordTopicDistributionFolder, int k) {
    String wordTopicDistributionFile = d.getName() + ".txt";
    try {
        PrintWriter pw = new PrintWriter(new File(wordTopicDistributionFolder, wordTopicDistributionFile));
        try {
            _Doc4DCMLDA DCMDoc = (_Doc4DCMLDA) d;
            for (int i = 0; i < number_of_topics; i++) {
                MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
                for (int v = 0; v < vocabulary_size; v++) {
                    String featureName = m_corpus.getFeature(v);
                    double wordProb = DCMDoc.m_wordTopic_prob[i][v];
                    fVector.add(new _RankItem(featureName, wordProb));
                }
                pw.format("Topic %d(%.5f):\t", i, d.m_topics[i]);
                for (_RankItem it : fVector) {
                    pw.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
                }
                pw.write("\n");
            }
            pw.flush();
        } finally {
            // Release the file handle even when a runtime exception (bad cast, index out of
            // range, ...) interrupts the dump; close() also flushes buffered output.
            pw.close();
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Doc4DCMLDA(structures._Doc4DCMLDA) MyPriorityQueue(structures.MyPriorityQueue) FileNotFoundException(java.io.FileNotFoundException) File(java.io.File) PrintWriter(java.io.PrintWriter)

Aggregations

structures._Doc4DCMLDA (structures._Doc4DCMLDA)12 structures._Doc (structures._Doc)4 structures._Word (structures._Word)4 File (java.io.File)2 FileNotFoundException (java.io.FileNotFoundException)2 PrintWriter (java.io.PrintWriter)2 MyPriorityQueue (structures.MyPriorityQueue)2 structures._RankItem (structures._RankItem)2