Search in sources :

Example 66 with structures._SparseFeature

use of structures._SparseFeature in project IR_Base by Linda-sunshine.

the class languageModelBaseLine method generateReferenceModel.

/**
 * Builds the corpus-wide reference model stored in {@code m_wordSstat}.
 *
 * <p>First sums the feature values of every document in {@code m_corpus},
 * both per word index (into {@code m_wordSstat}) and overall (into
 * {@code m_allWordFrequency}). Then replaces every entry of
 * {@code m_wordSstat} with its value divided by {@code m_allWordFrequency}.
 *
 * <p>NOTE(review): {@code m_wordSstat} is not cleared here, so entries that
 * already exist when this method is called are added to -- confirm callers
 * invoke it only once per instance.
 */
public void generateReferenceModel() {
    m_allWordFrequency = 0;

    // Pass 1: accumulate raw feature values per word index and in total.
    for (_Doc doc : m_corpus.getCollection()) {
        for (_SparseFeature feature : doc.getSparse()) {
            int wordId = feature.getIndex();
            double count = feature.getValue();
            m_allWordFrequency += count;

            double accumulated = m_wordSstat.containsKey(wordId)
                    ? m_wordSstat.get(wordId) + count
                    : count;
            m_wordSstat.put(wordId, accumulated);
        }
    }

    // Pass 2: turn each accumulated value into a fraction of the overall total.
    // Overwriting the value of an existing key is not a structural change,
    // so it is safe to do while iterating over the key set.
    for (int wordId : m_wordSstat.keySet()) {
        m_wordSstat.put(wordId, m_wordSstat.get(wordId) / m_allWordFrequency);
    }
}
Also used : structures._Doc(structures._Doc) structures._SparseFeature(structures._SparseFeature)

Example 67 with structures._SparseFeature

use of structures._SparseFeature in project IR_Base by Linda-sunshine.

the class HTMM method ComputeEmissionProbsForDoc.

/**
 * Constructs the emission scores for every sentence of a document under
 * each topic.
 *
 * <p>For sentence {@code s} and topic {@code t}, {@code emission[s][t]} is
 * reset to zero and then set to the sum, over the sentence's sparse
 * features, of the feature value times
 * {@code topic_term_probabilty[t][featureIndex]}. All quantities are in
 * log-space.
 *
 * @param d the document whose sentences are scored
 */
void ComputeEmissionProbsForDoc(_Doc d) {
    for (int s = 0; s < d.getSenetenceSize(); s++) {
        _Stn sentence = d.getSentence(s);

        // Start this sentence's row from a clean slate.
        double[] scores = emission[s];
        Arrays.fill(scores, 0);

        for (int topic = 0; topic < this.number_of_topics; topic++) {
            for (_SparseFeature word : sentence.getFv()) {
                // log-space: weighted sum of per-term values
                scores[topic] += word.getValue() * topic_term_probabilty[topic][word.getIndex()];
            }
        }
    }
}
Also used : structures._Stn(structures._Stn) structures._SparseFeature(structures._SparseFeature)

Example 68 with structures._SparseFeature

use of structures._SparseFeature in project IR_Base by Linda-sunshine.

the class HTSM method ComputeEmissionProbsForDoc.

/**
 * Constructs the emission scores for every sentence of a document under
 * each topic (all values in log-space).
 *
 * <p>Each row {@code emission[s]} is first reset to zero. For the first
 * sentence of a document whose source type is {@code 2} (newEgg), the
 * sentence's sentiment label restricts the admissible topics: label
 * {@code 0} (positive) keeps only the first half of the topics, label
 * {@code 1} (negative) keeps only the second half. Excluded topics get
 * {@link Double#NEGATIVE_INFINITY}. Any other label leaves all topics
 * admissible.
 *
 * @param d the document whose sentences are scored
 */
@Override
void ComputeEmissionProbsForDoc(_Doc d) {
    for (int s = 0; s < d.getSenetenceSize(); s++) {
        _Stn sentence = d.getSentence(s);
        double[] scores = emission[s];
        Arrays.fill(scores, 0);

        int topicCount = this.number_of_topics;
        // Half-open range [first, limit) of topics that receive a real score.
        int first = 0;
        int limit = topicCount;

        // first sentence is specially handled for newEgg
        if (s == 0 && d.getSourceType() == 2) {
            int half = topicCount / 2;
            switch (sentence.getStnSentiLabel()) {
                case 0:
                    // positive sentiment in the first half; rule out the second half
                    limit = half;
                    Arrays.fill(scores, half, topicCount, Double.NEGATIVE_INFINITY);
                    break;
                case 1:
                    // negative sentiment in the second half; rule out the first half
                    first = half;
                    Arrays.fill(scores, 0, half, Double.NEGATIVE_INFINITY);
                    break;
                default:
                    // any other label: no restriction
                    break;
            }
        }

        for (int topic = first; topic < limit; topic++) {
            for (_SparseFeature word : sentence.getFv()) {
                // log-space: weighted sum of per-term values
                scores[topic] += word.getValue() * topic_term_probabilty[topic][word.getIndex()];
            }
        }
    }
}
Also used : structures._Stn(structures._Stn) structures._SparseFeature(structures._SparseFeature)

Example 69 with structures._SparseFeature

use of structures._SparseFeature in project IR_Base by Linda-sunshine.

the class pLSA method calculate_E_step.

/**
 * Runs the E-step for a single document.
 *
 * <p>For each sparse feature (word {@code j} with value {@code v}) this
 * computes the posterior weight of the background model against the
 * topic mixture, then distributes the remaining mass over the topics,
 * accumulating it into {@code d.m_sstat} and, when
 * {@code m_collectCorpusStats} is set, into {@code word_topic_sstat}.
 *
 * @param d the document to process
 * @return {@code calculate_log_likelihood(d)} when corpus statistics are
 *         not being collected or {@code m_converge > 0}; otherwise
 *         {@code 1}, since the likelihood is not needed
 */
@Override
public double calculate_E_step(_Doc d) {
    for (_SparseFeature feature : d.getSparse()) {
        // jth word in doc
        int j = feature.getIndex();
        double v = feature.getValue();

        // ----------------- compute posterior -----------------
        // shall we compute it in log space?
        double topicMix = 0;
        for (int k = 0; k < this.number_of_topics; k++) {
            topicMix += d.m_topics[k] * topic_term_probabilty[k][j];
        }

        // posterior of background probability
        double backgroundPart = m_lambda * background_probability[j];
        double propB = backgroundPart / (backgroundPart + (1 - m_lambda) * topicMix);

        // ----------------- compute and accumulate expectations -----------------
        // Share of this word's value that is not explained by the background.
        double topicalMass = v * (1 - propB);
        for (int k = 0; k < this.number_of_topics; k++) {
            // expectation of the term under topic k
            double expectation = topicalMass * d.m_topics[k] * topic_term_probabilty[k][j] / topicMix;
            d.m_sstat[k] += expectation;
            // when testing, we don't need to collect sufficient statistics
            if (m_collectCorpusStats) {
                word_topic_sstat[k][j] += expectation;
            }
        }
    }

    if (!m_collectCorpusStats || m_converge > 0) {
        return calculate_log_likelihood(d);
    }
    // no need to compute likelihood
    return 1;
}
Also used : structures._SparseFeature(structures._SparseFeature)

Example 70 with structures._SparseFeature

use of structures._SparseFeature in project IR_Base by Linda-sunshine.

the class pLSA method initialize_probability.

/**
 * Initializes the model's probabilities from a document collection.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>fill {@code background_probability} with {@code d_beta - 1.0}, add
 *       every document's feature values to it, and L1-normalize it;</li>
 *   <li>call {@code setTopics(number_of_topics, d_alpha - 1.0)} on every
 *       document (per the original comment this allocates and randomizes
 *       the per-document topic proportions p(z|d));</li>
 *   <li>randomize each row of {@code word_topic_sstat} with
 *       {@code d_beta - 1.0}, then call {@code imposePrior()} and
 *       {@code calculate_M_step(0)} to derive p(w|z,\phi).</li>
 * </ol>
 *
 * @param collection the documents used for initialization
 */
@Override
protected void initialize_probability(Collection<_Doc> collection) {
    double betaTerm = d_beta - 1.0;

    // initialize background topic
    Arrays.fill(background_probability, betaTerm);
    for (_Doc doc : collection) {
        // initialize topic document proportion, p(z|d): allocate memory and randomize it
        doc.setTopics(number_of_topics, d_alpha - 1.0);
        for (_SparseFeature feature : doc.getSparse()) {
            background_probability[feature.getIndex()] += feature.getValue();
        }
    }
    Utils.L1Normalization(background_probability);

    // initialize term topic matrix p(w|z,\phi)
    for (int topic = 0; topic < number_of_topics; topic++) {
        Utils.randomize(word_topic_sstat[topic], betaTerm);
    }
    imposePrior();
    calculate_M_step(0);
}
Also used : structures._Doc(structures._Doc) structures._SparseFeature(structures._SparseFeature)

Aggregations

structures._SparseFeature (structures._SparseFeature) 94, structures._ChildDoc (structures._ChildDoc) 14, structures._Doc (structures._Doc) 14, structures._Review (structures._Review) 14, HashMap (java.util.HashMap) 7, structures._ParentDoc (structures._ParentDoc) 7, structures._Stn (structures._Stn) 7, Feature (Classifier.supervised.liblinear.Feature) 6, FeatureNode (Classifier.supervised.liblinear.FeatureNode) 6, structures._RankItem (structures._RankItem) 5, File (java.io.File) 3, PrintWriter (java.io.PrintWriter) 3, Classifier.supervised.modelAdaptation._AdaptStruct (Classifier.supervised.modelAdaptation._AdaptStruct) 2, FileNotFoundException (java.io.FileNotFoundException) 2, IOException (java.io.IOException) 2, ArrayList (java.util.ArrayList) 2, Map (java.util.Map) 2, Entry (java.util.Map.Entry) 2, structures._ChildDoc4BaseWithPhi (structures._ChildDoc4BaseWithPhi) 2, structures._HDPThetaStar (structures._HDPThetaStar) 2