Search in sources :

Example 11 with structures._Stn

use of structures._Stn in project IR_Base by Linda-sunshine.

the class corrLDA_Gibbs method initialize_probability.

/**
 * Resets the sufficient statistics to their smoothing values and then adds
 * the counts implied by the topic currently assigned to every word.
 *
 * Every cell of the first number_of_topics rows of word_topic_sstat is set to
 * d_beta and every entry of m_sstat to d_beta * vocabulary_size before any
 * word is counted.
 *
 * @param collection documents to initialize; parent and child documents are
 *                   set up through different calls
 */
@Override
protected void initialize_probability(Collection<_Doc> collection) {
    createSpace();

    // Seed the statistics with the smoothing mass.
    for (int k = 0; k < number_of_topics; k++) {
        Arrays.fill(word_topic_sstat[k], d_beta);
    }
    Arrays.fill(m_sstat, d_beta * vocabulary_size);

    for (_Doc doc : collection) {
        if (doc instanceof _ParentDoc) {
            // Parent documents also get a topic vector on each sentence.
            for (_Stn sentence : doc.getSentences()) {
                sentence.setTopicsVct(number_of_topics);
            }
            doc.setTopics4Gibbs(number_of_topics, 0);
        } else if (doc instanceof _ChildDoc) {
            _ChildDoc child = (_ChildDoc) doc;
            child.setTopics4Gibbs_LDA(number_of_topics, 0);
        }

        // Count each word under the topic it currently carries
        // (presumably assigned by the setTopics4Gibbs* calls above; verify).
        for (_Word word : doc.getWords()) {
            int topicId = word.getTopic();
            int wordId = word.getIndex();
            word_topic_sstat[topicId][wordId]++;
            m_sstat[topicId]++;
        }
    }

    imposePrior();
    m_statisticsNormalized = false;
}
Also used : structures._Stn(structures._Stn) structures._ChildDoc(structures._ChildDoc) structures._Doc(structures._Doc) structures._ParentDoc(structures._ParentDoc) structures._Word(structures._Word)

Example 12 with structures._Stn

use of structures._Stn in project IR_Base by Linda-sunshine.

the class corrLDA_Gibbs method initTest.

/**
 * Prepares a parent document and each of its child documents for test-time
 * sampling and appends all of them to the sample set (parent first, then
 * children in m_childDocs order).
 *
 * For every document the test length passed on is the document's total length
 * scaled by m_testWord4PerplexityProportion and truncated to an int.
 *
 * @param sampleTestSet list that receives the prepared documents
 * @param d             document to prepare; it is cast to _ParentDoc, so any
 *                      other type fails with a ClassCastException
 */
protected void initTest(ArrayList<_Doc> sampleTestSet, _Doc d) {
    _ParentDoc parent = (_ParentDoc) d;

    for (_Stn sentence : parent.getSentences()) {
        sentence.setTopicsVct(number_of_topics);
    }

    int parentTestLength = (int) (m_testWord4PerplexityProportion * parent.getTotalDocLength());
    parent.setTopics4GibbsTest(number_of_topics, 0, parentTestLength);
    sampleTestSet.add(parent);

    for (_ChildDoc child : parent.m_childDocs) {
        int childTestLength = (int) (m_testWord4PerplexityProportion * child.getTotalDocLength());
        child.setTopics4GibbsTest(number_of_topics, 0, childTestLength);
        sampleTestSet.add(child);
    }
}
Also used : structures._Stn(structures._Stn) structures._ChildDoc(structures._ChildDoc) structures._ParentDoc(structures._ParentDoc)

Example 13 with structures._Stn

use of structures._Stn in project IR_Base by Linda-sunshine.

the class languageModelBaseLine method rankChild4StnByLanguageModel.

/**
 * Scores every child document of a parent against one sentence using a
 * smoothed unigram language model and returns the log-likelihoods by name.
 *
 * For each sentence feature the word probability is
 * (1 - alpha) * count(word, child) / |child| + alpha * getReferenceProb(word),
 * with alpha = mu / (mu + |child|) and mu fixed at 1000. The sentence score is
 * the sum of log(probability) weighted by the feature's value in the sentence.
 *
 * A child document of length zero contributes no document term (alpha is 1),
 * so its score comes from the reference probability alone instead of the
 * 0/0 = NaN the unguarded division would produce.
 *
 * @param stnObj sentence whose sparse features are scored
 * @param pDoc   parent document whose child documents are ranked
 * @return map from child document name to the sentence log-likelihood; a
 *         score is negative infinity when a word's smoothed probability is 0
 */
protected HashMap<String, Double> rankChild4StnByLanguageModel(_Stn stnObj, _ParentDoc pDoc) {
    HashMap<String, Double> childLikelihoodMap = new HashMap<String, Double>();
    double smoothingMu = 1000;
    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        int cDocLen = cDoc.getTotalDocLength();
        _SparseFeature[] fv = cDoc.getSparse();
        double stnLogLikelihood = 0;
        double alphaDoc = smoothingMu / (smoothingMu + cDocLen);
        _SparseFeature[] sv = stnObj.getFv();
        for (_SparseFeature svWord : sv) {
            int wid = svWord.getIndex();
            double stnVal = svWord.getValue();
            int featureIndex = Utils.indexOf(fv, wid);
            // Words missing from the child document count as zero occurrences.
            double docVal = 0;
            if (featureIndex != -1) {
                docVal = fv[featureIndex].getValue();
            }
            // Guard the empty-document case: 0.0 / 0 is NaN and would poison the sum.
            double smoothingProb = cDocLen > 0 ? (1 - alphaDoc) * docVal / (cDocLen) : 0.0;
            smoothingProb += alphaDoc * getReferenceProb(wid);
            stnLogLikelihood += stnVal * Math.log(smoothingProb);
        }
        childLikelihoodMap.put(cDoc.getName(), stnLogLikelihood);
    }
    return childLikelihoodMap;
}
Also used : structures._ChildDoc(structures._ChildDoc) HashMap(java.util.HashMap) structures._SparseFeature(structures._SparseFeature)

Example 14 with structures._Stn

use of structures._Stn in project IR_Base by Linda-sunshine.

the class HTMM method accPhiStat.

// Accumulates word-topic statistics for one document from the per-sentence
// topic probabilities stored in p_dwzpsi.
// For sentence t and topic i the probability used is the sum of
// p_dwzpsi[t][i + j * number_of_topics] for j = 0 .. constant - 1; each word of
// the sentence adds (its feature value * that probability) to word_topic_sstat[i][word].
void accPhiStat(_Doc d) {
    for (int stnIdx = 0; stnIdx < d.getSenetenceSize(); stnIdx++) {
        _Stn sentence = d.getSentence(stnIdx);
        for (_SparseFeature feature : sentence.getFv()) {
            int wordId = feature.getIndex();
            // feature value is used as the word's frequency weight
            double frequency = feature.getValue();
            for (int topic = 0; topic < number_of_topics; topic++) {
                double topicProb = p_dwzpsi[stnIdx][topic];
                for (int block = 1; block < constant; block++) {
                    topicProb += p_dwzpsi[stnIdx][topic + block * number_of_topics];
                }
                word_topic_sstat[topic][wordId] += frequency * topicProb;
            }
        }
    }
}
Also used : structures._Stn(structures._Stn) structures._SparseFeature(structures._SparseFeature)

Example 15 with structures._Stn

use of structures._Stn in project IR_Base by Linda-sunshine.

the class HTMM method docSummary.

/**
 * Prints, for every product and every topic, the ranked sentences collected
 * from the training documents of that product, to both standard output and
 * summaryWriter. The writer is flushed and closed once all products are done.
 *
 * A sentence's score is the document's m_topics value for the topic plus the
 * sum over the sentence's features of (feature value * topic_term_probabilty
 * for that topic and word), divided by the sentence length.
 *
 * @param productList item IDs to summarize; matched case-insensitively
 *                    against each training document's item ID
 */
public void docSummary(String[] productList) {
    final String headerFormat = "Product: %s, Topic: %d\n";
    final String rowFormat = "%s\t%.3f\n";

    for (String productId : productList) {
        for (int topic = 0; topic < number_of_topics; topic++) {
            // top three sentences per topic per product
            MyPriorityQueue<_RankItem> topSentences = new MyPriorityQueue<_RankItem>(3);

            for (_Doc doc : m_trainSet) {
                if (!doc.getItemID().equalsIgnoreCase(productId)) {
                    continue; // document belongs to a different product
                }
                for (int stnIdx = 0; stnIdx < doc.getSenetenceSize(); stnIdx++) {
                    _Stn stn = doc.getSentence(stnIdx);
                    double score = doc.m_topics[topic];
                    for (_SparseFeature feature : stn.getFv()) {
                        score += feature.getValue() * topic_term_probabilty[topic][feature.getIndex()];
                    }
                    score /= stn.getLength();
                    topSentences.add(new _RankItem(stn.getRawSentence(), score));
                }
            }

            System.out.format(headerFormat, productId, topic);
            summaryWriter.format(headerFormat, productId, topic);
            for (_RankItem ranked : topSentences) {
                System.out.format(rowFormat, ranked.m_name, ranked.m_value);
                summaryWriter.format(rowFormat, ranked.m_name, ranked.m_value);
            }
        }
    }

    summaryWriter.flush();
    summaryWriter.close();
}
Also used : structures._Stn(structures._Stn) structures._RankItem(structures._RankItem) MyPriorityQueue(structures.MyPriorityQueue) structures._Doc(structures._Doc) structures._SparseFeature(structures._SparseFeature)

Aggregations

structures._Stn (structures._Stn): 46, structures._ChildDoc (structures._ChildDoc): 33, structures._ParentDoc (structures._ParentDoc): 27, structures._Doc (structures._Doc): 22, HashMap (java.util.HashMap): 19, File (java.io.File): 17, PrintWriter (java.io.PrintWriter): 17, structures._Word (structures._Word): 16, FileNotFoundException (java.io.FileNotFoundException): 15, structures._SparseFeature (structures._SparseFeature): 12, structures._ParentDoc4DCM (structures._ParentDoc4DCM): 6, Map (java.util.Map): 5, structures._ChildDoc4BaseWithPhi (structures._ChildDoc4BaseWithPhi): 4, ArrayList (java.util.ArrayList): 3, IOException (java.io.IOException): 2, ParseException (java.text.ParseException): 2, TokenizeResult (structures.TokenizeResult): 2, TreeMap (java.util.TreeMap): 1, MyPriorityQueue (structures.MyPriorityQueue): 1, structures._ChildDoc4BaseWithPhi_Hard (structures._ChildDoc4BaseWithPhi_Hard): 1