Search in sources :

Example 36 with MyPriorityQueue

use of structures.MyPriorityQueue in project IR_Base by Linda-sunshine.

the class CLRWithDP method printInfo.

/**
 * Resets the per-cluster counters, recounts positive/negative adaptation
 * reviews for the cluster each user is assigned to, and prints one stat
 * string per cluster followed by the total number of clusters.
 */
public void printInfo() {
    // Queue of (cluster index, member size) items; it is populated here but
    // not read again inside this method.
    // NOTE(review): the constructor argument 5 is presumably the queue capacity - confirm.
    MyPriorityQueue<_RankItem> topClusters = new MyPriorityQueue<_RankItem>(5);

    // Step 1: wipe the counters of every active cluster and register it in the queue.
    for (int k = 0; k < m_kBar; k++) {
        m_thetaStars[k].resetCount();
        _RankItem entry = new _RankItem(k, m_thetaStars[k].getMemSize());
        topClusters.add(entry);
    }

    // Step 2: every adaptation review of a user is credited to that user's cluster.
    _thetaStar assigned = null;
    for (int u = 0; u < m_userList.size(); u++) {
        _DPAdaptStruct adaptUser = (_DPAdaptStruct) m_userList.get(u);
        assigned = adaptUser.getThetaStar();
        for (_Review rvw : adaptUser.getReviews()) {
            // Reviews that are not adaptation data are left untouched.
            if (rvw.getType() == rType.ADAPTATION) {
                if (rvw.getYLabel() == 1) {
                    assigned.incPosCount();
                } else {
                    assigned.incNegCount();
                }
            }
        }
    }

    // Step 3: report.
    System.out.print("[Info]Clusters:");
    for (int k = 0; k < m_kBar; k++) {
        System.out.format("%s\t", m_thetaStars[k].showStat());
    }
    String summary = String.format("\n[Info]%d Clusters are found in total!\n", m_kBar);
    System.out.print(summary);
}
Also used : structures._thetaStar(structures._thetaStar) structures._RankItem(structures._RankItem) structures._Review(structures._Review) MyPriorityQueue(structures.MyPriorityQueue)

Example 37 with MyPriorityQueue

use of structures.MyPriorityQueue in project IR_Base by Linda-sunshine.

the class CLRWithHDP method printInfo.

/**
 * Resets the per-cluster counters, recounts positive/negative adaptation
 * reviews for the cluster each review is assigned to, and prints one stat
 * string per cluster.
 *
 * @param printDetails when features are available ({@code m_features != null}),
 *                     controls whether the summary line and the top words of
 *                     the ranked clusters are printed; ignored otherwise
 */
public void printInfo(boolean printDetails) {
    // Queue of (cluster index, member size) items, iterated below when details are printed.
    // NOTE(review): the constructor argument 10 is presumably the queue capacity - confirm.
    MyPriorityQueue<_RankItem> popularClusters = new MyPriorityQueue<_RankItem>(10);

    // Step 1: wipe the counters of every active cluster and register it in the queue.
    for (int k = 0; k < m_kBar; k++) {
        m_hdpThetaStars[k].resetCount();
        _RankItem entry = new _RankItem(k, m_hdpThetaStars[k].getMemSize());
        popularClusters.add(entry);
    }

    // Step 2: each adaptation review is credited to the cluster the review itself points to.
    _HDPThetaStar assigned = null;
    _HDPAdaptStruct adaptUser;
    for (int u = 0; u < m_userList.size(); u++) {
        adaptUser = (_HDPAdaptStruct) m_userList.get(u);
        for (_Review rvw : adaptUser.getReviews()) {
            // Reviews that are not adaptation data are left untouched.
            if (rvw.getType() == rType.ADAPTATION) {
                assigned = rvw.getHDPThetaStar();
                if (rvw.getYLabel() == 1) {
                    assigned.incPosCount();
                } else {
                    assigned.incNegCount();
                }
            }
        }
    }

    // Step 3: report.
    System.out.print("[Info]Clusters:");
    for (int k = 0; k < m_kBar; k++) {
        System.out.format("%s\t", m_hdpThetaStars[k].showStat());
    }

    if (m_features == null) {
        String summary = String.format("\n[Info]%d Clusters are found in total!\n", m_kBar);
        System.out.print(summary);
    } else if (printDetails) {
        String summary = String.format("\n[Info]%d Clusters are found in total! And the highligt is as follows\n", m_kBar);
        System.out.print(summary);
        accumulateFeatureCount();
        for (_RankItem ranked : popularClusters) {
            printTopWords(m_hdpThetaStars[ranked.m_index]);
        }
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Review(structures._Review) MyPriorityQueue(structures.MyPriorityQueue) structures._HDPThetaStar(structures._HDPThetaStar)

Example 38 with MyPriorityQueue

use of structures.MyPriorityQueue in project IR_Base by Linda-sunshine.

the class DCMLDA method printTopWords.

/**
 * Prints the final log likelihood to stdout and writes, for every topic in
 * {@code m_beta}, the topic's normalized weight and its ranked words to
 * {@code betaFile}.
 *
 * <p>Any failure while writing is reported on stderr together with the
 * target path and the actual exception; it is not rethrown. The writer is
 * always closed, including when an exception interrupts the output.
 *
 * @param k        argument passed to each per-topic ranking queue
 *                 (presumably the number of words kept per topic - confirm)
 * @param betaFile path of the file the top words are written to
 */
@Override
public void printTopWords(int k, String betaFile) {
    double logLikelihood = calculate_log_likelihood();
    System.out.format("final log likelihood %.3f\t", logLikelihood);
    System.out.println("TopWord FilePath:" + betaFile);

    // Accumulate the topic proportions over the training set, then L1-normalize them.
    Arrays.fill(m_sstat, 0);
    for (_Doc d : m_trainSet) {
        for (int i = 0; i < number_of_topics; i++) {
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
        }
    }
    Utils.L1Normalization(m_sstat);

    PrintWriter topWordWriter = null;
    try {
        topWordWriter = new PrintWriter(new File(betaFile));
        for (int i = 0; i < m_beta.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++) {
                fVector.add(new _RankItem(m_corpus.getFeature(j), m_beta[i][j]));
            }
            topWordWriter.format("Topic %d(%.5f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                topWordWriter.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            }
            topWordWriter.write("\n");
        }
    } catch (Exception ex) {
        // The catch is deliberately broad (best effort), so report what actually
        // went wrong instead of always claiming the file was not found.
        System.err.println("Failed to write top words to " + betaFile + ": " + ex);
    } finally {
        // Close on every path so a failure mid-write does not leak the file handle.
        if (topWordWriter != null) {
            topWordWriter.close();
        }
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Doc(structures._Doc) MyPriorityQueue(structures.MyPriorityQueue) File(java.io.File) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter)

Example 39 with MyPriorityQueue

use of structures.MyPriorityQueue in project IR_Base by Linda-sunshine.

the class DCMLDA_test method printWordTopicDistribution.

/**
 * Writes the ranked word-topic probabilities of one document to
 * {@code <document name>.txt} inside {@code wordTopicDistributionFolder},
 * one line per topic.
 *
 * <p>If the output file cannot be opened, the stack trace is printed and the
 * method returns. The writer is always closed, including when an exception
 * interrupts the output.
 *
 * @param d                           document to dump; it is cast to
 *                                    {@code _Doc4DCMLDA}, so any other type
 *                                    raises {@code ClassCastException}
 * @param wordTopicDistributionFolder directory that receives the output file
 * @param k                           argument passed to each per-topic ranking queue
 *                                    (presumably the number of words kept per topic - confirm)
 */
protected void printWordTopicDistribution(_Doc d, File wordTopicDistributionFolder, int k) {
    String wordTopicDistributionFile = d.getName() + ".txt";
    // Cast before opening the file: a wrong document type then fails without
    // leaving an empty file and an unclosed writer behind.
    _Doc4DCMLDA DCMDoc = (_Doc4DCMLDA) d;

    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(wordTopicDistributionFolder, wordTopicDistributionFile));
        for (int i = 0; i < number_of_topics; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int v = 0; v < vocabulary_size; v++) {
                String featureName = m_corpus.getFeature(v);
                double wordProb = DCMDoc.m_wordTopic_prob[i][v];
                fVector.add(new _RankItem(featureName, wordProb));
            }
            pw.format("Topic %d(%.5f):\t", i, d.m_topics[i]);
            for (_RankItem it : fVector) {
                pw.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            }
            pw.write("\n");
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        // close() flushes the stream, so no separate flush() is needed; doing it
        // here releases the file handle on every path.
        if (pw != null) {
            pw.close();
        }
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Doc4DCMLDA(structures._Doc4DCMLDA) MyPriorityQueue(structures.MyPriorityQueue) FileNotFoundException(java.io.FileNotFoundException) File(java.io.File) PrintWriter(java.io.PrintWriter)

Aggregations

MyPriorityQueue (structures.MyPriorityQueue): 39; structures._RankItem (structures._RankItem): 39; File (java.io.File): 27; PrintWriter (java.io.PrintWriter): 27; structures._Doc (structures._Doc): 25; FileNotFoundException (java.io.FileNotFoundException): 20; structures._ParentDoc4DCM (structures._ParentDoc4DCM): 3; IOException (java.io.IOException): 2; structures._Doc4DCMLDA (structures._Doc4DCMLDA): 2; structures._Review (structures._Review): 2; SparseDoubleMatrix2D (cern.colt.matrix.tdouble.impl.SparseDoubleMatrix2D): 1; ArrayList (java.util.ArrayList): 1; structures._HDPThetaStar (structures._HDPThetaStar): 1; structures._Node (structures._Node): 1; structures._QUPair (structures._QUPair): 1; structures._Query (structures._Query): 1; structures._SparseFeature (structures._SparseFeature): 1; structures._Stn (structures._Stn): 1; structures._thetaStar (structures._thetaStar): 1