Search in sources :

Example 26 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class CLRWithHDP method printTopWords.

/**
 * Prints three ranked feature lists for a cluster to standard output: features
 * ranked by (weight * term frequency), features ranked by (-weight * term
 * frequency), and language-model features ranked by their statistic.
 *
 * @param cluster the cluster whose model weights and language-model statistics are printed
 */
void printTopWords(_HDPThetaStar cluster) {
    // The queue is constructed with 30; presumably that bounds how many items it
    // retains -- confirm against MyPriorityQueue.
    MyPriorityQueue<_RankItem> ranking = new MyPriorityQueue<_RankItem>(30);
    double[] lmCounts = cluster.getLMStat();
    double[] weights = cluster.getModel();
    int[] termCounts = m_tf_count[cluster.getIndex()];

    System.out.format("Cluster %d (%d)\n[positive]: ", cluster.getIndex(), cluster.getMemSize());

    // Positive section: index 0 of the weight vector (the bias term) is skipped,
    // and weight f is paired with term count f - 1.
    for (int f = 1; f < weights.length; f++) {
        double scale;
        if (termCounts[f - 1] == 0) {
            // A zero count is replaced by 0.1 so the weight still contributes to the score.
            scale = 0.1;
        } else {
            scale = termCounts[f - 1];
        }
        ranking.add(new _RankItem(f, weights[f] * scale));
    }
    for (_RankItem entry : ranking) {
        System.out.format("%s:%.3f\t", m_features[entry.m_index], weights[entry.m_index]);
    }

    // Negative section: same scoring with the weight negated; the printed value
    // is still the raw (un-negated, unscaled) weight.
    ranking.clear();
    System.out.format("\n[negative]: ");
    for (int f = 1; f < weights.length; f++) {
        double scale;
        if (termCounts[f - 1] == 0) {
            scale = 0.1;
        } else {
            scale = termCounts[f - 1];
        }
        ranking.add(new _RankItem(f, -weights[f] * scale));
    }
    for (_RankItem entry : ranking) {
        System.out.format("%s:%.3f\t", m_features[entry.m_index], weights[entry.m_index]);
    }

    // Popular section: language-model features ranked directly by their statistic.
    ranking.clear();
    System.out.format("\n[popular]: ");
    for (int f = 0; f < lmCounts.length; f++) {
        ranking.add(new _RankItem(f, lmCounts[f]));
    }
    for (_RankItem entry : ranking) {
        System.out.format("%s:%.1f\t", m_lmFeatures.get(entry.m_index), lmCounts[entry.m_index]);
    }
    System.out.println();
}
Also used : structures._RankItem(structures._RankItem) MyPriorityQueue(structures.MyPriorityQueue)

Example 27 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class sparseClusterDCMLDA_test method printTopBeta.

/**
 * Writes one line per row of {@code m_beta} to {@code topBetaFile}: the topic
 * index, its normalized share accumulated over the training documents, and the
 * ranked features with their values.
 *
 * <p>Failures are reported on standard error and do not propagate to the caller.
 *
 * @param k           constructor argument of the per-topic ranking queue
 *                    (presumably the number of features kept per topic -- confirm
 *                    against MyPriorityQueue)
 * @param topBetaFile path of the output file
 */
protected void printTopBeta(int k, String topBetaFile) {
    System.out.println("TopWord FilePath:" + topBetaFile);

    // Sum each topic's value over all training documents (exponentiated when the
    // model is in log space), then hand the totals to Utils.L1Normalization.
    Arrays.fill(m_sstat, 0);
    for (_Doc d : m_trainSet) {
        for (int i = 0; i < number_of_topics; i++) {
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
        }
    }
    Utils.L1Normalization(m_sstat);

    PrintWriter topWordWriter = null;
    try {
        topWordWriter = new PrintWriter(new File(topBetaFile));
        for (int i = 0; i < m_beta.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++) {
                fVector.add(new _RankItem(m_corpus.getFeature(j), m_beta[i][j]));
            }
            topWordWriter.format("Topic %d(%.5f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                topWordWriter.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            }
            topWordWriter.write("\n");
        }
    } catch (Exception ex) {
        // Kept best-effort (no rethrow), but report the real cause and the path:
        // this handler also sees failures other than a missing file.
        System.err.format("Failed to write top words to %s: %s%n", topBetaFile, ex);
    } finally {
        // Close on every path so the file handle is not leaked when writing fails midway.
        if (topWordWriter != null) {
            topWordWriter.close();
        }
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Doc(structures._Doc) MyPriorityQueue(structures.MyPriorityQueue) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) PrintWriter(java.io.PrintWriter)

Example 28 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class sparseClusterDCMLDA_test method printTopWord.

/**
 * Writes one line per row of {@code topic_term_probabilty} to
 * {@code topWordFile}: the topic index, its normalized share accumulated over
 * the training documents, and the ranked features with their values.
 *
 * <p>Failures are reported on standard error and do not propagate to the caller.
 *
 * @param k           constructor argument of the per-topic ranking queue
 *                    (presumably the number of features kept per topic -- confirm
 *                    against MyPriorityQueue)
 * @param topWordFile path of the output file
 */
protected void printTopWord(int k, String topWordFile) {
    System.out.println("TopWord FilePath:" + topWordFile);

    // Sum each topic's value over all training documents (exponentiated when the
    // model is in log space), then hand the totals to Utils.L1Normalization.
    Arrays.fill(m_sstat, 0);
    for (_Doc d : m_trainSet) {
        for (int i = 0; i < number_of_topics; i++) {
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
        }
    }
    Utils.L1Normalization(m_sstat);

    PrintWriter topWordWriter = null;
    try {
        topWordWriter = new PrintWriter(new File(topWordFile));
        for (int i = 0; i < topic_term_probabilty.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++) {
                fVector.add(new _RankItem(m_corpus.getFeature(j), topic_term_probabilty[i][j]));
            }
            topWordWriter.format("Topic %d(%.5f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                topWordWriter.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            }
            topWordWriter.write("\n");
        }
    } catch (Exception ex) {
        // Kept best-effort (no rethrow), but report the real cause and the path:
        // this handler also sees failures other than a missing file.
        System.err.format("Failed to write top words to %s: %s%n", topWordFile, ex);
    } finally {
        // Close on every path so the file handle is not leaked when writing fails midway.
        if (topWordWriter != null) {
            topWordWriter.close();
        }
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Doc(structures._Doc) MyPriorityQueue(structures.MyPriorityQueue) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) PrintWriter(java.io.PrintWriter)

Example 29 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class sparseDCMLDA_test method printTopBeta.

/**
 * Writes one line per row of {@code m_beta} to {@code topBetaFile}: the topic
 * index, its normalized share accumulated over the training documents, and the
 * ranked features with their values.
 *
 * <p>Failures are reported on standard error and do not propagate to the caller.
 *
 * @param k           constructor argument of the per-topic ranking queue
 *                    (presumably the number of features kept per topic -- confirm
 *                    against MyPriorityQueue)
 * @param topBetaFile path of the output file
 */
protected void printTopBeta(int k, String topBetaFile) {
    System.out.println("TopWord FilePath:" + topBetaFile);

    // Sum each topic's value over all training documents (exponentiated when the
    // model is in log space), then hand the totals to Utils.L1Normalization.
    Arrays.fill(m_sstat, 0);
    for (_Doc d : m_trainSet) {
        for (int i = 0; i < number_of_topics; i++) {
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
        }
    }
    Utils.L1Normalization(m_sstat);

    PrintWriter topWordWriter = null;
    try {
        topWordWriter = new PrintWriter(new File(topBetaFile));
        for (int i = 0; i < m_beta.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++) {
                fVector.add(new _RankItem(m_corpus.getFeature(j), m_beta[i][j]));
            }
            topWordWriter.format("Topic %d(%.5f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                topWordWriter.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            }
            topWordWriter.write("\n");
        }
    } catch (Exception ex) {
        // Kept best-effort (no rethrow), but report the real cause and the path:
        // this handler also sees failures other than a missing file.
        System.err.format("Failed to write top words to %s: %s%n", topBetaFile, ex);
    } finally {
        // Close on every path so the file handle is not leaked when writing fails midway.
        if (topWordWriter != null) {
            topWordWriter.close();
        }
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Doc(structures._Doc) MyPriorityQueue(structures.MyPriorityQueue) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) PrintWriter(java.io.PrintWriter)

Example 30 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class LDA_Gibbs_test method printTopWords.

/**
 * Prints the final log likelihood, triggers {@code debugOutput}, and then
 * writes one line per row of {@code topic_term_probabilty} both to
 * {@code betaFile} and to standard output: the topic index, its normalized
 * share accumulated over the training documents, and the ranked features with
 * their values.
 *
 * <p>Failures while writing are reported on standard error and do not
 * propagate to the caller.
 *
 * @param k        constructor argument of the per-topic ranking queue
 *                 (presumably the number of features kept per topic -- confirm
 *                 against MyPriorityQueue)
 * @param betaFile path of the output file; the substring {@code "topWords.txt"}
 *                 is stripped from it to form the prefix passed to {@code debugOutput}
 */
public void printTopWords(int k, String betaFile) {
    double loglikelihood = calculate_log_likelihood();
    System.out.format("Final Log Likelihood %.3f\t", loglikelihood);

    String filePrefix = betaFile.replace("topWords.txt", "");
    debugOutput(filePrefix);

    // Sum each topic's value over all training documents (exponentiated when the
    // model is in log space), then hand the totals to Utils.L1Normalization.
    Arrays.fill(m_sstat, 0);
    System.out.println("print top words");
    for (_Doc d : m_trainSet) {
        for (int i = 0; i < number_of_topics; i++) {
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
        }
    }
    Utils.L1Normalization(m_sstat);

    PrintWriter betaOut = null;
    try {
        System.out.println("beta file");
        betaOut = new PrintWriter(new File(betaFile));
        for (int i = 0; i < topic_term_probabilty.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++) {
                fVector.add(new _RankItem(m_corpus.getFeature(j), topic_term_probabilty[i][j]));
            }
            betaOut.format("Topic %d(%.3f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                // Each entry is mirrored to the file and to the console.
                betaOut.format("%s(%.3f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
                System.out.format("%s(%.3f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            }
            betaOut.println();
            System.out.println();
        }
        betaOut.flush();
    } catch (Exception ex) {
        // Kept best-effort (no rethrow), but report the real cause and the path:
        // this handler also sees failures other than a missing file.
        System.err.format("Failed to write top words to %s: %s%n", betaFile, ex);
    } finally {
        // Close on every path so the file handle is not leaked when writing fails midway.
        if (betaOut != null) {
            betaOut.close();
        }
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Doc(structures._Doc) MyPriorityQueue(structures.MyPriorityQueue) File(java.io.File) PrintWriter(java.io.PrintWriter)

Aggregations

structures._RankItem (structures._RankItem): 66, MyPriorityQueue (structures.MyPriorityQueue): 39, File (java.io.File): 27, PrintWriter (java.io.PrintWriter): 27, structures._Doc (structures._Doc): 25, FileNotFoundException (java.io.FileNotFoundException): 20, structures._ParentDoc4DCM (structures._ParentDoc4DCM): 3, structures._Review (structures._Review): 3, structures._SparseFeature (structures._SparseFeature): 3, structures._stat (structures._stat): 3, IOException (java.io.IOException): 2, ArrayList (java.util.ArrayList): 2, structures._Doc4DCMLDA (structures._Doc4DCMLDA): 2, SparseDoubleMatrix2D (cern.colt.matrix.tdouble.impl.SparseDoubleMatrix2D): 1, structures._HDPThetaStar (structures._HDPThetaStar): 1, structures._Node (structures._Node): 1, structures._QUPair (structures._QUPair): 1, structures._Query (structures._Query): 1, structures._Stn (structures._Stn): 1, structures._thetaStar (structures._thetaStar): 1