Search in sources :

Example 11 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class HTMM method docSummary.

/**
 * Writes a per-product, per-topic sentence summary to standard output and to
 * {@code summaryWriter}.
 *
 * <p>For every product ID and every topic, each sentence of every training
 * document whose item ID matches the product (case-insensitively) is scored
 * and offered to a ranking queue created with capacity 3. The score is the
 * document's weight for the topic plus the sum, over the sentence's sparse
 * features, of feature value times the topic-term probability of that
 * feature, divided by the sentence length.
 *
 * <p>{@code summaryWriter} is flushed and closed before returning.
 *
 * @param productList product IDs to summarise
 */
public void docSummary(String[] productList) {
    final String headerFormat = "Product: %s, Topic: %d\n";
    final String entryFormat = "%s\t%.3f\n";

    for (String productId : productList) {
        for (int topic = 0; topic < this.number_of_topics; topic++) {
            // queue capacity 3: keeps the top three sentences for this product/topic pair
            MyPriorityQueue<_RankItem> ranked = new MyPriorityQueue<_RankItem>(3);

            for (_Doc doc : m_trainSet) {
                // only documents that belong to the current product contribute
                if (!doc.getItemID().equalsIgnoreCase(productId)) {
                    continue;
                }
                int sentenceCount = doc.getSenetenceSize();
                for (int s = 0; s < sentenceCount; s++) {
                    _Stn stn = doc.getSentence(s);
                    // start from the document-level topic weight, then add the per-feature evidence
                    double score = doc.m_topics[topic];
                    for (_SparseFeature feature : stn.getFv()) {
                        score += feature.getValue() * topic_term_probabilty[topic][feature.getIndex()];
                    }
                    // scale by sentence length
                    score /= stn.getLength();
                    ranked.add(new _RankItem(stn.getRawSentence(), score));
                }
            }

            // header: mirrored to the console and the summary file
            System.out.format(headerFormat, productId, topic);
            summaryWriter.format(headerFormat, productId, topic);

            for (_RankItem entry : ranked) {
                System.out.format(entryFormat, entry.m_name, entry.m_value);
                summaryWriter.format(entryFormat, entry.m_name, entry.m_value);
            }
        }
    }

    summaryWriter.flush();
    summaryWriter.close();
}
Also used : structures._Stn(structures._Stn) structures._RankItem(structures._RankItem) MyPriorityQueue(structures.MyPriorityQueue) structures._Doc(structures._Doc) structures._SparseFeature(structures._SparseFeature)

Example 12 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class pLSA method printTopWords.

/**
 * Prints, for every topic, its normalised corpus-level weight followed by
 * the entries retained by a ranking queue of capacity {@code k}.
 *
 * <p>All quantities are printed in real space: when {@code m_logSpace} is
 * set, document topic values and term probabilities are exponentiated
 * before being accumulated or displayed.
 *
 * @param k capacity of the per-topic ranking queue
 */
@Override
public void printTopWords(int k) {
    // rebuild the per-topic totals from scratch
    Arrays.fill(m_sstat, 0);
    for (_Doc doc : m_trainSet) {
        for (int topic = 0; topic < number_of_topics; topic++) {
            double mass = m_logSpace ? Math.exp(doc.m_topics[topic]) : doc.m_topics[topic];
            m_sstat[topic] += mass;
        }
    }
    Utils.L1Normalization(m_sstat);

    for (int topic = 0; topic < topic_term_probabilty.length; topic++) {
        // rank every vocabulary entry by its stored (possibly log-space) probability
        MyPriorityQueue<_RankItem> ranked = new MyPriorityQueue<_RankItem>(k);
        for (int term = 0; term < vocabulary_size; term++) {
            ranked.add(new _RankItem(m_corpus.getFeature(term), topic_term_probabilty[topic][term]));
        }

        System.out.format("Topic %d(%.5f):\t", topic, m_sstat[topic]);
        for (_RankItem entry : ranked) {
            double shown = m_logSpace ? Math.exp(entry.m_value) : entry.m_value;
            System.out.format("%s(%.5f)\t", entry.m_name, shown);
        }
        System.out.println();
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Doc(structures._Doc) MyPriorityQueue(structures.MyPriorityQueue)

Example 13 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class pLSA method printTopWords.

/**
 * Writes, for every topic, its normalised corpus-level weight followed by
 * the entries retained by a ranking queue of capacity {@code k} to the file
 * at {@code topWordPath}.
 *
 * <p>All quantities are written in real space: when {@code m_logSpace} is
 * set, document topic values and term probabilities are exponentiated
 * before being accumulated or written.
 *
 * <p>Failures are reported on standard error and not rethrown, so a failed
 * export does not abort the caller. The writer is always closed, including
 * when an exception interrupts the export part-way through.
 *
 * @param k           capacity of the per-topic ranking queue
 * @param topWordPath path of the output file
 */
@Override
public void printTopWords(int k, String topWordPath) {
    System.out.println("TopWord FilePath:" + topWordPath);

    // rebuild the per-topic totals from scratch
    Arrays.fill(m_sstat, 0);
    for (_Doc d : m_trainSet) {
        for (int i = 0; i < number_of_topics; i++) {
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
        }
    }
    Utils.L1Normalization(m_sstat);

    PrintWriter topWordWriter = null;
    try {
        topWordWriter = new PrintWriter(new File(topWordPath));
        for (int i = 0; i < topic_term_probabilty.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++) {
                fVector.add(new _RankItem(m_corpus.getFeature(j), topic_term_probabilty[i][j]));
            }
            topWordWriter.format("Topic %d(%.5f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                topWordWriter.format("%s(%.5f)\t", it.m_name, m_logSpace ? Math.exp(it.m_value) : it.m_value);
            }
            topWordWriter.write("\n");
        }
    } catch (Exception ex) {
        // Still best-effort (nothing is rethrown), but report the real cause:
        // the previous fixed "File Not Found" text was printed for every exception type.
        System.err.println("Failed to write top words to " + topWordPath + ": " + ex);
    } finally {
        // close on every path so the file handle is not leaked when the loop throws
        if (topWordWriter != null) {
            topWordWriter.close();
        }
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Doc(structures._Doc) MyPriorityQueue(structures.MyPriorityQueue) File(java.io.File) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter)

Example 14 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class twoTopic method printTopWords.

/**
 * Prints the entries retained by a ranking queue of capacity {@code k},
 * built from every index of {@code m_theta}, on a single tab-separated line.
 *
 * <p>Only one topic is shown by this model, so there is no per-topic loop.
 *
 * @param k capacity of the ranking queue
 */
@Override
public void printTopWords(int k) {
    MyPriorityQueue<_RankItem> ranked = new MyPriorityQueue<_RankItem>(k);

    // offer every entry of m_theta, labelled with the corpus feature at the same index
    for (int term = 0; term < m_theta.length; term++) {
        ranked.add(new _RankItem(m_corpus.getFeature(term), m_theta[term]));
    }

    for (_RankItem entry : ranked) {
        System.out.format("%s(%.3f)\t", entry.m_name, entry.m_value);
    }
    System.out.println();
}
Also used : structures._RankItem(structures._RankItem) MyPriorityQueue(structures.MyPriorityQueue)

Example 15 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class CoLinAdapt method calculateR2.

/**
 * Computes the R2 regularization term for one user.
 *
 * <p>For each neighbor of {@code u}, the squared differences between the two
 * users' scaling values and between their shifting values are summed over
 * the first {@code m_dim} indices. The two sums are weighted by
 * {@code m_eta3} and {@code m_eta4} respectively, multiplied by the
 * neighbor's {@code m_value}, and accumulated.
 *
 * @param u the user structure; it is cast to {@code _CoLinAdaptStruct}
 * @return the accumulated R2 value over all neighbors of {@code u}
 */
public double calculateR2(_AdaptStruct u) {
    _CoLinAdaptStruct self = (_CoLinAdaptStruct) u;
    double total = 0;

    for (_RankItem neighbor : self.getNeighbors()) {
        _CoLinAdaptStruct other = (_CoLinAdaptStruct) m_userList.get(neighbor.m_index);

        // squared distance between the two users' scaling and shifting values
        double scalingDist = 0;
        double shiftingDist = 0;
        for (int dim = 0; dim < m_dim; dim++) {
            scalingDist += (self.getScaling(dim) - other.getScaling(dim)) * (self.getScaling(dim) - other.getScaling(dim));
            shiftingDist += (self.getShifting(dim) - other.getShifting(dim)) * (self.getShifting(dim) - other.getShifting(dim));
        }

        // Weight by the neighbor's m_value. Earlier variants, left disabled in the
        // original, used a constant 0.1 or m_value divided by simSum instead.
        total += neighbor.m_value * (m_eta3 * scalingDist + m_eta4 * shiftingDist);
    }

    return total;
}
Also used : structures._RankItem(structures._RankItem)

Aggregations

structures._RankItem (structures._RankItem): 66; MyPriorityQueue (structures.MyPriorityQueue): 39; File (java.io.File): 27; PrintWriter (java.io.PrintWriter): 27; structures._Doc (structures._Doc): 25; FileNotFoundException (java.io.FileNotFoundException): 20; structures._ParentDoc4DCM (structures._ParentDoc4DCM): 3; structures._Review (structures._Review): 3; structures._SparseFeature (structures._SparseFeature): 3; structures._stat (structures._stat): 3; IOException (java.io.IOException): 2; ArrayList (java.util.ArrayList): 2; structures._Doc4DCMLDA (structures._Doc4DCMLDA): 2; SparseDoubleMatrix2D (cern.colt.matrix.tdouble.impl.SparseDoubleMatrix2D): 1; structures._HDPThetaStar (structures._HDPThetaStar): 1; structures._Node (structures._Node): 1; structures._QUPair (structures._QUPair): 1; structures._Query (structures._Query): 1; structures._Stn (structures._Stn): 1; structures._thetaStar (structures._thetaStar): 1