Search in sources :

Example 51 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class PairwiseSimCalculator method constructNearestGraph.

/**
 * Builds the outgoing edges of every test node whose index lies in
 * [m_start, m_end).
 *
 * For each such node the method scores it against all other unlabeled (test)
 * documents and then against all labeled documents, pushes the scores through
 * two ranking queues, and attaches whatever the queues retain as unlabeled /
 * labeled edges. Labeled nodes are addressed in m_nodeList at offset m_U.
 * NOTE(review): the queues are presumably bounded to m_k and m_kPrime
 * entries respectively -- confirm against MyPriorityQueue.
 */
void constructNearestGraph() {
    // both queues are shared by all iterations and emptied after each use
    MyPriorityQueue<_RankItem> labeledQueue = new MyPriorityQueue<_RankItem>(m_GFObj.m_k);
    MyPriorityQueue<_RankItem> unlabeledQueue = new MyPriorityQueue<_RankItem>(m_GFObj.m_kPrime);

    for (int idx = m_start; idx < m_end; idx++) {
        _Doc current = m_GFObj.getTestDoc(idx);
        _Node currentNode = m_GFObj.m_nodeList[idx];

        // scan every unlabeled candidate (not only later ones): the similarity might not be symmetric
        for (int other = 0; other < m_GFObj.m_U; other++) {
            if (other != idx) {
                _Doc candidate = m_GFObj.getTestDoc(other);
                double sim = m_GFObj.getSimilarity(current, candidate);
                unlabeledQueue.add(new _RankItem(other, sim));
            }
        }
        for (_RankItem ranked : unlabeledQueue) {
            currentNode.addUnlabeledEdge(m_GFObj.m_nodeList[ranked.m_index], ranked.m_value);
        }
        unlabeledQueue.clear();

        // scan the labeled examples; their node indices start right after the m_U unlabeled ones
        for (int other = 0; other < m_GFObj.m_L; other++) {
            _Doc candidate = m_GFObj.getLabeledDoc(other);
            double sim = m_GFObj.getSimilarity(current, candidate);
            labeledQueue.add(new _RankItem(m_GFObj.m_U + other, sim));
        }
        for (_RankItem ranked : labeledQueue) {
            currentNode.addLabeledEdge(m_GFObj.m_nodeList[ranked.m_index], ranked.m_value);
        }
        labeledQueue.clear();

        // sorted edges make the later debug output faster
        currentNode.sortEdges();
    }

    System.out.format("[%d,%d) finished...\n", m_start, m_end);
}
Also used : structures._RankItem(structures._RankItem) structures._Doc(structures._Doc) MyPriorityQueue(structures.MyPriorityQueue) structures._Node(structures._Node)

Example 52 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class KNN method predict.

/**
 * Predicts the class label of {@code doc} by an unweighted vote among the
 * neighbors kept by a ranking queue created with size m_k.
 *
 * Candidates are the whole training set when m_l <= 0 (no random
 * projection); otherwise only the documents stored in the bucket selected by
 * {@code getHashCode(doc)}.
 *
 * @param doc the document to classify
 * @return the index with the largest vote count according to
 *         {@code Utils.argmax}, or -1 when the selected bucket is missing or
 *         holds fewer than m_k documents
 */
@Override
public int predict(_Doc doc) {
    Collection<_Doc> docs;
    if (m_l <= 0) {
        // no random projection
        docs = m_trainSet;
    } else {
        docs = m_buckets.get(getHashCode(doc));
        // a lookup that yields no bucket at all is handled like a too-small bucket
        // instead of failing with a NullPointerException on docs.size()
        if (docs == null || docs.size() < m_k) {
            System.err.println("L is set too large, tune the parameter.");
            return -1;
        }
    }

    MyPriorityQueue<_RankItem> neighbors = new MyPriorityQueue<_RankItem>(m_k);
    for (_Doc d : docs) {
        // m_index carries the candidate's label, m_value its dot product with doc
        neighbors.add(new _RankItem(d.getYLabel(), Utils.dotProduct(d, doc)));
    }

    Arrays.fill(m_cProbs, 0);
    // every retained neighbor casts one vote for its label;
    // open question from the original author: why don't we consider the similarity?
    for (_RankItem rt : neighbors) {
        m_cProbs[rt.m_index]++;
    }
    return Utils.argmax(m_cProbs);
}
Also used : structures._RankItem(structures._RankItem) structures._Doc(structures._Doc) MyPriorityQueue(structures.MyPriorityQueue)

Example 53 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class KNN method score.

/**
 * Scores {@code label} for {@code doc} as (number of retained neighbors
 * carrying that label) minus m_k.
 *
 * Candidates are the whole training set when m_l <= 0 (no random
 * projection); otherwise only the documents stored in the bucket selected by
 * {@code getHashCode(doc)}.
 *
 * @param doc   the document to score
 * @param label the class index whose vote count is reported
 * @return {@code m_cProbs[label] - m_k}, or -1 when the selected bucket is
 *         missing or holds fewer than m_k documents (note that -1 is also a
 *         value the regular formula can produce)
 */
@Override
public double score(_Doc doc, int label) {
    Collection<_Doc> docs;
    if (m_l <= 0) {
        // no random projection
        docs = m_trainSet;
    } else {
        docs = m_buckets.get(getHashCode(doc));
        // a lookup that yields no bucket at all is handled like a too-small bucket
        // instead of failing with a NullPointerException on docs.size()
        if (docs == null || docs.size() < m_k) {
            System.err.println("L is set too large, tune the parameter.");
            return -1;
        }
    }

    MyPriorityQueue<_RankItem> neighbors = new MyPriorityQueue<_RankItem>(m_k);
    for (_Doc d : docs) {
        // m_index carries the candidate's label, m_value its dot product with doc
        neighbors.add(new _RankItem(d.getYLabel(), Utils.dotProduct(d, doc)));
    }

    Arrays.fill(m_cProbs, 0);
    // every retained neighbor casts one unweighted vote for its label
    for (_RankItem rt : neighbors) {
        m_cProbs[rt.m_index]++;
    }
    // to be consistent with the predict function
    return m_cProbs[label] - m_k;
}
Also used : structures._RankItem(structures._RankItem) structures._Doc(structures._Doc) MyPriorityQueue(structures.MyPriorityQueue)

Example 54 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class LDAGibbs4AC method printTopWords.

/**
 * Prints the final log likelihood, then writes the highest-ranked words of
 * every topic both to {@code betaFile} and to standard output.
 *
 * Each topic line starts with the topic's share in m_sstat, which is rebuilt
 * here by summing the per-document topic values over the training set
 * (exponentiated when m_logSpace is set) and passing the sums to
 * {@code Utils.L1Normalization}.
 *
 * @param k        size passed to the per-topic ranking queue, i.e. how many
 *                 words are listed per topic
 * @param betaFile path of the output file
 */
public void printTopWords(int k, String betaFile) {
    double loglikelihood = calculate_log_likelihood();
    System.out.format("Final Log Likelihood %.3f\t", loglikelihood);

    Arrays.fill(m_sstat, 0);
    System.out.println("print top words");
    for (_Doc d : m_trainSet) {
        for (int i = 0; i < number_of_topics; i++) {
            m_sstat[i] += m_logSpace ? Math.exp(d.m_topics[i]) : d.m_topics[i];
        }
    }
    Utils.L1Normalization(m_sstat);

    PrintWriter betaOut = null;
    try {
        System.out.println("beta file");
        betaOut = new PrintWriter(new File(betaFile));
        for (int i = 0; i < topic_term_probabilty.length; i++) {
            MyPriorityQueue<_RankItem> fVector = new MyPriorityQueue<_RankItem>(k);
            for (int j = 0; j < vocabulary_size; j++) {
                fVector.add(new _RankItem(m_corpus.getFeature(j), topic_term_probabilty[i][j]));
            }

            betaOut.format("Topic %d(%.3f):\t", i, m_sstat[i]);
            for (_RankItem it : fVector) {
                // stored values are converted back from log space before printing
                double weight = m_logSpace ? Math.exp(it.m_value) : it.m_value;
                betaOut.format("%s(%.3f)\t", it.m_name, weight);
                System.out.format("%s(%.3f)\t", it.m_name, weight);
            }
            betaOut.println();
            System.out.println();
        }
        betaOut.flush();
    } catch (Exception ex) {
        // NOTE(review): this reports every failure, not only a missing file, with the same message
        System.err.print("File Not Found");
    } finally {
        // close the writer on the failure path as well, so the file handle is never leaked
        if (betaOut != null) {
            betaOut.close();
        }
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Doc(structures._Doc) MyPriorityQueue(structures.MyPriorityQueue) File(java.io.File) PrintWriter(java.io.PrintWriter)

Example 55 with structures._RankItem

use of structures._RankItem in project IR_Base by Linda-sunshine.

the class CoLinAdapt method gradientByR2.

// Gradient computation for use in LBFGS: for every neighbor of the given user,
// adds the weighted scaling/shifting differences to the user's slots in m_g and
// subtracts the same amounts from the neighbor's slots.
// Layout used below: each user owns 2 * m_dim consecutive entries of m_g,
// the first m_dim for scaling and the next m_dim for shifting.
protected void gradientByR2(_AdaptStruct user) {
    _CoLinAdaptStruct self = (_CoLinAdaptStruct) user;
    int selfBase = m_dim * 2 * self.getId();

    for (_RankItem neighbor : self.getNeighbors()) {
        _CoLinAdaptStruct peer = (_CoLinAdaptStruct) m_userList.get(neighbor.m_index);
        int peerBase = m_dim * 2 * peer.getId();
        // the neighbor's ranking value acts as the pair weight
        double weight = 2 * neighbor.m_value;

        for (int d = 0; d < m_dim; d++) {
            double scalingGrad = weight * m_eta3 * (self.getScaling(d) - peer.getScaling(d));
            double shiftingGrad = weight * m_eta4 * (self.getShifting(d) - peer.getShifting(d));

            // contribution to this user's gradient
            m_g[selfBase + d] += scalingGrad;
            m_g[selfBase + d + m_dim] += shiftingGrad;

            // equal and opposite contribution to the neighbor's gradient
            m_g[peerBase + d] -= scalingGrad;
            m_g[peerBase + d + m_dim] -= shiftingGrad;
        }
    }
}
Also used : structures._RankItem(structures._RankItem)

Aggregations

structures._RankItem (structures._RankItem)66 MyPriorityQueue (structures.MyPriorityQueue)39 File (java.io.File)27 PrintWriter (java.io.PrintWriter)27 structures._Doc (structures._Doc)25 FileNotFoundException (java.io.FileNotFoundException)20 structures._ParentDoc4DCM (structures._ParentDoc4DCM)3 structures._Review (structures._Review)3 structures._SparseFeature (structures._SparseFeature)3 structures._stat (structures._stat)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 structures._Doc4DCMLDA (structures._Doc4DCMLDA)2 SparseDoubleMatrix2D (cern.colt.matrix.tdouble.impl.SparseDoubleMatrix2D)1 structures._HDPThetaStar (structures._HDPThetaStar)1 structures._Node (structures._Node)1 structures._QUPair (structures._QUPair)1 structures._Query (structures._Query)1 structures._Stn (structures._Stn)1 structures._thetaStar (structures._thetaStar)1