Search in sources :

Example 46 with structures._Review

use of structures._Review in project IR_Base by Linda-sunshine.

the class CLRWithDP method printInfo.

/**
 * Prints the positive/negative statistics of every cluster, computed over adaptation reviews.
 *
 * <p>The counters of the first {@code m_kBar} entries of {@code m_thetaStars} are reset, then
 * every user's reviews are scanned: each review of type {@code rType.ADAPTATION} increments the
 * positive counter of the user's cluster when its label is 1 and the negative counter otherwise.
 * Reviews of any other type are ignored. Finally the {@code showStat()} output of each cluster
 * and the total number of clusters are written to standard output.
 */
public void printInfo() {
    // clear the statistics left over from a previous call
    for (int i = 0; i < m_kBar; i++) {
        m_thetaStars[i].resetCount();
    }
    // collect statistics across users in adaptation data
    for (int i = 0; i < m_userList.size(); i++) {
        _DPAdaptStruct user = (_DPAdaptStruct) m_userList.get(i);
        // all reviews of one user are counted towards that user's cluster
        _thetaStar theta = user.getThetaStar();
        for (_Review review : user.getReviews()) {
            if (review.getType() != rType.ADAPTATION) {
                // only touch the adaptation data
                continue;
            }
            if (review.getYLabel() == 1) {
                theta.incPosCount();
            } else {
                theta.incNegCount();
            }
        }
    }
    System.out.print("[Info]Clusters:");
    for (int i = 0; i < m_kBar; i++) {
        System.out.format("%s\t", m_thetaStars[i].showStat());
    }
    System.out.format("\n[Info]%d Clusters are found in total!\n", m_kBar);
}
Also used : structures._thetaStar(structures._thetaStar) structures._RankItem(structures._RankItem) structures._Review(structures._Review) MyPriorityQueue(structures.MyPriorityQueue)

Example 47 with structures._Review

use of structures._Review in project IR_Base by Linda-sunshine.

the class CLRWithHDP method accumulateFeatureCount.

/**
 * Rebuilds {@code m_tf_count}, a table with one row per cluster ({@code m_kBar} rows) and one
 * column per feature ({@code m_featureSize} columns), by adding up the sparse feature values of
 * every adaptation review into the row of the cluster that review is assigned to.
 */
public void accumulateFeatureCount() {
    // start from an all-zero table on every call
    m_tf_count = new int[m_kBar][m_featureSize];
    for (_AdaptStruct adaptUser : m_userList) {
        for (_Review review : adaptUser.getReviews()) {
            if (review.getType() != rType.ADAPTATION) {
                // only adaptation reviews contribute to the counts
                continue;
            }
            final int clusterId = review.getHDPThetaStar().getIndex();
            // add this review's feature values to its cluster's row
            for (_SparseFeature feature : review.getSparse()) {
                m_tf_count[clusterId][feature.getIndex()] += feature.getValue();
            }
        }
    }
}
Also used : Classifier.supervised.modelAdaptation._AdaptStruct(Classifier.supervised.modelAdaptation._AdaptStruct) structures._Review(structures._Review) structures._SparseFeature(structures._SparseFeature)

Example 48 with structures._Review

use of structures._Review in project IR_Base by Linda-sunshine.

the class CLRWithHDP method calculate_E_step.

// One sweep of the MCMC sampler: every non-test review of every user that has
// adaptation data is detached from its current cluster and then re-sampled.
// A '.' is printed every 2000 sampled reviews (with a line break every 100000),
// and the current number of clusters is printed at the end of the sweep.
@Override
protected void calculate_E_step() {
    int processed = 0;
    for (int u = 0; u < m_userList.size(); u++) {
        _HDPAdaptStruct current = (_HDPAdaptStruct) m_userList.get(u);
        if (current.getAdaptationSize() != 0) {
            for (_Review review : current.getReviews()) {
                if (review.getType() != rType.TEST) {
                    // Step 1: remove the current review from the thetaStar and user side.
                    updateDocMembership(current, review);
                    // Step 2: sample new cluster assignment for this review
                    sampleOneInstance(current, review);
                    // progress indicator
                    processed++;
                    if (processed % 2000 == 0) {
                        System.out.print('.');
                        if (processed % 100000 == 0) {
                            System.out.println();
                        }
                    }
                }
                // testing reviews are never touched
            }
        }
    }
    // NOTE: sampleGamma() was left disabled by the original author ("will this help sampling?")
    System.out.println(m_kBar);
}
Also used : structures._Review(structures._Review)

Example 49 with structures._Review

use of structures._Review in project IR_Base by Linda-sunshine.

the class CLRWithHDP method initThetaStars.

// Initialises the sampler: after setting up the prior via initPriorG0(), every review gets the
// log-likelihood of being generated by a brand-new cluster stored on it (setL4NewCluster), and
// every non-test review is then given an initial cluster through sampleOneInstance.
@Override
public void initThetaStars() {
    initPriorG0();
    // \sum_v beta_v and its log-gamma are the same for every review, so compute them once
    final double betaTotal = Utils.sumOfArray(m_betas);
    final double lgammaBetaTotal = Utils.lgamma(betaTotal);
    for (_AdaptStruct adaptUser : m_userList) {
        _HDPAdaptStruct hdpUser = (_HDPAdaptStruct) adaptUser;
        for (_Review review : hdpUser.getReviews()) {
            // log-likelihood of this review under a new (random) language model
            double logLikelihood = 0;
            // mass = \sum_v beta_v + \sum_v m_v (counts observed in this review)
            double mass = betaTotal;
            // features absent from the review contribute log(gamma(beta_v)/gamma(beta_v)) = 0,
            // so only the review's own sparse language-model features are visited
            for (_SparseFeature feature : review.getLMSparse()) {
                int v = feature.getIndex();
                mass += feature.getValue();
                // log gamma(m_v + beta_v) - log gamma(beta_v)
                logLikelihood += Utils.lgamma(feature.getValue() + m_betas[v]) - Utils.lgamma(m_betas[v]);
            }
            logLikelihood += lgammaBetaTotal - Utils.lgamma(mass);
            review.setL4NewCluster(logLikelihood);
            // the likelihood is stored for all reviews, but test reviews are not assigned
            if (review.getType() != rType.TEST) {
                sampleOneInstance(hdpUser, review);
            }
        }
    }
}
Also used : Classifier.supervised.modelAdaptation._AdaptStruct(Classifier.supervised.modelAdaptation._AdaptStruct) structures._Review(structures._Review) structures._SparseFeature(structures._SparseFeature)

Example 50 with structures._Review

use of structures._Review in project IR_Base by Linda-sunshine.

the class CLRWithHDP method printInfo.

/**
 * Prints the positive/negative statistics of every cluster, computed over adaptation reviews,
 * and optionally the top words of the ranked clusters.
 *
 * <p>The counters of the first {@code m_kBar} entries of {@code m_hdpThetaStars} are reset, then
 * each review of type {@code rType.ADAPTATION} increments the positive counter of the cluster
 * it is assigned to when its label is 1 and the negative counter otherwise. The
 * {@code showStat()} output of each cluster is written to standard output, followed by a summary
 * line that is always emitted so the cluster line is properly terminated.
 *
 * @param printDetails when {@code true} and {@code m_features} is available, the per-cluster
 *     feature counts are re-accumulated and {@code printTopWords} is called for every cluster
 *     held by the ranking queue
 */
public void printInfo(boolean printDetails) {
    // NOTE(review): presumably keeps the 10 largest clusters by member size - confirm against
    // MyPriorityQueue's ordering
    MyPriorityQueue<_RankItem> clusterRanker = new MyPriorityQueue<_RankItem>(10);
    // clear the statistics
    for (int i = 0; i < m_kBar; i++) {
        m_hdpThetaStars[i].resetCount();
        // get the most popular clusters
        clusterRanker.add(new _RankItem(i, m_hdpThetaStars[i].getMemSize()));
    }
    // collect statistics across users in adaptation data
    for (int i = 0; i < m_userList.size(); i++) {
        _HDPAdaptStruct user = (_HDPAdaptStruct) m_userList.get(i);
        for (_Review r : user.getReviews()) {
            if (r.getType() != rType.ADAPTATION) {
                // only touch the adaptation data
                continue;
            }
            // each review carries its own cluster assignment
            _HDPThetaStar theta = r.getHDPThetaStar();
            if (r.getYLabel() == 1) {
                theta.incPosCount();
            } else {
                theta.incNegCount();
            }
        }
    }
    System.out.print("[Info]Clusters:");
    for (int i = 0; i < m_kBar; i++) {
        System.out.format("%s\t", m_hdpThetaStars[i].showStat());
    }
    if (m_features != null && printDetails) {
        System.out.format("\n[Info]%d Clusters are found in total! And the highlight is as follows\n", m_kBar);
        accumulateFeatureCount();
        for (_RankItem it : clusterRanker) {
            printTopWords(m_hdpThetaStars[it.m_index]);
        }
    } else {
        // Also covers "features available but no details requested", which previously printed
        // nothing and left the cluster line without a terminating newline.
        System.out.format("\n[Info]%d Clusters are found in total!\n", m_kBar);
    }
}
Also used : structures._RankItem(structures._RankItem) structures._Review(structures._Review) MyPriorityQueue(structures.MyPriorityQueue) structures._HDPThetaStar(structures._HDPThetaStar)

Aggregations

structures._Review (structures._Review)44 structures._SparseFeature (structures._SparseFeature)24 structures._HDPThetaStar (structures._HDPThetaStar)9 ArrayList (java.util.ArrayList)8 Feature (Classifier.supervised.liblinear.Feature)6 Classifier.supervised.modelAdaptation._AdaptStruct (Classifier.supervised.modelAdaptation._AdaptStruct)6 structures._PerformanceStat (structures._PerformanceStat)6 IOException (java.io.IOException)5 File (java.io.File)4 structures._User (structures._User)4 FeatureNode (Classifier.supervised.liblinear.FeatureNode)3 Parameter (Classifier.supervised.liblinear.Parameter)3 Problem (Classifier.supervised.liblinear.Problem)3 structures._RankItem (structures._RankItem)3 BufferedReader (java.io.BufferedReader)2 FileInputStream (java.io.FileInputStream)2 InputStreamReader (java.io.InputStreamReader)2 PrintWriter (java.io.PrintWriter)2 MyPriorityQueue (structures.MyPriorityQueue)2 SolverType (Classifier.supervised.liblinear.SolverType)1