Use of structures._Review in project IR_Base by Linda-sunshine.
Class CLRWithDP, method printInfo:
/**
 * Prints per-cluster statistics collected over the adaptation reviews:
 * resets each cluster's counters, tallies positive/negative labels of
 * adaptation reviews into their users' clusters, then prints one stat
 * entry per cluster followed by the total cluster count.
 */
public void printInfo() {
	// clear the statistics accumulated by any previous call
	// (fix: dropped the local MyPriorityQueue ranker that was populated
	// here but never read — dead code)
	for (int i = 0; i < m_kBar; i++)
		m_thetaStars[i].resetCount();
	// collect statistics across users in adaptation data
	_thetaStar theta = null;
	for (int i = 0; i < m_userList.size(); i++) {
		_DPAdaptStruct user = (_DPAdaptStruct) m_userList.get(i);
		theta = user.getThetaStar();
		for (_Review review : user.getReviews()) {
			if (review.getType() != rType.ADAPTATION)
				// only touch the adaptation data
				continue;
			else if (review.getYLabel() == 1)
				theta.incPosCount();
			else
				theta.incNegCount();
		}
	}
	System.out.print("[Info]Clusters:");
	for (int i = 0; i < m_kBar; i++)
		System.out.format("%s\t", m_thetaStars[i].showStat());
	System.out.print(String.format("\n[Info]%d Clusters are found in total!\n", m_kBar));
}
Use of structures._Review in project IR_Base by Linda-sunshine.
Class CLRWithHDP, method accumulateFeatureCount:
/**
 * Accumulates per-cluster term-frequency counts from the adaptation
 * reviews: for every adaptation review, its sparse feature values are
 * added into the row of m_tf_count belonging to the review's cluster.
 */
public void accumulateFeatureCount() {
	// one row per cluster, one column per feature
	m_tf_count = new int[m_kBar][m_featureSize];
	for (_AdaptStruct user : m_userList) {
		for (_Review r : user.getReviews()) {
			if (r.getType() != rType.ADAPTATION)
				continue; // only adaptation reviews contribute counts
			// cluster this review is currently assigned to
			int cluster = r.getHDPThetaStar().getIndex();
			// fold the review's word counts into its cluster's totals
			for (_SparseFeature fv : r.getSparse())
				m_tf_count[cluster][fv.getIndex()] += fv.getValue();
		}
	}
}
Use of structures._Review in project IR_Base by Linda-sunshine.
Class CLRWithHDP, method calculate_E_step:
// The main MCMC algorithm: reassign each non-test review to a cluster.
@Override
protected void calculate_E_step() {
	int sampled = 0;
	for (_AdaptStruct u : m_userList) {
		_HDPAdaptStruct user = (_HDPAdaptStruct) u;
		if (user.getAdaptationSize() == 0)
			continue; // nothing to sample for this user
		for (_Review r : user.getReviews()) {
			// do not touch testing reviews!
			if (r.getType() == rType.TEST)
				continue;
			// Step 1: remove the current review from the thetaStar and user side.
			updateDocMembership(user, r);
			// Step 2: sample new cluster assignment for this review
			sampleOneInstance(user, r);
			// progress indicator: a dot every 2000 samples, newline every 100000
			sampled++;
			if (sampled % 2000 == 0) {
				System.out.print('.');
				// sampleGamma();//will this help sampling?
				if (sampled % 100000 == 0)
					System.out.println();
			}
		}
	}
	// sampleGamma();//will this help sampling?
	System.out.println(m_kBar);
}
Use of structures._Review in project IR_Base by Linda-sunshine.
Class CLRWithHDP, method initThetaStars:
// Randomly assign user reviews to k user groups.
@Override
public void initThetaStars() {
	initPriorG0();
	final double betaSum = Utils.sumOfArray(m_betas);
	// logGamma(\sum beta_v), hoisted since it is constant for every review
	final double betaSumLgamma = Utils.lgamma(betaSum);
	for (_AdaptStruct u : m_userList) {
		_HDPAdaptStruct user = (_HDPAdaptStruct) u;
		for (_Review r : user.getReviews()) {
			// pre-compute the likelihood of the review being generated
			// from a random language model
			double logL = 0;
			// total = v*beta + \sum \pi_v (global language model)
			double total = betaSum;
			// for those v with mij,v=0, frac = \gamma(beta_v)/\gamma(beta_v)=1,
			// so log frac = 0 and they can be skipped
			for (_SparseFeature fv : r.getLMSparse()) {
				int index = fv.getIndex();
				total += fv.getValue();
				// log \gamma(m_v+\pi_v+beta)/\gamma(\pi_v+beta);
				// logGamma(\beta_i) is pre-computed for efficiency
				logL += Utils.lgamma(fv.getValue() + m_betas[index]) - Utils.lgamma(m_betas[index]);
			}
			logL += betaSumLgamma - Utils.lgamma(total);
			r.setL4NewCluster(logL);
			// test reviews keep their likelihood but get no cluster assignment
			if (r.getType() == rType.TEST)
				continue;
			sampleOneInstance(user, r);
		}
	}
}
Use of structures._Review in project IR_Base by Linda-sunshine.
Class CLRWithHDP, method printInfo:
/**
 * Prints per-cluster statistics collected over the adaptation reviews.
 * When feature names are available and printDetails is set, also prints
 * the top words of the 10 most popular clusters (by membership size).
 *
 * @param printDetails whether to print the top words of the most
 *                     popular clusters (only effective when
 *                     m_features is non-null)
 */
public void printInfo(boolean printDetails) {
	// rank the 10 most popular clusters by membership size
	MyPriorityQueue<_RankItem> clusterRanker = new MyPriorityQueue<_RankItem>(10);
	// clear the statistics accumulated by any previous call
	for (int i = 0; i < m_kBar; i++) {
		m_hdpThetaStars[i].resetCount();
		clusterRanker.add(new _RankItem(i, m_hdpThetaStars[i].getMemSize()));
	}
	// collect statistics across users in adaptation data
	_HDPThetaStar theta = null;
	_HDPAdaptStruct user;
	for (int i = 0; i < m_userList.size(); i++) {
		user = (_HDPAdaptStruct) m_userList.get(i);
		for (_Review r : user.getReviews()) {
			if (r.getType() != rType.ADAPTATION)
				// only touch the adaptation data
				continue;
			theta = r.getHDPThetaStar();
			if (r.getYLabel() == 1)
				theta.incPosCount();
			else
				theta.incNegCount();
		}
	}
	System.out.print("[Info]Clusters:");
	for (int i = 0; i < m_kBar; i++)
		System.out.format("%s\t", m_hdpThetaStars[i].showStat());
	if (m_features == null)
		System.out.print(String.format("\n[Info]%d Clusters are found in total!\n", m_kBar));
	else if (printDetails) {
		// fix: corrected "highligt" -> "highlight" in the status message
		System.out.print(String.format("\n[Info]%d Clusters are found in total! And the highlight is as follows\n", m_kBar));
		accumulateFeatureCount();
		for (_RankItem it : clusterRanker)
			printTopWords(m_hdpThetaStars[it.m_index]);
	}
}
Aggregations