use of structures._HDPThetaStar._Connection in project IR_Base by Linda-sunshine.
the class CLRWithHDP method printInfo.
/**
 * Prints a summary of the current clusters to standard output.
 *
 * <p>Each of the first {@code m_kBar} clusters has its counters reset and is
 * offered to a size-10 ranking queue keyed by its member size. The positive and
 * negative counts are then rebuilt from the adaptation reviews of every user,
 * and one {@code showStat()} entry per cluster is printed.
 *
 * @param printDetails when {@code true} and {@code m_features} is not null, the
 *                     feature counts are accumulated and the top words of the
 *                     ranked clusters are printed as well
 */
public void printInfo(boolean printDetails) {
    MyPriorityQueue<_RankItem> topClusters = new MyPriorityQueue<_RankItem>(10);

    // Wipe the old counters and rank every active cluster by its member size.
    for (int k = 0; k < m_kBar; k++) {
        m_hdpThetaStars[k].resetCount();
        topClusters.add(new _RankItem(k, m_hdpThetaStars[k].getMemSize()));
    }

    // Rebuild the positive/negative counts from adaptation reviews only.
    for (int u = 0; u < m_userList.size(); u++) {
        _HDPAdaptStruct current = (_HDPAdaptStruct) m_userList.get(u);
        for (_Review review : current.getReviews()) {
            if (review.getType() == rType.ADAPTATION) {
                _HDPThetaStar assigned = review.getHDPThetaStar();
                if (review.getYLabel() == 1) {
                    assigned.incPosCount();
                } else {
                    assigned.incNegCount();
                }
            }
        }
    }

    System.out.print("[Info]Clusters:");
    for (int k = 0; k < m_kBar; k++) {
        System.out.format("%s\t", m_hdpThetaStars[k].showStat());
    }

    if (m_features == null) {
        String summary = String.format("\n[Info]%d Clusters are found in total!\n", m_kBar);
        System.out.print(summary);
    } else if (printDetails) {
        String summary = String.format("\n[Info]%d Clusters are found in total! And the highligt is as follows\n", m_kBar);
        System.out.print(summary);
        accumulateFeatureCount();
        for (_RankItem ranked : topClusters) {
            printTopWords(m_hdpThetaStars[ranked.m_index]);
        }
    }
}
use of structures._HDPThetaStar._Connection in project IR_Base by Linda-sunshine.
the class CLRWithHDP method sampleGamma.
// Sample the global mixture proportion, \gamma~Dir(m1, m2,..,\alpha)
/**
 * Resamples the global mixture proportions of the active clusters together with
 * the left-over proportion {@code m_gamma_e}.
 *
 * <p>The per-cluster {@code m_hSize} counters are first cleared and then rebuilt
 * by adding {@code sampleH(user, theta)} for every theta returned by
 * {@code getHDPTheta4Rvw()} of each user with a non-zero adaptation size.
 * Afterwards one Gamma draw is taken per slot and the draws are normalised by
 * their sum.
 */
protected void sampleGamma() {
    // Start every active cluster from a zero h-count.
    for (int c = 0; c < m_kBar; c++) {
        m_hdpThetaStars[c].m_hSize = 0;
    }

    // Accumulate the sampled h values; users without adaptation data are skipped.
    for (int u = 0; u < m_userList.size(); u++) {
        _HDPAdaptStruct current = (_HDPAdaptStruct) m_userList.get(u);
        if (current.getAdaptationSize() != 0) {
            for (_HDPThetaStar star : current.getHDPTheta4Rvw()) {
                star.m_hSize += sampleH(current, star);
            }
        }
    }

    // The extra slot (index m_kBar) is drawn first, with shape m_alpha.
    m_cache[m_kBar] = Gamma.staticNextDouble(m_alpha, 1);
    double total = m_cache[m_kBar];

    // Then one draw per active cluster, in index order.
    for (int c = 0; c < m_kBar; c++) {
        double shape = m_hdpThetaStars[c].m_hSize + m_alpha;
        m_cache[c] = Gamma.staticNextDouble(shape, 1);
        total += m_cache[c];
    }

    // Normalise the draws so that all proportions sum to one.
    for (int c = 0; c < m_kBar; c++) {
        m_hdpThetaStars[c].setGamma(m_cache[c] / total);
    }
    m_gamma_e = m_cache[m_kBar] / total;
}
use of structures._HDPThetaStar._Connection in project IR_Base by Linda-sunshine.
the class CLRWithHDP method updateDocMembership.
/**
 * Detaches a review from the cluster it is currently assigned to.
 *
 * <p>The review is removed from the user's per-cluster member size and from the
 * cluster's language-model statistics and member count. If the cluster is left
 * with no members, its gamma is added back to {@code m_gamma_e}, the cluster is
 * swapped into position {@code m_kBar - 1}, and {@code m_kBar} is decremented.
 *
 * @param user the user who owns the review
 * @param r    the review being detached from its current cluster
 */
public void updateDocMembership(_HDPAdaptStruct user, _Review r) {
    // User side: one review fewer for this cluster.
    user.incHDPThetaStarMemSize(r.getHDPThetaStar(), -1);

    // Cluster side: the language-model stats must be removed before the
    // document count is decreased.
    _HDPThetaStar owner = r.getHDPThetaStar();
    owner.rmLMStat(r.getLMSparse());
    owner.updateMemCount(-1);

    // The cluster still has members, so nothing needs recycling.
    if (owner.getMemSize() != 0) {
        return;
    }

    // Empty cluster: verify that its language-model counts are all zero.
    LMStatSanityCheck(owner);

    // Give the cluster's gamma back to the left-over proportion.
    m_gamma_e += owner.getGamma();
    owner.resetGamma();

    // Park the emptied cluster in the last active slot, then shrink the active range.
    int slot = findHDPThetaStar(owner);
    swapTheta(m_kBar - 1, slot);
    m_kBar--;
}
use of structures._HDPThetaStar._Connection in project IR_Base by Linda-sunshine.
the class MTCLinAdaptWithHDPMultipleE method calcLogLikelihoodY.
// In function logLikelihood, we update the loglikelihood and corresponding gradients.
// Thus, we only need to update the two functions correspondingly with.
/**
 * Computes the log-likelihood of a review's label over all clusters recorded in
 * its theta-count map.
 *
 * <p>For each theta in the map that is still active (its index found by
 * {@code findHDPThetaStar} lies in {@code [0, m_kBar)}), the review is
 * temporarily assigned to that theta and the parent-class log-likelihood is
 * weighted by the theta's count, since log(likelihood^k) = k * log(likelihood).
 *
 * <p>The review's original theta is restored in a {@code finally} block, so the
 * review is not left pointing at a temporary theta if the parent computation
 * throws.
 *
 * @param r the review to score; its theta assignment is unchanged on return
 * @return the count-weighted sum of the per-cluster log-likelihoods
 */
protected double calcLogLikelihoodY(_Review r) {
    _HDPThetaStar oldTheta = r.getHDPThetaStar();
    HashMap<_HDPThetaStar, Integer> thetaCountMap = r.getThetaCountMap();
    double likelihood = 0;
    try {
        for (java.util.Map.Entry<_HDPThetaStar, Integer> entry : thetaCountMap.entrySet()) {
            _HDPThetaStar theta = entry.getKey();
            int index = findHDPThetaStar(theta);
            // Some of the clusters may have disappeared; ignore them.
            if (index >= m_kBar || index < 0) {
                continue;
            }
            r.setHDPThetaStar(theta);
            // log(likelihood^k) = k * log likelihood.
            likelihood += entry.getValue() * super.calcLogLikelihoodY(r);
        }
    } finally {
        // Always undo the temporary reassignment, even on an exception.
        r.setHDPThetaStar(oldTheta);
    }
    return likelihood;
}
Aggregations