use of structures._HDPThetaStar in project IR_Base by Linda-sunshine.
the class CLRWithMMB method MLEB.
/**
 * Builds the m_kBar x m_kBar estimate of the B matrix from the per-cluster edge counts.
 *
 * For every ordered pair (row, col) of currently active thetas the entry is
 * (e_1 + m_abcd[0] - 1) / (e_0 + e_1 + m_abcd[0] + m_abcd[1] - 2), where e_0 / e_1 are the
 * counts returned by getConnectionEdgeCount for edge type 0 / 1. The value is written to both
 * [row][col] and [col][row]; since the loops visit every ordered pair, the value left in the
 * matrix for a pair is the one computed on the later visit.
 *
 * NOTE(review): no guard exists for a zero denominator - confirm m_abcd makes that impossible.
 *
 * @return a freshly allocated matrix of estimated entries
 */
public double[][] MLEB() {
    final double[][] estimate = new double[m_kBar][m_kBar];
    for (int row = 0; row < m_kBar; row++) {
        final _HDPThetaStar rowTheta = m_hdpThetaStars[row];
        for (int col = 0; col < m_kBar; col++) {
            final _HDPThetaStar colTheta = m_hdpThetaStars[col];
            final int zeroEdges = rowTheta.getConnectionEdgeCount(colTheta, 0);
            final int oneEdges = rowTheta.getConnectionEdgeCount(colTheta, 1);
            // Kept as a single expression so the arithmetic typing is exactly as before.
            final double entry = (oneEdges + m_abcd[0] - 1) / (zeroEdges + oneEdges + m_abcd[0] + m_abcd[1] - 2);
            // Mirror the value so the returned matrix is symmetric.
            estimate[row][col] = entry;
            estimate[col][row] = entry;
        }
    }
    return estimate;
}
use of structures._HDPThetaStar in project IR_Base by Linda-sunshine.
the class CLRWithMMB method sampleEdge.
/**
 * Samples the cluster for the edge i -> j and records the result.
 *
 * Each existing cluster k gets the log-weight
 * calcLogLikelihoodEMarginal(theta_k, m_indicator[j][i], e) + log(calcGroupPopularity(ui, k, gamma_k)),
 * which is stored on the theta via setProportion (log space) for use by sampleEdgeInLogSpace.
 * The "new cluster" option contributes log(m_eta * m_gamma_e) + m_pNew[e] to the normalizer.
 *
 * @param i index of the source user in m_userList
 * @param j index of the target user in m_userList
 * @param e edge type; used as an index into m_pNew and m_MNL
 */
protected void sampleEdge(int i, int j, int e) {
    // Theta currently recorded for the opposite direction (j -> i).
    final _HDPThetaStar reverseTheta = m_indicator[j][i];
    final _MMBAdaptStruct ui = (_MMBAdaptStruct) m_userList.get(i);
    final _MMBAdaptStruct uj = (_MMBAdaptStruct) m_userList.get(j);

    double logNorm = 0;
    for (int idx = 0; idx < m_kBar; idx++) {
        final _HDPThetaStar candidate = m_hdpThetaStars[idx];

        // Diagnostics only: execution continues even when these fire.
        if (!reverseTheta.isValid()) {
            System.out.println("[Error]Invalid theta inside sampleEdge()!!");
        }
        // p(e_ij | z_{i->j}, z_{j->i}, B)
        double logWeight = calcLogLikelihoodEMarginal(candidate, reverseTheta, e);
        if (Double.isInfinite(logWeight)) {
            System.out.println("Infinite!");
        }

        // p(z = k | gamma, eta)
        final double gamma = candidate.getGamma();
        logWeight += Math.log(calcGroupPopularity(ui, idx, gamma));

        // Stored in log space.
        candidate.setProportion(logWeight);

        // The first cluster seeds the normalizer; later ones are accumulated in log space.
        logNorm = (idx == 0) ? logWeight : Utils.logSum(logNorm, logWeight);
    }

    // Add the weight of opening a new cluster.
    final double logNew = Math.log(m_eta * m_gamma_e) + m_pNew[e];
    logNorm = Utils.logSum(logNorm, logNew);

    // -1 from the sampler means "new cluster"; the freshly created one sits at the last active slot.
    int chosen = sampleEdgeInLogSpace(logNorm, e);
    if (chosen == -1) {
        sampleNewCluster4Edge();
        chosen = m_kBar - 1;
    }

    // Book-keeping after sampling z_ij: theta-side count, global count, then user-side info.
    final _HDPThetaStar picked = m_hdpThetaStars[chosen];
    picked.updateEdgeCount(e, 1);
    m_MNL[e]++;
    ui.addNeighbor(uj, picked, e);
    ui.incHDPThetaStarEdgeSize(picked, 1);

    // Keep the reference in the indicator matrix for later lookups.
    m_indicator[i][j] = picked;
}
use of structures._HDPThetaStar in project IR_Base by Linda-sunshine.
the class CLRWithMMB method sampleZeroEdgeJoint.
/**
 * Samples a zero edge (e_ij = 0) from the joint distribution over (c_ij, z_ij, z_ji).
 *
 * Cases considered:
 *   0. background model, log-prob log(1 - m_rho); not stored in the table;
 *   1. both memberships are existing clusters (g, h): calcLogLikelihoodE(theta_g, theta_h, 0)
 *      + log(gamma_g) + log(gamma_h);
 *   2. exactly one membership is the new cluster (index m_kBar);
 *   3. both memberships are the new cluster.
 * The table is symmetric, so only the upper triangle is computed and off-diagonal entries are
 * counted twice (via + m_log2) in the normalizer.
 *
 * @param i index of the first user in m_userList
 * @param j index of the second user in m_userList
 */
public void sampleZeroEdgeJoint(int i, int j) {
    // Slot kBar of the table stands for "new cluster".
    final int kBar = m_kBar;
    final int dim = kBar + 1;

    // Step 1: fill the table of joint log-probabilities and accumulate the normalizer.
    // Case 0 (background) seeds the normalizer.
    final double logBackground = Math.log(1 - m_rho);
    double logNorm = logBackground;

    final double[][] logJoint = new double[dim][dim];
    for (int row = 0; row < dim; row++) {
        Arrays.fill(logJoint[row], Double.NEGATIVE_INFINITY);
    }

    final _MMBAdaptStruct ui = (_MMBAdaptStruct) m_userList.get(i);
    final _MMBAdaptStruct uj = (_MMBAdaptStruct) m_userList.get(j);

    // Case 1: both memberships come from existing thetas.
    for (int row = 0; row < kBar; row++) {
        final _HDPThetaStar rowTheta = m_hdpThetaStars[row];
        for (int col = row; col < kBar; col++) {
            final _HDPThetaStar colTheta = m_hdpThetaStars[col];
            final double logLikelihood = calcLogLikelihoodE(rowTheta, colTheta, 0);
            // The two prior terms are summed first, then added to the likelihood.
            final double logPrior = Math.log(rowTheta.getGamma()) + Math.log(colTheta.getGamma());
            final double logP = logLikelihood + logPrior;
            logJoint[row][col] = logP;
            if (row != col) {
                logJoint[col][row] = logP;
                // Off-diagonal pairs appear twice in the full table.
                logNorm = Utils.logSum(logNorm, logP + m_log2);
            } else {
                logNorm = Utils.logSum(logNorm, logP);
            }
        }
    }

    // Case 2: one side is the new cluster. Shared factor: log(rho) + log(b) - log(a + b).
    final double logNewBase = Math.log(m_rho) + Math.log(m_abcd[1]) - Math.log(m_abcd[0] + m_abcd[1]);
    for (int idx = 0; idx < kBar; idx++) {
        final double gamma = m_hdpThetaStars[idx].getGamma();
        // A zero weight on either side leaves the entry at -Infinity.
        if (m_gamma_e != 0 && gamma != 0) {
            final double logP = logNewBase + Math.log(m_gamma_e) + Math.log(gamma);
            logJoint[idx][kBar] = logP;
            logJoint[kBar][idx] = logP;
            logNorm = Utils.logSum(logNorm, logP + m_log2);
        }
    }

    // Case 3: both sides are the new cluster.
    // NOTE(review): this diagonal term adds m_log2 where the existing-cluster diagonal uses
    // gamma * gamma; confirm whether 2 * log(m_gamma_e) was intended.
    if (m_gamma_e != 0) {
        logJoint[kBar][kBar] = logNewBase + Math.log(m_gamma_e) + m_log2;
        logNorm = Utils.logSum(logNorm, logJoint[kBar][kBar]);
    }

    // Step 2: draw one cell of the table (or the background case).
    final int flat = sampleIn2DimArrayLogSpace(logNorm, logBackground, logJoint);

    // Step 3: interpret the draw. -1 means the background model was picked.
    if (flat == -1) {
        updateSampleSize(2, 2);
        return;
    }

    // Otherwise the draw is a flattened (g, h) index into the (kBar + 1)-wide table.
    final int g = flat / dim;
    final int h = flat % dim;
    if (g == kBar || h == kBar) {
        // At least one side picked the new-cluster slot, so materialise the new cluster.
        sampleNewCluster4Edge();
    }

    // Record z_ij on the theta side and on user i.
    final _HDPThetaStar thetaG = m_hdpThetaStars[g];
    thetaG.updateEdgeCount(0, 1);
    ui.addNeighbor(uj, thetaG, 0);
    ui.incHDPThetaStarEdgeSize(thetaG, 1);
    m_indicator[i][j] = thetaG;
    updateSampleSize(0, 1);

    // Record z_ji on the theta side and on user j.
    final _HDPThetaStar thetaH = m_hdpThetaStars[h];
    thetaH.updateEdgeCount(0, 1);
    uj.addNeighbor(ui, thetaH, 0);
    uj.incHDPThetaStarEdgeSize(thetaH, 1);
    m_indicator[j][i] = thetaH;
    updateSampleSize(0, 1);

    addConnection(ui, uj, 0);
}
use of structures._HDPThetaStar in project IR_Base by Linda-sunshine.
the class CLRWithMMB method updateDocMembership.
/**
 * Detaches review r from its current theta, on both the user side and the theta side.
 *
 * Overridden because the condition for retiring a cluster differs here: the theta is retired
 * only when it has neither members nor edges left. A retired theta gives its gamma back to
 * m_gamma_e, is swapped to the last active slot, reset, and m_kBar is decremented.
 *
 * @param user the user owning the review
 * @param r    the review being removed from its current cluster
 */
@Override
public void updateDocMembership(_HDPAdaptStruct user, _Review r) {
    final _HDPThetaStar cluster = r.getHDPThetaStar();

    // User side: one fewer review assigned to this theta.
    user.incHDPThetaStarMemSize(cluster, -1);

    // Theta side: drop the language-model stats before decreasing the document count.
    cluster.rmLMStat(r.getLMSparse());
    cluster.updateMemCount(-1);

    // Still backed by reviews or edges: nothing more to do.
    if (cluster.getMemSize() != 0 || cluster.getTotalEdgeSize() != 0) {
        return;
    }

    System.out.println("[Debug]Zero cluster detected in updating doc!");
    // Check the language-model counts of the emptied cluster.
    LMStatSanityCheck(cluster);

    // Recycle the gamma of the emptied cluster.
    m_gamma_e += cluster.getGamma();

    // Park the disabled theta at the end of the active range, then shrink the range.
    final int position = findHDPThetaStar(cluster);
    swapTheta(m_kBar - 1, position);
    cluster.reset();
    m_kBar--;
}
use of structures._HDPThetaStar in project IR_Base by Linda-sunshine.
the class CLRWithHDP method swapTheta.
/**
 * Exchanges the thetas stored at positions a and b of m_hdpThetaStars.
 * Does nothing when both positions are the same.
 *
 * @param a first position
 * @param b second position
 */
@Override
protected void swapTheta(int a, int b) {
    if (a != b) {
        // Read both slots first (a, then b), then write them back crossed.
        final _HDPThetaStar atA = m_hdpThetaStars[a];
        final _HDPThetaStar atB = m_hdpThetaStars[b];
        m_hdpThetaStars[a] = atB;
        m_hdpThetaStars[b] = atA;
    }
}
Aggregations