Use of structures._SparseFeature in project IR_Base by Linda-sunshine.
Class _DPAdaptStruct, method evaluate().
public double evaluate(_Doc doc) {
    double prob = 0, sum;
    if (m_dim == 0) {
        // not adaptation based
        for (int k = 0; k < m_cluPosterior.length; k++) {
            // need to be fixed: here we assumed binary classification
            sum = Utils.dotProduct(CLRWithDP.m_thetaStars[k].getModel(), doc.getSparse(), 0);
            if (MTCLRWithDP.m_supWeights != null && MTCLRWithDP.m_q != 0)
                sum += MTCLRWithDP.m_q * Utils.dotProduct(MTCLRWithDP.m_supWeights, doc.getSparse(), 0);
            prob += m_cluPosterior[k] * Utils.logistic(sum);
        }
    } else {
        int n, m;
        double[] As;
        for (int k = 0; k < m_cluPosterior.length; k++) {
            As = CLRWithDP.m_thetaStars[k].getModel();
            // Bias term: w_s0*a0+b0.
            sum = As[0] * CLinAdaptWithDP.m_supWeights[0] + As[m_dim];
            for (_SparseFeature fv : doc.getSparse()) {
                n = fv.getIndex() + 1;
                m = m_featureGroupMap[n];
                sum += (As[m] * CLinAdaptWithDP.m_supWeights[n] + As[m_dim + m]) * fv.getValue();
            }
            prob += m_cluPosterior[k] * Utils.logistic(sum);
        }
    }
    // accumulate the prediction results during sampling procedure
    doc.m_pCount++; // >0.5?1:0;
    doc.m_prob += prob;
    return prob;
}
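Written out, evaluate is a posterior-weighted mixture of logistic predictions. In my reading of the code (with \pi_k = m_cluPosterior[k], a and b the scaling and shifting halves of As, w_s the super-user weights CLinAdaptWithDP.m_supWeights, and g(n) = m_featureGroupMap[n]), the adaptation branch computes

\[
p(y=1 \mid d) \;=\; \sum_{k} \pi_k \,\sigma\!\Big( a_{k,0}\, w_{s,0} + b_{k,0} + \sum_{n \in d} \big( a_{k,g(n)}\, w_{s,n} + b_{k,g(n)} \big)\, x_{d,n} \Big),
\]

while the non-adaptation branch (m_dim == 0) reduces to \( \sum_k \pi_k\, \sigma\big(\theta_k^\top x_d + q\, w_s^\top x_d\big) \), where \sigma is the logistic function.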
Use of structures._SparseFeature in project IR_Base by Linda-sunshine.
Class CLRWithHDP, method accumulateFeatureCount().
public void accumulateFeatureCount() {
    int cIndex = 0;
    // store the tf count of features in one cluster
    m_tf_count = new int[m_kBar][m_featureSize];
    for (_AdaptStruct user : m_userList) {
        for (_Review r : user.getReviews()) {
            if (r.getType() == rType.ADAPTATION) {
                cIndex = r.getHDPThetaStar().getIndex();
                // aggregate each cluster's word counts
                for (_SparseFeature fv : r.getSparse()) {
                    m_tf_count[cIndex][fv.getIndex()] += fv.getValue();
                }
            }
        }
    }
}
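The aggregation is simply "scatter-add each adaptation review's (index, value) pairs into its cluster's row". Below is a minimal, self-contained sketch of that pattern; TfCountSketch, IndexValue and clusterOf are illustrative stand-ins, not IR_Base classes, and only the getIndex()/getValue()-style access of _SparseFeature is mirrored.

// Sketch only: a hypothetical IndexValue pair stands in for _SparseFeature.
import java.util.Arrays;
import java.util.List;

public class TfCountSketch {
    record IndexValue(int index, double value) {}

    public static void main(String[] args) {
        int kBar = 2, featureSize = 5;
        int[][] tfCount = new int[kBar][featureSize];

        // two "reviews", each already assigned to a cluster index
        List<IndexValue> r0 = List.of(new IndexValue(1, 2), new IndexValue(3, 1));
        List<IndexValue> r1 = List.of(new IndexValue(1, 1), new IndexValue(4, 3));
        List<List<IndexValue>> reviews = List.of(r0, r1);
        int[] clusterOf = {0, 1};

        for (int i = 0; i < reviews.size(); i++)
            for (IndexValue fv : reviews.get(i))
                tfCount[clusterOf[i]][fv.index()] += (int) fv.value();

        // prints [[0, 2, 0, 1, 0], [0, 1, 0, 0, 3]]
        System.out.println(Arrays.deepToString(tfCount));
    }
}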
Use of structures._SparseFeature in project IR_Base by Linda-sunshine.
Class CLRWithHDP, method initThetaStars().
// Randomly assign user reviews to k user groups.
@Override
public void initThetaStars() {
    initPriorG0();
    _HDPAdaptStruct user;
    double L = 0, beta_sum = Utils.sumOfArray(m_betas), betaSum_lgamma = Utils.lgamma(beta_sum), sum = 0;
    int index;
    for (_AdaptStruct u : m_userList) {
        user = (_HDPAdaptStruct) u;
        for (_Review r : user.getReviews()) {
            // for all reviews pre-compute the likelihood of being generated from a random language model
            L = 0;
            // sum = v*beta+\sum \pi_v(global language model)
            sum = beta_sum;
            // for those v with mij,v=0, frac = \gamma(beta_v)/\gamma(beta_v)=1, log frac = 0
            for (_SparseFeature fv : r.getLMSparse()) {
                index = fv.getIndex();
                sum += fv.getValue();
                // log \gamma(m_v+\pi_v+beta)/\gamma(\pi_v+beta)
                // logGamma(\beta_i) is pre-computed for efficiency
                L += Utils.lgamma(fv.getValue() + m_betas[index]) - Utils.lgamma(m_betas[index]);
            }
            L += betaSum_lgamma - Utils.lgamma(sum);
            r.setL4NewCluster(L);
            if (r.getType() == rType.TEST)
                continue;
            sampleOneInstance(user, r);
        }
    }
}
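In other words, setL4NewCluster caches, per review, the log Dirichlet-multinomial (Polya) marginal likelihood of its language-model counts m_v under the prior \beta; in my reading of the loop:

\[
L \;=\; \log\Gamma\Big(\sum_v \beta_v\Big) \;-\; \log\Gamma\Big(\sum_v (m_v + \beta_v)\Big) \;+\; \sum_{v:\, m_v > 0} \Big( \log\Gamma(m_v + \beta_v) - \log\Gamma(\beta_v) \Big).
\]

Features with m_v = 0 contribute \log\Gamma(\beta_v) - \log\Gamma(\beta_v) = 0, which is why only the review's non-zero LM features are visited.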
Use of structures._SparseFeature in project IR_Base by Linda-sunshine.
Class CLinAdaptWithHDP, method gradientByFunc().
@Override
protected void gradientByFunc(_AdaptStruct u, _Doc review, double weight, double[] g) {
    _Review r = (_Review) review;
    // feature index and feature group index
    int n, k;
    int cIndex = r.getHDPThetaStar().getIndex();
    if (cIndex < 0 || cIndex >= m_kBar)
        System.err.println("Error, cannot find the theta star!");
    int offset = m_dim * 2 * cIndex;
    double delta = (review.getYLabel() - logit(review.getSparse(), r)) * weight;
    // Bias term for individual user.
    // a[0] = ws0*x0; x0=1
    g[offset] -= delta * m_gWeights[0];
    // b[0]
    g[offset + m_dim] -= delta;
    // Traverse all the feature dimensions to calculate the gradient for both individual users and the super user.
    for (_SparseFeature fv : review.getSparse()) {
        n = fv.getIndex() + 1;
        k = m_featureGroupMap[n];
        // w_si*x_di
        g[offset + k] -= delta * m_gWeights[n] * fv.getValue();
        // x_di
        g[offset + m_dim + k] -= delta * fv.getValue();
    }
}
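Grouping the per-feature updates, the loop accumulates (my reading, with \delta = weight \cdot (y_d - \sigma(f_k(x_d))), g(n) = m_featureGroupMap[n], and m_gWeights playing the role of the super-user weights w_s):

\[
g_{a_{k,g}} \;\mathrel{-}=\; \delta \sum_{n:\, g(n) = g} w_{s,n}\, x_{d,n},
\qquad
g_{b_{k,g}} \;\mathrel{-}=\; \delta \sum_{n:\, g(n) = g} x_{d,n},
\]

with the bias (x_{d,0} = 1) handled explicitly at the first slot of each block, and offset = 2 * m_dim * cIndex selecting the scaling/shifting blocks of cluster cIndex inside the flat gradient vector g. The negated accumulation is consistent with minimizing the negative log-likelihood.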
Use of structures._SparseFeature in project IR_Base by Linda-sunshine.
Class MTCLRWithHDP, method gradientByFunc().
@Override
protected void gradientByFunc(_AdaptStruct u, _Doc review, double weight, double[] g) {
    _Review r = (_Review) review;
    // feature index
    int n;
    int cIndex = r.getHDPThetaStar().getIndex();
    if (cIndex < 0 || cIndex >= m_kBar)
        System.err.println("Error, cannot find the theta star!");
    int offset = m_dim * cIndex;
    int offsetSup = m_dim * m_kBar;
    double delta = weight * (r.getYLabel() - logit(r.getSparse(), r));
    // Bias term: x0=1, one per cluster.
    g[offset] -= delta;
    // super model.
    g[offsetSup] -= m_q * delta;
    // Traverse all the feature dimensions to calculate the gradient.
    for (_SparseFeature fv : review.getSparse()) {
        n = fv.getIndex() + 1;
        // cluster model.
        g[offset + n] -= delta * fv.getValue();
        // super model.
        g[offsetSup + n] -= delta * fv.getValue() * m_q;
    }
}
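Here each cluster owns a block of m_dim weights and the shared (super) model sits in the last block at offset m_dim * m_kBar, scaled by m_q in both the logit and the gradient. Below is a minimal, self-contained sketch of that layout and sign convention under those assumptions; SharedLogitSketch, w, idx, val, dim, kBar and q are illustrative names, not the project's API.

// Sketch only: flat weight vector of (kBar + 1) blocks of size dim,
// clusters first, then the shared "super" model.
public class SharedLogitSketch {
    public static void main(String[] args) {
        int dim = 4, kBar = 2;                 // dim = 1 bias slot + 3 features
        double q = 0.5;                        // weight on the shared model
        double[] w = new double[dim * (kBar + 1)];
        java.util.Arrays.fill(w, 0.1);

        int[] idx = {0, 2};                    // non-zero feature indices (0-based)
        double[] val = {1.0, 2.0};             // corresponding feature values
        int cIndex = 1, y = 1;                 // assigned cluster and label

        int offset = dim * cIndex, offsetSup = dim * kBar;
        double sum = w[offset] + q * w[offsetSup];           // bias terms, x0 = 1
        for (int i = 0; i < idx.length; i++) {
            int n = idx[i] + 1;                              // shift past the bias slot
            sum += (w[offset + n] + q * w[offsetSup + n]) * val[i];
        }
        double p = 1.0 / (1.0 + Math.exp(-sum));             // logistic prediction
        double delta = y - p;

        double[] g = new double[w.length];
        g[offset] -= delta;                                  // cluster bias
        g[offsetSup] -= q * delta;                           // shared bias
        for (int i = 0; i < idx.length; i++) {
            int n = idx[i] + 1;
            g[offset + n] -= delta * val[i];                 // cluster weights
            g[offsetSup + n] -= q * delta * val[i];          // shared weights
        }
        System.out.printf("p = %.4f, delta = %.4f%n", p, delta);
    }
}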