use of structures._SparseFeature in project IR_Base by Linda-sunshine.
the class pLSA method calculate_log_likelihood.
/**
 * Likelihood calculation for a single document.
 *
 * <p>The result is {@code docThetaLikelihood(d)} plus, for every sparse feature of the
 * document (word w with value c(w,d)):
 *
 * <pre>
 *   c(w,d) * log[ lambda * p(w|theta_B) + (1 - lambda) * sum_k p(w|z_k) * p(z_k|d) ]
 * </pre>
 *
 * <p>In the notation of the original notes, M is the number of documents and N is the
 * number of words in the corpus.
 *
 * <p>NOTE: cannot be used for unseen documents!
 *
 * @param d the document to score; {@code d.m_topics[k]} is used as p(z_k|d)
 * @return the log-likelihood contribution of {@code d}
 */
@Override
protected double calculate_log_likelihood(_Doc d) {
    double total = docThetaLikelihood(d);

    for (_SparseFeature feature : d.getSparse()) {
        final int wordId = feature.getIndex();

        // \sum_z p(w|z,\theta) p(z|d)
        double topicMixture = 0.0;
        for (int topic = 0; topic < this.number_of_topics; topic++) {
            topicMixture += d.m_topics[topic] * topic_term_probabilty[topic][wordId];
        }

        // (1-\lambda) p(w|d) + \lambda p(w|theta_b)
        final double smoothed =
                topicMixture * (1 - m_lambda) + this.background_probability[wordId] * m_lambda;

        total += feature.getValue() * Math.log(smoothed);
    }

    return total;
}
use of structures._SparseFeature in project IR_Base by Linda-sunshine.
the class twoTopic method calculate_log_likelihood.
/**
 * Computes the log-likelihood of a document under a two-component mixture.
 *
 * <p>Each sparse feature (word w with value c(w,d)) contributes
 * {@code c(w,d) * log(m_lambda * background_probability[w] + (1 - m_lambda) * m_theta[w])}.
 *
 * @param d the document whose sparse features are scored
 * @return the sum of the per-feature log-likelihood terms
 */
protected double calculate_log_likelihood(_Doc d) {
    double total = 0.0;

    for (_SparseFeature feature : d.getSparse()) {
        final int wordId = feature.getIndex();

        // Mix the background component with the m_theta component for this word.
        final double mixed =
                m_lambda * background_probability[wordId] + (1 - m_lambda) * m_theta[wordId];

        total += feature.getValue() * Math.log(mixed);
    }

    return total;
}
use of structures._SparseFeature in project IR_Base by Linda-sunshine.
the class twoTopic method calculate_E_step.
/**
 * Runs the E-step for one document and returns its log-likelihood.
 *
 * <p>For every sparse feature (word w with value c(w,d)) the expected count is written
 * to {@code m_sstat[w]}:
 *
 * <pre>
 *   c(w,d) * (1 - lambda) * m_theta[w]
 *          / ((1 - lambda) * m_theta[w] + lambda * background_probability[w])
 * </pre>
 *
 * @param d the document whose sparse features are processed
 * @return the value of {@code calculate_log_likelihood(d)}
 */
@Override
public double calculate_E_step(_Doc d) {
    for (_SparseFeature feature : d.getSparse()) {
        final int wordId = feature.getIndex();

        // Unnormalized weights of the two mixture components for this word.
        final double thetaMass = (1 - m_lambda) * m_theta[wordId];
        final double backgroundMass = m_lambda * background_probability[wordId];

        // compute the expectation
        m_sstat[wordId] = feature.getValue() * thetaMass / (thetaMass + backgroundMass);
    }

    return calculate_log_likelihood(d);
}
use of structures._SparseFeature in project IR_Base by Linda-sunshine.
the class Utils method dotProduct.
/**
 * Calculates the similarity (dot product) between two sparse vectors.
 *
 * <p>Both arrays are walked in parallel with one cursor each; values are multiplied and
 * accumulated whenever the two current features share the same index, otherwise the
 * cursor pointing at the smaller index advances.
 * NOTE(review): this walk only pairs up every shared index if both arrays are ordered by
 * ascending index - confirm that callers guarantee this.
 *
 * @param spVct1 first sparse vector, may be {@code null}
 * @param spVct2 second sparse vector, may be {@code null}
 * @return the accumulated product over shared indices, or 0 if either argument is
 *         {@code null} (open question from the original author: what is the minimal
 *         value of similarity?)
 */
public static double dotProduct(_SparseFeature[] spVct1, _SparseFeature[] spVct2) {
    if (spVct1 == null || spVct2 == null) {
        return 0;
    }

    double sum = 0;
    int i = 0;
    int j = 0;

    while (i < spVct1.length && j < spVct2.length) {
        final _SparseFeature left = spVct1[i];
        final _SparseFeature right = spVct2[j];
        final int leftIndex = left.getIndex();
        final int rightIndex = right.getIndex();

        if (leftIndex < rightIndex) {
            i++;
        } else if (leftIndex > rightIndex) {
            j++;
        } else {
            // Same feature index in both vectors: contribute to the product.
            sum += left.getValue() * right.getValue();
            i++;
            j++;
        }
    }

    return sum;
}
use of structures._SparseFeature in project IR_Base by Linda-sunshine.
the class Utils method createLibLinearFV.
/**
 * Converts an index-to-value map into an array of liblinear feature nodes.
 *
 * <p>The map is first turned into sparse features via {@code createSpVct}; each sparse
 * feature then becomes one {@code FeatureNode} whose index is shifted by one.
 *
 * @param spVct feature index mapped to feature value
 * @return an array with {@code spVct.size()} slots, filled in the order
 *         {@code createSpVct} yields the features
 */
public static Feature[] createLibLinearFV(HashMap<Integer, Double> spVct) {
    Feature[] nodes = new Feature[spVct.size()];

    int slot = 0;
    for (_SparseFeature feature : createSpVct(spVct)) {
        // svm's feature index starts from 1
        final int oneBasedIndex = 1 + feature.getIndex();
        nodes[slot] = new FeatureNode(oneBasedIndex, feature.getValue());
        slot++;
    }

    return nodes;
}
Aggregations