use of structures._SparseFeature in project IR_Base by Linda-sunshine.
the class languageModelBaseLine method generateReferenceModel.
/**
 * Builds the corpus-level reference (unigram) language model.
 *
 * <p>First sums the feature values of every word index over all documents of
 * {@code m_corpus}, tracking the grand total in {@code m_allWordFrequency};
 * then divides each per-word sum by that total, so that {@code m_wordSstat}
 * ends up mapping word index to relative frequency.
 *
 * <p>NOTE(review): {@code m_allWordFrequency} is reset here but
 * {@code m_wordSstat} is not cleared; this presumably assumes the map is
 * empty on entry - confirm against the callers.
 */
public void generateReferenceModel() {
    m_allWordFrequency = 0;

    // Pass 1: accumulate raw per-word mass and the overall mass.
    for (_Doc doc : m_corpus.getCollection()) {
        for (_SparseFeature feature : doc.getSparse()) {
            int wordId = feature.getIndex();
            double weight = feature.getValue();
            m_allWordFrequency += weight;

            // Start from the running sum if this word was seen before.
            double total = m_wordSstat.containsKey(wordId)
                    ? m_wordSstat.get(wordId) + weight
                    : weight;
            m_wordSstat.put(wordId, total);
        }
    }

    // Pass 2: turn the accumulated sums into proportions of the overall mass.
    // Overwriting the value of an existing key is not a structural change,
    // so it is safe while iterating over the key set.
    for (int wordId : m_wordSstat.keySet()) {
        m_wordSstat.put(wordId, m_wordSstat.get(wordId) / m_allWordFrequency);
    }
}
use of structures._SparseFeature in project IR_Base by Linda-sunshine.
the class HTMM method ComputeEmissionProbsForDoc.
/**
 * Constructs the emission scores of every sentence of a document under each
 * topic.
 *
 * <p>For sentence {@code i} and topic {@code k}, {@code emission[i][k]} is
 * reset to zero and then receives the sum, over the sentence's sparse
 * features, of {@code value * topic_term_probabilty[k][index]}. Per the
 * original author's note, the quantities are in log-space.
 *
 * @param d the document whose sentences are scored
 */
void ComputeEmissionProbsForDoc(_Doc d) {
    for (int s = 0; s < d.getSenetenceSize(); s++) {
        _Stn sentence = d.getSentence(s);
        Arrays.fill(emission[s], 0);

        // Word-major traversal: each topic cell still receives its terms in
        // feature order, so the accumulated value is unchanged.
        for (_SparseFeature word : sentence.getFv()) {
            int wordId = word.getIndex();
            double weight = word.getValue();
            for (int topic = 0; topic < this.number_of_topics; topic++) {
                // all in log-space
                emission[s][topic] += weight * topic_term_probabilty[topic][wordId];
            }
        }
    }
}
use of structures._SparseFeature in project IR_Base by Linda-sunshine.
the class HTSM method ComputeEmissionProbsForDoc.
/**
 * Constructs the emission scores of every sentence of a document under each
 * topic, with special handling of the first sentence.
 *
 * <p>When the document's source type is 2 (newEgg, per the original comment),
 * the first sentence is restricted by its sentiment label: label 0 keeps only
 * the first half of the topics, label 1 keeps only the second half. Topics
 * outside the kept range get {@code Double.NEGATIVE_INFINITY}; any other
 * label leaves the full range active. Within the active range,
 * {@code emission[i][k]} is the sum over the sentence's sparse features of
 * {@code value * topic_term_probabilty[k][index]} (log-space, per the
 * original author's note).
 *
 * @param d the document whose sentences are scored
 */
@Override
void ComputeEmissionProbsForDoc(_Doc d) {
    final int half = this.number_of_topics / 2;

    for (int s = 0; s < d.getSenetenceSize(); s++) {
        _Stn sentence = d.getSentence(s);
        Arrays.fill(emission[s], 0);

        // Active topic range [firstTopic, lastTopic) for this sentence.
        int firstTopic = 0;
        int lastTopic = this.number_of_topics;

        // first sentence is specially handled for newEgg
        boolean labelledLead = (s == 0) && (d.getSourceType() == 2);
        if (labelledLead) {
            // get the sentiment label of the first sentence
            int label = sentence.getStnSentiLabel();
            if (label == 0) {
                // positive sentiment in the first half: rule out the second half
                lastTopic = half;
                Arrays.fill(emission[s], half, this.number_of_topics, Double.NEGATIVE_INFINITY);
            } else if (label == 1) {
                // negative sentiment in the second half: rule out the first half
                firstTopic = half;
                Arrays.fill(emission[s], 0, half, Double.NEGATIVE_INFINITY);
            }
        }

        for (int topic = firstTopic; topic < lastTopic; topic++) {
            for (_SparseFeature word : sentence.getFv()) {
                // all in log-space
                emission[s][topic] += word.getValue() * topic_term_probabilty[topic][word.getIndex()];
            }
        }
    }
}
use of structures._SparseFeature in project IR_Base by Linda-sunshine.
the class pLSA method calculate_E_step.
/**
 * Runs the E-step for a single document.
 *
 * <p>For every sparse feature (word) of the document it computes the
 * posterior weight of the background component against the topic mixture,
 * then spreads the remaining expected count over the topics in proportion to
 * {@code d.m_topics[k] * topic_term_probabilty[k][word]}. The expectations are
 * added to {@code d.m_sstat} and, when {@code m_collectCorpusStats} is set,
 * to {@code word_topic_sstat} as well.
 *
 * <p>NOTE(review): if the topic mixture for a word evaluates to zero, the
 * division below yields NaN; presumably smoothing keeps it positive - confirm.
 *
 * @param d the document to process
 * @return the value of {@code calculate_log_likelihood(d)} when corpus
 *         statistics are not being collected or {@code m_converge > 0};
 *         otherwise the constant 1, because no likelihood is needed
 */
@Override
public double calculate_E_step(_Doc d) {
    for (_SparseFeature term : d.getSparse()) {
        final int wordId = term.getIndex();
        final double count = term.getValue();

        // ----- posterior of the background component -----
        // Mixture probability of this word under the document's topics.
        double topicMix = 0;
        for (int k = 0; k < this.number_of_topics; k++) {
            topicMix += d.m_topics[k] * topic_term_probabilty[k][wordId];
        }
        final double backgroundMass = m_lambda * background_probability[wordId];
        final double backgroundPosterior =
                backgroundMass / (backgroundMass + (1 - m_lambda) * topicMix);

        // ----- accumulate the expected topic assignments -----
        // Share of the word count not explained by the background.
        final double topicCount = count * (1 - backgroundPosterior);
        for (int k = 0; k < this.number_of_topics; k++) {
            final double expected =
                    topicCount * d.m_topics[k] * topic_term_probabilty[k][wordId] / topicMix;
            d.m_sstat[k] += expected;
            // when testing, we don't need to collect sufficient statistics
            if (m_collectCorpusStats) {
                word_topic_sstat[k][wordId] += expected;
            }
        }
    }

    if (!m_collectCorpusStats || m_converge > 0) {
        return calculate_log_likelihood(d);
    }
    // no need to compute likelihood
    return 1;
}
use of structures._SparseFeature in project IR_Base by Linda-sunshine.
the class pLSA method initialize_probability.
/**
 * Initializes the model's probabilities from a document collection.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>fill {@code background_probability} with {@code d_beta - 1.0};</li>
 *   <li>for each document, call {@code setTopics(number_of_topics, d_alpha - 1.0)}
 *       (allocates and randomizes the topic proportions, per the original
 *       comment) and add its feature values into the background counts;</li>
 *   <li>L1-normalize the background counts;</li>
 *   <li>pass each row of {@code word_topic_sstat} to {@code Utils.randomize}
 *       with {@code d_beta - 1.0}, then call {@code imposePrior()} and
 *       {@code calculate_M_step(0)}.</li>
 * </ol>
 *
 * @param collection the documents used for initialization
 */
@Override
protected void initialize_probability(Collection<_Doc> collection) {
    // initialize background topic
    Arrays.fill(background_probability, d_beta - 1.0);

    for (_Doc doc : collection) {
        // initialize topic document proportion, p(z|d): allocate memory and randomize it
        doc.setTopics(number_of_topics, d_alpha - 1.0);

        // add this document's term values to the background counts
        _SparseFeature[] features = doc.getSparse();
        for (int t = 0; t < features.length; t++) {
            background_probability[features[t].getIndex()] += features[t].getValue();
        }
    }
    Utils.L1Normalization(background_probability);

    // initialize term topic matrix p(w|z,\phi)
    for (int topic = 0; topic < number_of_topics; topic++) {
        Utils.randomize(word_topic_sstat[topic], d_beta - 1.0);
    }

    imposePrior();
    calculate_M_step(0);
}
Aggregations