Use of structures._SparseFeature in the project IR_Base by Linda-sunshine: class ACCTM_C_test, method generateLanguageModel.
/**
 * Builds a background unigram language model from the corpus.
 * Sums the sparse-feature values of every non-parent document into
 * {@code m_wordSstat} (keyed by word index), then normalizes the counts
 * in place so each entry becomes the word's probability mass.
 */
protected void generateLanguageModel() {
    double totalWord = 0;
    for (_Doc d : m_corpus.getCollection()) {
        // Parent documents are excluded from the background model.
        if (d instanceof _ParentDoc)
            continue;
        for (_SparseFeature f : d.getSparse()) {
            double val = f.getValue();
            totalWord += val;
            // merge() replaces the verbose containsKey/get/put idiom.
            m_wordSstat.merge(f.getIndex(), val, Double::sum);
        }
    }
    // Normalize counts into probabilities in place.
    final double norm = totalWord;
    m_wordSstat.replaceAll((wid, val) -> val / norm);
}
Use of structures._SparseFeature in the project IR_Base by Linda-sunshine: class ACCTM_C_test, method printParentPhi.
/**
 * Writes a parent document's per-word topic distribution (phi) to
 * {@code <docName>.txt} under {@code phiFolder}: one line per sparse
 * feature, formatted as "featureName:\t<phi over all topics>".
 *
 * @param d         the document to print; must be a {@link _ParentDoc}
 * @param phiFolder destination directory for the output file
 */
protected void printParentPhi(_Doc d, File phiFolder) {
    _ParentDoc pDoc = (_ParentDoc) d;
    String parentPhiFileName = pDoc.getName() + ".txt";
    _SparseFeature[] fv = pDoc.getSparse();
    // try-with-resources guarantees the writer is closed (and flushed)
    // even if an exception is thrown while writing — the original
    // leaked the PrintWriter on failure.
    try (PrintWriter parentPW = new PrintWriter(new File(phiFolder, parentPhiFileName))) {
        for (int n = 0; n < fv.length; n++) {
            String featureName = m_corpus.getFeature(fv[n].getIndex());
            parentPW.print(featureName + ":\t");
            // NOTE(review): assumes m_phi rows align with the sparse
            // feature order — confirm against the model's E-step.
            for (int k = 0; k < number_of_topics; k++)
                parentPW.print(pDoc.m_phi[n][k] + "\t");
            parentPW.println();
        }
    } catch (Exception ex) {
        // Best-effort output: keep the original swallow-and-log behavior.
        ex.printStackTrace();
    }
}
Use of structures._SparseFeature in the project IR_Base by Linda-sunshine: class AspectAnalyzer, method collectStats.
/**
 * Accumulates aspect statistics from every annotated sentence of a document:
 * each word in an annotated sentence contributes one document-frequency count
 * to its feature's aspect bucket, and the aspect's sentence count is bumped.
 */
void collectStats(_Doc d) {
    for (_Stn sentence : d.getSentences()) {
        int aspect = sentence.getTopic();
        if (aspect < 0)
            continue; // sentence is not annotated with an aspect
        for (_SparseFeature feature : sentence.getFv()) {
            String featureName = m_featureNames.get(feature.getIndex());
            m_featureStat.get(featureName).addOneDF(aspect);
        }
        m_aspectDist[aspect]++;
    }
}
Use of structures._SparseFeature in the project IR_Base by Linda-sunshine: class IndividualSVM, method createLibLinearFV.
/**
 * Converts a review's sparse feature vector into an array of liblinear
 * feature nodes, optionally appending a constant bias node at the end.
 *
 * @param r         the review whose sparse features are converted
 * @param userIndex unused here; kept for interface compatibility
 * @return liblinear feature nodes with one-based indices
 */
public Feature[] createLibLinearFV(_Review r, int userIndex) {
    _SparseFeature[] fvs = r.getSparse();
    // Reserve one extra slot for the bias term when it is enabled.
    Feature[] node = m_bias ? new Feature[fvs.length + 1] : new Feature[fvs.length];
    int pos = 0;
    for (_SparseFeature fv : fvs) {
        // liblinear's feature index starts from one.
        node[pos++] = new FeatureNode(fv.getIndex() + 1, fv.getValue());
    }
    if (m_bias) {
        // Append the user model's bias term.
        node[fvs.length] = new FeatureNode(m_featureSize + 1, 1.0);
    }
    return node;
}
Use of structures._SparseFeature in the project IR_Base by Linda-sunshine: class LogisticRegression, method calcFuncGradient.
// Computes the regularized negative log-likelihood of the training set and,
// as a side effect, fills m_g with its gradient w.r.t. m_beta. LBFGS minimizes,
// so both value and gradient are negated relative to the likelihood.
protected double calcFuncGradient(Collection<_Doc> trainSet) {
    double gValue = 0, fValue = 0;
    double Pij = 0, logPij = 0;
    // Add the L2 regularization: initializes every gradient slot with the
    // penalty term 2*lambda*beta[i]; the data terms are accumulated below.
    double L2 = 0, b;
    for (int i = 0; i < m_beta.length; i++) {
        b = m_beta[i];
        m_g[i] = 2 * m_lambda * b;
        L2 += b * b;
    }
    // The computation complexity is n*classNo.
    int Yi;
    _SparseFeature[] fv;
    double weight;
    for (_Doc doc : trainSet) {
        Yi = doc.getYLabel();
        fv = doc.getSparse();
        weight = doc.getWeight();
        // compute P(Y=j|X=xi) into m_cache (one entry per class)
        calcPosterior(fv, m_cache);
        for (int j = 0; j < m_classNo; j++) {
            Pij = m_cache[j];
            logPij = Math.log(Pij);
            if (Yi == j) {
                // true class: gradient factor is (Pij - 1); only the true
                // class contributes to the (weighted) log-likelihood value
                gValue = Pij - 1.0;
                fValue += logPij * weight;
            } else
                gValue = Pij;
            // weight might be different for different documents
            gValue *= weight;
            // each class owns a contiguous block of (m_featureSize + 1)
            // parameters; slot 0 of the block is the bias term
            int offset = j * (m_featureSize + 1);
            m_g[offset] += gValue;
            // (Yij - Pij) * Xi — feature gradients are offset by +1 past the bias
            for (_SparseFeature sf : fv) m_g[offset + sf.getIndex() + 1] += gValue * sf.getValue();
        }
    }
    // LBFGS is used to calculate the minimum value while we are trying to calculate the maximum likelihood.
    return m_lambda * L2 - fValue;
}
Aggregations