use of structures._Word in project IR_Base by Linda-sunshine.
the class ACCTM_CHard method cal_logLikelihood_partial4Child.
/**
 * Sums the log-likelihood of every test word of the given child document.
 *
 * <p>For a word whose index is found in the parent document's sparse vector
 * the likelihood is the sum over topics of
 * {@code childWordByTopicProb * childTopicInDocProb}. For any other word each
 * topic term is additionally scaled by {@code childXInDocProb(0, ..)} divided
 * by the sum of {@code m_xSstat} plus the sum of {@code m_gamma}, and one
 * extra term built from {@code childLocalWordByTopicProb} and
 * {@code childXInDocProb(1, ..)} (same denominator) is added.
 *
 * <p>A likelihood whose absolute value is below 1e-10 is reported on stdout
 * and shifted by 1e-10 before the logarithm is taken.
 *
 * @param d the document; it is cast to {@code _ChildDoc4BaseWithPhi_Hard}
 * @return the sum of the per-word log-likelihoods over the test words
 */
@Override
protected double cal_logLikelihood_partial4Child(_Doc d) {
    _ChildDoc4BaseWithPhi_Hard child = (_ChildDoc4BaseWithPhi_Hard) d;

    double total = 0.0;
    double gammaTotal = Utils.sumOfArray(m_gamma);
    double xTotal = Utils.sumOfArray(child.m_xSstat);
    // Shared denominator of the terms used for words absent from the parent.
    double xNormalizer = xTotal + gammaTotal;

    for (_Word word : child.getTestWords()) {
        int wid = word.getIndex();
        double likelihood = 0;

        if (Utils.indexOf(child.m_parentDoc.getSparse(), wid) == -1) {
            // Word not found in the parent's sparse vector: scaled topic terms
            // plus the local-word term.
            for (int topic = 0; topic < number_of_topics; topic++) {
                likelihood += childWordByTopicProb(topic, wid) * childTopicInDocProb(topic, child) * childXInDocProb(0, child) / xNormalizer;
            }
            likelihood += childLocalWordByTopicProb(wid, child) * childXInDocProb(1, child) / xNormalizer;
        } else {
            // Word found in the parent's sparse vector: plain topic mixture.
            for (int topic = 0; topic < number_of_topics; topic++) {
                likelihood += childWordByTopicProb(topic, wid) * childTopicInDocProb(topic, child);
            }
        }

        // Keep the argument of log() away from zero.
        if (Math.abs(likelihood) < 1e-10) {
            System.out.println("wordLoglikelihood\t" + likelihood);
            likelihood += 1e-10;
        }

        total += Math.log(likelihood);
    }

    return total;
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class ACCTM_CZLR method setFeatures4Word.
/**
 * Attaches feature values to every word of every non-parent document.
 *
 * <p>Documents that are instances of {@code _ParentDoc} are skipped. For each
 * remaining document, every word is looked up by its index in the document's
 * sparse feature array and receives that entry's values.
 *
 * <p>NOTE(review): the result of {@code Utils.indexOf} is used as an array
 * index without a check; this presumably relies on every word of a document
 * being present in its own sparse vector -- confirm.
 *
 * @param docList the documents whose words should receive feature values
 */
protected void setFeatures4Word(ArrayList<_Doc> docList) {
    for (_Doc doc : docList) {
        if (!(doc instanceof _ParentDoc)) {
            _SparseFeature[] sparse = doc.getSparse();
            for (_Word word : doc.getWords()) {
                int position = Utils.indexOf(sparse, word.getIndex());
                word.setFeatures(sparse[position].getValues());
            }
        }
    }
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class ACCTM_CZLR method updateFeatureWeight.
/**
 * Trains an L2R_LR model (C = 1.0, eps = 0.01) on the words of all child
 * documents of {@code pDoc}, copies the learned coefficients into
 * {@code pDoc.m_featureWeight} and saves the model to
 * {@code <weightIterFolder>/<pDoc name>.txt}.
 *
 * <p>Each word contributes one training row: its feature values become
 * {@code FeatureNode}s with 1-based indices and its {@code getX()} value is
 * the target. Any exception raised while saving the model is reported on
 * stdout by message only and otherwise ignored.
 *
 * @param pDoc             parent document whose child words form the training
 *                         set and whose weight vector is overwritten
 * @param iter             not used by this method body
 * @param weightIterFolder folder the model file is written into
 */
public void updateFeatureWeight(_ParentDoc pDoc, int iter, File weightIterFolder) {
    ArrayList<Feature[]> rows = new ArrayList<Feature[]>();
    ArrayList<Double> labels = new ArrayList<Double>();
    // Length of the most recently seen word's feature array (0 if no words).
    int featureLen = 0;

    for (_ChildDoc child : pDoc.m_childDocs) {
        for (_Word word : child.getWords()) {
            double[] values = word.getFeatures();
            double label = word.getX();
            featureLen = values.length;

            Feature[] row = new Feature[featureLen];
            for (int col = 0; col < row.length; col++) {
                // Feature indices passed to the trainer are 1-based.
                row[col] = new FeatureNode(col + 1, values[col]);
            }

            rows.add(row);
            labels.add(label);
        }
    }

    // Flatten the collected rows and targets into the arrays the trainer expects.
    int rowCount = rows.size();
    Feature[][] featureMatrix = new Feature[rowCount][];
    double[] targetVal = new double[rowCount];
    for (int r = 0; r < rowCount; r++) {
        featureMatrix[r] = rows.get(r);
        targetVal[r] = labels.get(r);
    }

    Problem problem = new Problem();
    problem.l = rowCount;
    // NOTE(review): one more than the feature-array length; presumably leaves
    // room for a bias term -- confirm against the trainer's documentation.
    problem.n = featureLen + 1;
    problem.x = featureMatrix;
    problem.y = targetVal;

    Parameter param = new Parameter(SolverType.L2R_LR, 1.0, 0.01);
    Model model = Linear.train(problem, param);

    int coefCount = model.getNrFeature();
    for (int i = 0; i < coefCount; i++) {
        pDoc.m_featureWeight[i] = model.getDecfunCoef(i, 0);
    }

    File modelFile = new File(weightIterFolder, pDoc.getName() + ".txt");
    try {
        model.save(modelFile);
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method printParentTopicAssignment.
/**
 * Writes the topic assignment of every word of a parent document to
 * {@code <topicFolder>/<document name>.txt}.
 *
 * <p>One line is written per sentence: the sentence index, a tab, then a
 * tab-terminated {@code featureName:topic} entry for each word of the
 * sentence.
 *
 * <p>The writer is closed in a {@code finally} block so the file handle is
 * released (and buffered output flushed) even if a runtime exception is
 * thrown while the sentences are being written. A
 * {@code FileNotFoundException} while opening the file is reported via its
 * stack trace and nothing is written.
 *
 * @param d           the document; it is cast to {@code _ParentDoc}
 * @param topicFolder folder the output file is created in
 */
protected void printParentTopicAssignment(_Doc d, File topicFolder) {
    _ParentDoc pDoc = (_ParentDoc) d;
    String topicAssignmentFile = pDoc.getName() + ".txt";

    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(topicFolder, topicAssignmentFile));
        for (_Stn stnObj : pDoc.getSentences()) {
            pw.print(stnObj.getIndex() + "\t");
            for (_Word w : stnObj.getWords()) {
                int index = w.getIndex();
                int topic = w.getTopic();
                String featureName = m_corpus.getFeature(index);
                pw.print(featureName + ":" + topic + "\t");
            }
            pw.println();
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        // close() also flushes; runs on both the normal and the exceptional path.
        if (pw != null) {
            pw.close();
        }
    }
}
use of structures._Word in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method printChildTopicAssignment.
/**
 * Writes the topic assignment of every word of a document to
 * {@code <topicFolder>/<document name>.txt}.
 *
 * <p>The output is a single unterminated line holding one tab-terminated
 * {@code featureName:topic} entry per word.
 *
 * <p>The writer is closed in a {@code finally} block so the file handle is
 * released (and buffered output flushed) even if a runtime exception is
 * thrown while the words are being written. A {@code FileNotFoundException}
 * while opening the file is reported via its stack trace and nothing is
 * written.
 *
 * @param d           the document whose words are written
 * @param topicFolder folder the output file is created in
 */
protected void printChildTopicAssignment(_Doc d, File topicFolder) {
    String topicAssignmentFile = d.getName() + ".txt";

    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(topicFolder, topicAssignmentFile));
        for (_Word w : d.getWords()) {
            int index = w.getIndex();
            int topic = w.getTopic();
            String featureName = m_corpus.getFeature(index);
            pw.print(featureName + ":" + topic + "\t");
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        // close() also flushes; runs on both the normal and the exceptional path.
        if (pw != null) {
            pw.close();
        }
    }
}
Aggregations