use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_CZLR method update_M_step.
/**
 * Accumulates the current sample into the running statistics and refits the
 * per-parent feature weights.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>abort the JVM if the statistics were already normalized;</li>
 *   <li>add {@code word_topic_sstat} into {@code topic_term_probabilty};</li>
 *   <li>call {@code collectParentStats}/{@code collectChildStats} on every
 *       training document of the matching type;</li>
 *   <li>make sure the folder {@code <weightFolder>/_<iter>} exists;</li>
 *   <li>call {@code updateFeatureWeight} for every parent document.</li>
 * </ol>
 *
 * @param iter         current iteration number; used to name the per-iteration folder
 * @param weightFolder parent folder under which the per-iteration folder is created
 */
protected void update_M_step(int iter, File weightFolder) {
    if (m_statisticsNormalized) {
        System.err.println("The statistics collector has been normlaized before, cannot further accumulate the samples!");
        System.exit(-1);
    }

    // collect the current sample
    for (int i = 0; i < this.number_of_topics; i++) {
        for (int v = 0; v < this.vocabulary_size; v++) {
            topic_term_probabilty[i][v] += word_topic_sstat[i][v];
        }
    }

    // used to estimate final theta for each document
    for (_Doc d : m_trainSet) {
        if (d instanceof _ParentDoc) {
            collectParentStats((_ParentDoc) d);
        } else if (d instanceof _ChildDoc) {
            collectChildStats((_ChildDoc) d);
        }
    }

    File weightIterFolder = new File(weightFolder, "_" + iter);
    // mkdirs() also creates missing ancestors; report a failure instead of
    // silently continuing, since every model save below would then fail.
    if (!weightIterFolder.exists() && !weightIterFolder.mkdirs()) {
        System.err.println("Failed to create weight folder: " + weightIterFolder.getAbsolutePath());
    }

    for (_Doc d : m_trainSet) {
        if (d instanceof _ParentDoc) {
            updateFeatureWeight((_ParentDoc) d, iter, weightIterFolder);
        }
    }
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_CZLR method updateFeatureWeight.
/**
 * Trains a liblinear model ({@code SolverType.L2R_LR}, C = 1.0, eps = 0.01)
 * on the words of all child documents of {@code pDoc}, copies the learned
 * coefficients into {@code pDoc.m_featureWeight}, and saves the model to
 * {@code <weightIterFolder>/<pDoc name>.txt}.
 *
 * <p>Each word contributes one training row: its feature array (feature
 * indices start at 1) and its {@code getX()} value as the target.
 *
 * @param pDoc             parent document whose children supply the training rows
 * @param iter             current iteration number; not used inside this method
 * @param weightIterFolder folder in which the model file is written
 */
public void updateFeatureWeight(_ParentDoc pDoc, int iter, File weightIterFolder) {
    ArrayList<Feature[]> rows = new ArrayList<Feature[]>();
    ArrayList<Double> targets = new ArrayList<Double>();
    int featureLen = 0;

    // One training row per word across all child documents.
    for (_ChildDoc child : pDoc.m_childDocs) {
        for (_Word word : child.getWords()) {
            double[] values = word.getFeatures();
            double target = word.getX();

            // Keeps the length of the most recently seen feature array.
            featureLen = values.length;
            Feature[] row = new Feature[featureLen];
            int col = 0;
            while (col < featureLen) {
                row[col] = new FeatureNode(col + 1, values[col]);
                col++;
            }

            rows.add(row);
            targets.add(target);
        }
    }

    int sampleCount = rows.size();
    Feature[][] featureMatrix = rows.toArray(new Feature[sampleCount][]);
    double[] targetVal = new double[sampleCount];
    int pos = 0;
    for (Double target : targets) {
        targetVal[pos++] = target;
    }

    Problem problem = new Problem();
    problem.l = sampleCount;
    // featureNum
    problem.n = featureLen + 1;
    problem.x = featureMatrix;
    problem.y = targetVal;

    Parameter param = new Parameter(SolverType.L2R_LR, 1.0, 0.01);
    Model model = Linear.train(problem, param);

    // NOTE(review): liblinear documents getDecfunCoef with 1-based feature
    // indices; confirm that slot 0 of m_featureWeight is meant to be queried
    // with index 0.
    int featureNum = model.getNrFeature();
    int f = 0;
    while (f < featureNum) {
        pDoc.m_featureWeight[f] = model.getDecfunCoef(f, 0);
        f++;
    }

    File modelFile = new File(weightIterFolder, pDoc.getName() + ".txt");
    try {
        // The model is saved on every call, regardless of iter.
        model.save(modelFile);
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method printParameter.
/**
 * Writes topic proportions of parent documents and their child documents
 * to two tab-separated text files.
 *
 * <p>For each {@code _ParentDoc} in {@code docList}, one line is written to
 * the parent file: name, the literal {@code topicProportion}, the document's
 * {@code m_topics} values, then for every sentence a {@code sentence<idx+1>}
 * label followed by that sentence's {@code m_topics} values. For each of its
 * child documents one line is written to the child file: parent name, child
 * name, {@code topicProportion} with {@code m_xTopics[0]} values, and
 * {@code xProportion} with {@code m_xProportion} values.
 *
 * <p>Any exception is printed via {@code printStackTrace()} and not
 * rethrown; both writers are closed whether or not writing succeeded.
 *
 * @param parentParameterFile path of the output file for parent documents
 * @param childParameterFile  path of the output file for child documents
 * @param docList             documents to scan; non-parent entries are skipped
 */
public void printParameter(String parentParameterFile, String childParameterFile, ArrayList<_Doc> docList) {
    System.out.println("printing parameter");
    PrintWriter parentParaOut = null;
    PrintWriter childParaOut = null;
    try {
        System.out.println(parentParameterFile);
        System.out.println(childParameterFile);
        parentParaOut = new PrintWriter(new File(parentParameterFile));
        childParaOut = new PrintWriter(new File(childParameterFile));

        for (_Doc d : docList) {
            if (!(d instanceof _ParentDoc)) {
                continue;
            }

            parentParaOut.print(d.getName() + "\t");
            parentParaOut.print("topicProportion\t");
            for (int k = 0; k < number_of_topics; k++) {
                parentParaOut.print(d.m_topics[k] + "\t");
            }
            for (_Stn stnObj : d.getSentences()) {
                parentParaOut.print("sentence" + (stnObj.getIndex() + 1) + "\t");
                for (int k = 0; k < number_of_topics; k++) {
                    parentParaOut.print(stnObj.m_topics[k] + "\t");
                }
            }
            parentParaOut.println();

            for (_ChildDoc cDoc : ((_ParentDoc) d).m_childDocs) {
                childParaOut.print(d.getName() + "\t");
                childParaOut.print(cDoc.getName() + "\t");
                childParaOut.print("topicProportion\t");
                for (int k = 0; k < number_of_topics; k++) {
                    childParaOut.print(cDoc.m_xTopics[0][k] + "\t");
                }
                childParaOut.print("xProportion\t");
                for (int x = 0; x < m_gamma.length; x++) {
                    childParaOut.print(cDoc.m_xProportion[x] + "\t");
                }
                childParaOut.println();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // close() flushes; closing here prevents leaking the file handles
        // when opening the second file or writing a document fails.
        if (parentParaOut != null) {
            parentParaOut.close();
        }
        if (childParaOut != null) {
            childParaOut.close();
        }
    }
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method printXProportion.
/**
 * Writes one tab-separated line per child document: parent name, child
 * name, {@code m_xProportion[0]} and {@code m_xProportion[1]}.
 *
 * <p>Every child is cast to {@code _ChildDoc4BaseWithPhi}. Any exception
 * (including a failed cast) is printed via {@code printStackTrace()} and
 * not rethrown; the writer is closed whether or not writing succeeded.
 *
 * @param xProportionFile path of the output file
 * @param docList         documents to scan; non-parent entries are skipped
 */
public void printXProportion(String xProportionFile, ArrayList<_Doc> docList) {
    System.out.println("x proportion for parent doc");
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(xProportionFile));
        for (_Doc d : docList) {
            if (!(d instanceof _ParentDoc)) {
                continue;
            }
            for (_ChildDoc doc : ((_ParentDoc) d).m_childDocs) {
                _ChildDoc4BaseWithPhi cDoc = (_ChildDoc4BaseWithPhi) doc;
                pw.print(d.getName() + "\t");
                pw.print(cDoc.getName() + "\t");
                pw.print(cDoc.m_xProportion[0] + "\t");
                pw.print(cDoc.m_xProportion[1]);
                pw.println();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // close() flushes; closing here prevents leaking the file handle
        // when an exception interrupts the loop.
        if (pw != null) {
            pw.close();
        }
    }
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method printParentTopicAssignment.
/**
 * Writes the per-word topic assignments of a parent document to
 * {@code <topicFolder>/<doc name>.txt}.
 *
 * <p>One line is written per sentence: the sentence index, then for every
 * word a {@code feature:topic} pair, all tab-separated. The feature name is
 * looked up through {@code m_corpus.getFeature(index)}.
 *
 * <p>A {@code FileNotFoundException} while opening the file is printed and
 * not rethrown. The writer is closed even if a runtime exception escapes
 * the loop.
 *
 * @param d           document to print; cast to {@code _ParentDoc}
 * @param topicFolder folder in which the output file is created
 */
protected void printParentTopicAssignment(_Doc d, File topicFolder) {
    _ParentDoc pDoc = (_ParentDoc) d;
    String topicAssignmentFile = pDoc.getName() + ".txt";
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(topicFolder, topicAssignmentFile));
        for (_Stn stnObj : pDoc.getSentences()) {
            pw.print(stnObj.getIndex() + "\t");
            for (_Word w : stnObj.getWords()) {
                int index = w.getIndex();
                int topic = w.getTopic();
                String featureName = m_corpus.getFeature(index);
                pw.print(featureName + ":" + topic + "\t");
            }
            pw.println();
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        // close() flushes; closing here prevents leaking the file handle
        // when an unchecked exception escapes the loop.
        if (pw != null) {
            pw.close();
        }
    }
}
Aggregations