use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method generateLanguageModel.
/**
 * Fills {@code m_wordSstat} with relative feature weights computed from every
 * document in the corpus that is not a {@code _ParentDoc}.
 *
 * <p>Pass one adds each sparse feature's value to the map entry keyed by the
 * feature index and to a grand total. Pass two divides every entry currently
 * held in {@code m_wordSstat} by that grand total.
 */
protected void generateLanguageModel() {
    double corpusMass = 0;

    // Pass 1: accumulate per-feature mass over the non-parent documents.
    for (_Doc doc : m_corpus.getCollection()) {
        if (!(doc instanceof _ParentDoc)) {
            for (_SparseFeature feature : doc.getSparse()) {
                int featureId = feature.getIndex();
                double weight = feature.getValue();
                corpusMass += weight;

                double accumulated = m_wordSstat.containsKey(featureId)
                        ? m_wordSstat.get(featureId) + weight
                        : weight;
                m_wordSstat.put(featureId, accumulated);
            }
        }
    }

    // Pass 2: normalise by the grand total. Overwriting the value of an
    // existing key does not change the key set being iterated.
    for (int featureId : m_wordSstat.keySet()) {
        m_wordSstat.put(featureId, m_wordSstat.get(featureId) / corpusMass);
    }
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method printParentPhi.
/**
 * Dumps the per-feature topic values ({@code m_phi}) of one parent document
 * to the file {@code <document name>.txt} inside {@code phiFolder}.
 *
 * <p>One line is written per sparse feature: the feature name, {@code ":\t"},
 * then {@code number_of_topics} values from {@code pDoc.m_phi[n][k]}, each
 * followed by a tab.
 *
 * <p>Failures while opening or writing the file are reported with
 * {@code printStackTrace()} and otherwise ignored (best effort).
 *
 * @param d         document to dump; it is cast to {@code _ParentDoc}, so any
 *                  other type raises a {@code ClassCastException}
 * @param phiFolder directory that receives the output file
 */
protected void printParentPhi(_Doc d, File phiFolder) {
    _ParentDoc pDoc = (_ParentDoc) d;
    String parentPhiFileName = pDoc.getName() + ".txt";
    _SparseFeature[] fv = pDoc.getSparse();

    // try-with-resources closes (and therefore flushes) the writer even when
    // writing a row throws, so the file handle is never leaked.
    try (PrintWriter parentPW = new PrintWriter(new File(phiFolder, parentPhiFileName))) {
        for (int n = 0; n < fv.length; n++) {
            int index = fv[n].getIndex();
            String featureName = m_corpus.getFeature(index);
            parentPW.print(featureName + ":\t");
            for (int k = 0; k < number_of_topics; k++) {
                parentPW.print(pDoc.m_phi[n][k] + "\t");
            }
            parentPW.println();
        }
    } catch (Exception ex) {
        // Best-effort diagnostic output: report and carry on.
        ex.printStackTrace();
    }
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method printTopKChild4Stn.
/**
 * Writes, for every sentence of every parent document in the training set,
 * the child documents returned by {@code rankChild4StnByLikelihood} together
 * with their scores.
 *
 * <p>Output file: {@code filePrefix + "topChild4Stn.txt"}. For each parent
 * document a header line {@code <name>\t<sentence count>} is written,
 * followed by one line per sentence:
 * {@code <sentence index + 1>\t<child>:<score>\t...}.
 *
 * <p>Failures while opening or writing the file are reported with
 * {@code printStackTrace()} and otherwise ignored (best effort).
 *
 * @param filePrefix prefix prepended to the output file name
 * @param topK       intended cap on the number of children per sentence;
 *                   NOTE: no cutoff is applied at present, so every entry of
 *                   the returned map is written
 */
protected void printTopKChild4Stn(String filePrefix, int topK) {
    String topKChild4StnFile = filePrefix + "topChild4Stn.txt";

    // try-with-resources closes (and therefore flushes) the writer even when
    // ranking or writing throws, so the file handle is never leaked.
    try (PrintWriter pw = new PrintWriter(new File(topKChild4StnFile))) {
        for (_Doc d : m_trainSet) {
            if (!(d instanceof _ParentDoc)) {
                continue;
            }
            _ParentDoc pDoc = (_ParentDoc) d;
            pw.println(pDoc.getName() + "\t" + pDoc.getSenetenceSize());

            for (_Stn stnObj : pDoc.getSentences()) {
                HashMap<String, Double> likelihoodMap = rankChild4StnByLikelihood(stnObj, pDoc);

                pw.print((stnObj.getIndex() + 1) + "\t");
                // All entries are written, in the map's own iteration order.
                for (String childDocName : likelihoodMap.keySet()) {
                    pw.print(childDocName);
                    pw.print(":" + likelihoodMap.get(childDocName));
                    pw.print("\t");
                }
                pw.println();
            }
        }
    } catch (Exception e) {
        // Best-effort diagnostic output: report and carry on.
        e.printStackTrace();
    }
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_test method printParameter.
/**
 * Writes the topic values of parent documents, their sentences and their
 * child documents to two tab-separated text files.
 *
 * <p>Parent file: one line per {@code _ParentDoc} in {@code docList} with the
 * document name, the label {@code topicProportion}, {@code number_of_topics}
 * values from {@code d.m_topics}, and then for every sentence the label
 * {@code sentence<index + 1>} followed by that sentence's
 * {@code number_of_topics} values.
 *
 * <p>Child file: one line per child document with the parent name, the child
 * name, the label {@code topicProportion} and the child's
 * {@code number_of_topics} values.
 *
 * <p>Documents that are not {@code _ParentDoc} instances are skipped.
 * Failures while opening or writing either file are reported with
 * {@code printStackTrace()} and otherwise ignored (best effort).
 *
 * @param parentParameterFile path of the parent output file
 * @param childParameterFile  path of the child output file
 * @param docList             documents to scan
 */
public void printParameter(String parentParameterFile, String childParameterFile, ArrayList<_Doc> docList) {
    System.out.println("printing parameter");
    System.out.println(parentParameterFile);
    System.out.println(childParameterFile);

    // Both writers are managed by try-with-resources, so they are closed (and
    // flushed) on every path, including when opening the second file fails.
    try (PrintWriter parentParaOut = new PrintWriter(new File(parentParameterFile));
            PrintWriter childParaOut = new PrintWriter(new File(childParameterFile))) {
        for (_Doc d : docList) {
            if (!(d instanceof _ParentDoc)) {
                continue;
            }

            parentParaOut.print(d.getName() + "\t");
            parentParaOut.print("topicProportion\t");
            for (int k = 0; k < number_of_topics; k++) {
                parentParaOut.print(d.m_topics[k] + "\t");
            }
            for (_Stn stnObj : d.getSentences()) {
                parentParaOut.print("sentence" + (stnObj.getIndex() + 1) + "\t");
                for (int k = 0; k < number_of_topics; k++) {
                    parentParaOut.print(stnObj.m_topics[k] + "\t");
                }
            }
            parentParaOut.println();

            for (_ChildDoc cDoc : ((_ParentDoc) d).m_childDocs) {
                childParaOut.print(d.getName() + "\t");
                childParaOut.print(cDoc.getName() + "\t");
                childParaOut.print("topicProportion\t");
                for (int k = 0; k < number_of_topics; k++) {
                    // Index into the array: printing the array itself would emit
                    // its object reference rather than the k-th topic value.
                    childParaOut.print(cDoc.m_topics[k] + "\t");
                }
                childParaOut.println();
            }
        }
    } catch (Exception e) {
        // Best-effort diagnostic output: report and carry on.
        e.printStackTrace();
    }
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class DCMCorrLDA method updateAlpha.
/**
 * Iteratively rescales every entry of {@code m_alpha} using statistics of the
 * parent documents in {@code m_trainSet}, then refreshes {@code m_totalAlpha}.
 *
 * <p>Each round:
 * <ol>
 *   <li>sets {@code m_totalAlpha} to {@code Utils.sumOfArray(m_alpha)};</li>
 *   <li>builds a shared denominator: the sum over parent documents of
 *       {@code digamma(docLength + m_totalAlpha) - digamma(m_totalAlpha)};</li>
 *   <li>for each topic {@code k}, builds a numerator: the sum over parent
 *       documents of {@code digamma(m_alpha[k] + m_sstat[k]) - digamma(m_alpha[k])},
 *       and multiplies {@code m_alpha[k]} by numerator / denominator.</li>
 * </ol>
 * Rounds repeat while the largest absolute change of any {@code m_alpha[k]}
 * exceeds {@code m_newtonConverge} and at most {@code m_newtonIter} rounds
 * have been completed. Finally {@code m_totalAlpha} is recomputed as the sum
 * of the first {@code number_of_topics} entries of {@code m_alpha}.
 */
protected void updateAlpha() {
    double maxChange = 0;
    int rounds = 0;

    do {
        maxChange = 0;
        m_totalAlpha = Utils.sumOfArray(m_alpha);
        double digammaOfTotal = Utils.digamma(m_totalAlpha);

        // Denominator shared by all topics in this round.
        double denominator = 0;
        for (_Doc doc : m_trainSet) {
            if (!(doc instanceof _ParentDoc)) {
                continue;
            }
            denominator += Utils.digamma(doc.getTotalDocLength() + m_totalAlpha) - digammaOfTotal;
        }

        for (int topic = 0; topic < number_of_topics; topic++) {
            double numerator = 0;
            for (_Doc doc : m_trainSet) {
                if (!(doc instanceof _ParentDoc)) {
                    continue;
                }
                numerator += Utils.digamma(m_alpha[topic] + doc.m_sstat[topic]) - Utils.digamma(m_alpha[topic]);
            }

            double scale = numerator / denominator;
            double updated = m_alpha[topic] * scale;
            double change = Math.abs(m_alpha[topic] - updated);
            // Plain comparison (not Math.max) so a NaN change leaves maxChange untouched.
            if (change > maxChange) {
                maxChange = change;
            }
            m_alpha[topic] = updated;
        }

        rounds++;
    } while (rounds <= m_newtonIter && maxChange > m_newtonConverge);

    // Refresh the cached total from the final alpha values.
    m_totalAlpha = 0;
    for (int topic = 0; topic < number_of_topics; topic++) {
        m_totalAlpha += m_alpha[topic];
    }
}
Aggregations