use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_CZLR method initTest4Spam.
/**
 * Initialises one parent document and its child documents for test-time
 * sampling and appends all of them to {@code sampleTestSet}.
 *
 * <p>The parent is added first, followed by each child in the order of
 * {@code m_childDocs}. Once every document has been added, word features
 * are set for the whole sample set.
 *
 * @param sampleTestSet list that receives the parent and its children
 * @param d             document to initialise; it is cast to
 *                      {@code _ParentDoc}, and each of its children is cast
 *                      to {@code _ChildDoc4BaseWithPhi}
 */
public void initTest4Spam(ArrayList<_Doc> sampleTestSet, _Doc d) {
    final _ParentDoc parent = (_ParentDoc) d;
    parent.setTopics4Gibbs(number_of_topics, 0);

    // Every sentence of the parent gets its own topic vector.
    for (_Stn sentence : parent.getSentences()) {
        sentence.setTopicsVct(number_of_topics);
    }
    sampleTestSet.add(parent);

    for (_ChildDoc child : parent.m_childDocs) {
        // The x-space and topic setup are invoked through the Phi-specific subtype.
        final _ChildDoc4BaseWithPhi phiChild = (_ChildDoc4BaseWithPhi) child;
        phiChild.createXSpace(number_of_topics, m_gamma.length, vocabulary_size, d_beta);
        phiChild.setTopics4Gibbs(number_of_topics, 0);

        sampleTestSet.add(child);
        child.setParentDoc(parent);
        computeMu4Doc(child);
    }

    setFeatures4Word(sampleTestSet);
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method rankStn4ChildBySim.
/**
 * Scores every sentence of a parent document against one child document.
 *
 * <p>The score of a sentence is the negated value returned by
 * {@code Utils.klDivergence(cDoc.m_xTopics[0], sentence.m_topics)}, so a
 * smaller divergence yields a larger score.
 *
 * @param pDoc parent document whose sentences are scored
 * @param cDoc child document supplying {@code m_xTopics[0]}
 * @return map from sentence index plus one to the negated divergence
 */
protected HashMap<Integer, Double> rankStn4ChildBySim(_ParentDoc pDoc, _ChildDoc cDoc) {
    HashMap<Integer, Double> scoreBySentence = new HashMap<Integer, Double>();

    for (_Stn sentence : pDoc.getSentences()) {
        double divergence = Utils.klDivergence(cDoc.m_xTopics[0], sentence.m_topics);
        int sentenceKey = sentence.getIndex() + 1;
        scoreBySentence.put(sentenceKey, -divergence);
    }

    return scoreBySentence;
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method rankChild4StnByLikelihood.
/**
 * Computes, for every child document of {@code pDoc}, the log-likelihood of
 * the words of one sentence.
 *
 * <p>For each word the likelihood is the sum over all topics {@code k} of
 * {@code childWordByTopicProb(k, wid) * childTopicInDoc(k, cDoc)}; the
 * sentence score is the sum of the logs of those per-word likelihoods.
 * A word whose likelihood is zero contributes {@code -Infinity}, as
 * returned by {@link Math#log(double)}.
 *
 * <p>An earlier variant of the formula (additionally weighting by the
 * x-indicator probability and normalising by the gamma sum plus the child's
 * x-statistics) is not applied here; the values it needed are therefore no
 * longer computed.
 *
 * @param stnObj sentence whose words are scored
 * @param pDoc   parent document whose {@code m_childDocs} are ranked
 * @return map from child document name to the sentence log-likelihood
 */
protected HashMap<String, Double> rankChild4StnByLikelihood(_Stn stnObj, _ParentDoc pDoc) {
    HashMap<String, Double> childLikelihoodMap = new HashMap<String, Double>();

    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        double stnLogLikelihood = 0;

        for (_Word w : stnObj.getWords()) {
            int wid = w.getIndex();

            // Marginalise the word probability over all topics before taking the log.
            double wordLikelihood = 0;
            for (int k = 0; k < number_of_topics; k++) {
                wordLikelihood += childWordByTopicProb(k, wid) * childTopicInDoc(k, cDoc);
            }

            stnLogLikelihood += Math.log(wordLikelihood);
        }

        childLikelihoodMap.put(cDoc.getName(), stnLogLikelihood);
    }

    return childLikelihoodMap;
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method debugOutput.
/**
 * Writes the debugging artefacts of the trained model below {@code filePrefix}.
 *
 * <p>Six output folders are created if missing (parent/child topic
 * assignments, child local word-topic distributions, parent/child phi,
 * and x values). Each training document is then dumped into the folders
 * matching its type, followed by the parameter, x-proportion, entropy and
 * child-per-sentence reports.
 *
 * @param topK       currently unused by this method; the child-per-sentence
 *                   report is requested with a fixed limit of 10
 * @param filePrefix path prefix prepended to every folder and file name
 */
public void debugOutput(int topK, String filePrefix) {
    File parentTopicFolder = ensureDebugFolder(filePrefix + "parentTopicAssignment", "creating directory");
    File childTopicFolder = ensureDebugFolder(filePrefix + "childTopicAssignment", "creating directory");
    File childLocalWordTopicFolder = ensureDebugFolder(filePrefix + "childLocalTopic", "creating directory");
    File parentPhiFolder = ensureDebugFolder(filePrefix + "parentPhi", "creating directory");
    File childPhiFolder = ensureDebugFolder(filePrefix + "childPhi", "creating directory");
    File childXFolder = ensureDebugFolder(filePrefix + "xValue", "creating x Value directory");

    for (_Doc d : m_trainSet) {
        if (d instanceof _ParentDoc) {
            printParentTopicAssignment(d, parentTopicFolder);
            printParentPhi(d, parentPhiFolder);
        } else if (d instanceof _ChildDoc) {
            printChildTopicAssignment(d, childTopicFolder);
            // The local word-topic dump needs the Phi-specific child type.
            printChildLocalWordTopicDistribution((_ChildDoc4BaseWithPhi) d, childLocalWordTopicFolder);
            printXValue(d, childXFolder);
        }
    }

    String parentParameterFile = filePrefix + "parentParameter.txt";
    String childParameterFile = filePrefix + "childParameter.txt";
    printParameter(parentParameterFile, childParameterFile, m_trainSet);

    String xProportionFile = filePrefix + "childXProportion.txt";
    printXProportion(xProportionFile, m_trainSet);

    printEntropy(filePrefix);

    int topKChild = 10;
    printTopKChild4Stn(filePrefix, topKChild);
}

/**
 * Returns the folder at {@code path}, creating it (single level) when it
 * does not exist yet and reporting on stderr if the creation fails.
 *
 * @param path            folder path
 * @param creationMessage text printed, followed by the folder, before creating it
 * @return the {@code File} for {@code path}, whether or not creation succeeded
 */
private File ensureDebugFolder(String path, String creationMessage) {
    File folder = new File(path);
    if (!folder.exists()) {
        System.out.println(creationMessage + folder);
        // mkdir() returns false on failure; re-check in case the folder appeared in the meantime.
        if (!folder.mkdir() && !folder.isDirectory()) {
            System.err.println("failed to create directory " + folder);
        }
    }
    return folder;
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class ACCTM_test method printTopKChild4Stn.
/**
 * Writes, for every sentence of every parent document in the training set,
 * the likelihood score of each child document to
 * {@code filePrefix + "topChild4Stn.txt"}.
 *
 * <p>File layout: one header line per parent ({@code name<TAB>sentenceCount}),
 * then one line per sentence starting with the sentence index plus one,
 * followed by tab-terminated {@code childName:score} pairs.
 *
 * <p>Any exception is printed to stderr and not rethrown; the writer is
 * closed in all cases.
 *
 * @param filePrefix path prefix of the output file
 * @param topK       currently not applied: all children are written, in the
 *                   iteration order of the map returned by
 *                   {@code rankChild4StnByLikelihood}
 */
protected void printTopKChild4Stn(String filePrefix, int topK) {
    String topKChild4StnFile = filePrefix + "topChild4Stn.txt";
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(topKChild4StnFile));

        for (_Doc d : m_trainSet) {
            if (!(d instanceof _ParentDoc)) {
                continue;
            }
            _ParentDoc pDoc = (_ParentDoc) d;
            pw.println(pDoc.getName() + "\t" + pDoc.getSenetenceSize());

            for (_Stn stnObj : pDoc.getSentences()) {
                HashMap<String, Double> likelihoodMap = rankChild4StnByLikelihood(stnObj, pDoc);

                pw.print((stnObj.getIndex() + 1) + "\t");
                for (java.util.Map.Entry<String, Double> entry : likelihoodMap.entrySet()) {
                    pw.print(entry.getKey());
                    pw.print(":" + entry.getValue());
                    pw.print("\t");
                }
                pw.println();
            }
        }

        pw.flush();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Release the file handle even when writing fails part-way through.
        if (pw != null) {
            pw.close();
        }
    }
}
Aggregations