use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class LDAGibbs4AC_test method printTopKStn4Child.
/**
 * Writes, for every parent document in {@code m_trainSet}, the ranked sentences of
 * that parent for each of its child documents.
 *
 * <p>Output goes to the file {@code filePrefix + "topStn4Child.txt"}. Each parent
 * contributes a header line {@code <parent name>\t<number of children>}, followed by
 * one line per child: the child's name, then tab-separated {@code key:value} pairs
 * taken from {@code rankStn4ChildBySim(pDoc, cDoc)} in the order produced by
 * {@code sortHashMap4Integer(map, true)}.
 *
 * <p>Any exception is reported through {@code printStackTrace()} and is not rethrown.
 *
 * @param filePrefix path prefix to which the output file name is appended
 * @param topK currently has no effect: the top-K cut-off is disabled, so every
 *     ranked entry is written
 */
protected void printTopKStn4Child(String filePrefix, int topK) {
    String topKStn4ChildFile = filePrefix + "topStn4Child.txt";
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(topKStn4ChildFile));
        for (_Doc d : m_trainSet) {
            if (!(d instanceof _ParentDoc)) {
                continue;
            }
            _ParentDoc pDoc = (_ParentDoc) d;
            pw.println(pDoc.getName() + "\t" + pDoc.m_childDocs.size());
            for (_ChildDoc cDoc : pDoc.m_childDocs) {
                HashMap<Integer, Double> stnSimMap = rankStn4ChildBySim(pDoc, cDoc);
                pw.print(cDoc.getName() + "\t");
                for (Map.Entry<Integer, Double> e : sortHashMap4Integer(stnSimMap, true)) {
                    pw.print(e.getKey());
                    pw.print(":" + e.getValue());
                    pw.print("\t");
                }
                pw.println();
            }
        }
        pw.flush();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close here so the file handle is released even when ranking or writing throws.
        if (pw != null) {
            pw.close();
        }
    }
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class LDAGibbs4AC_test method initTest4Dynamical.
/**
 * Dynamically adds a parent document and a limited number of its comments
 * (child documents) to the sample test set.
 *
 * <p>The parent's {@code m_childDocs4Dynamic} list is reset, its topic structures
 * are initialised with {@code number_of_topics} and {@code d_alpha}, and every
 * sentence gets a topic vector sized {@code number_of_topics}. The parent is then
 * appended to {@code sampleTestSet}, followed by at most {@code commentNum} of its
 * children in the iteration order of {@code m_childDocs}; each such child is also
 * registered through {@code addChildDoc4Dynamics}.
 *
 * @param sampleTestSet list receiving the parent and the selected children
 * @param d document to add; it is cast to {@code _ParentDoc} without a type check
 * @param commentNum maximum number of children to add; values {@code <= 0} add none
 */
public void initTest4Dynamical(ArrayList<_Doc> sampleTestSet, _Doc d, int commentNum) {
    final _ParentDoc parent = (_ParentDoc) d;

    parent.m_childDocs4Dynamic = new ArrayList<_ChildDoc>();
    parent.setTopics4Gibbs(number_of_topics, d_alpha);
    for (_Stn sentence : parent.getSentences()) {
        sentence.setTopicsVct(number_of_topics);
    }
    sampleTestSet.add(parent);

    // Count down the remaining budget of comments instead of counting up.
    int remaining = commentNum;
    for (_ChildDoc comment : parent.m_childDocs) {
        if (remaining <= 0) {
            break;
        }
        remaining--;

        comment.setTopics4Gibbs_LDA(number_of_topics, d_alpha);
        sampleTestSet.add(comment);
        parent.addChildDoc4Dynamics(comment);
    }
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class LDAGibbs4AC_test method printTopKChild4Stn.
/**
 * Writes, for every sentence of every parent document in {@code m_trainSet}, the
 * ranked child documents of that parent.
 *
 * <p>Output goes to the file {@code filePrefix + "topChild4Stn.txt"}. Each parent
 * contributes a header line {@code <parent name>\t<sentence count>}, followed by one
 * line per sentence: the 1-based sentence index ({@code stnObj.getIndex() + 1}), then
 * tab-separated {@code key:value} pairs taken from
 * {@code rankChild4StnByLikelihood(stnObj, pDoc)} in the order produced by
 * {@code sortHashMap4String(map, true)}.
 *
 * <p>Any exception is reported through {@code printStackTrace()} and is not rethrown.
 *
 * @param filePrefix path prefix to which the output file name is appended
 * @param topK currently has no effect: the top-K cut-off is disabled, so every
 *     ranked entry is written
 */
protected void printTopKChild4Stn(String filePrefix, int topK) {
    String topKChild4StnFile = filePrefix + "topChild4Stn.txt";
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(topKChild4StnFile));
        for (_Doc d : m_trainSet) {
            if (!(d instanceof _ParentDoc)) {
                continue;
            }
            _ParentDoc pDoc = (_ParentDoc) d;
            pw.println(pDoc.getName() + "\t" + pDoc.getSenetenceSize());
            for (_Stn stnObj : pDoc.getSentences()) {
                // Alternative rankers exist elsewhere in the class
                // (rankChild4StnByHybrid, rankChild4StnByLanguageModel).
                HashMap<String, Double> likelihoodMap = rankChild4StnByLikelihood(stnObj, pDoc);
                pw.print((stnObj.getIndex() + 1) + "\t");
                for (Map.Entry<String, Double> e : sortHashMap4String(likelihoodMap, true)) {
                    pw.print(e.getKey());
                    pw.print(":" + e.getValue());
                    pw.print("\t");
                }
                pw.println();
            }
        }
        pw.flush();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close here so the file handle is released even when ranking or writing throws.
        if (pw != null) {
            pw.close();
        }
    }
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class LDAGibbs4AC_test method debugOutput.
/**
 * Dumps debugging artefacts for the training set under the given path prefix.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>create the directories {@code topicAssignment}, {@code topKStn} and
 *       {@code topKChild} (each appended to {@code filePrefix}) when they do not
 *       exist yet, printing a notice to standard output for each one created;</li>
 *   <li>write the topic assignment of every parent and child document in
 *       {@code m_trainSet} into the {@code topicAssignment} directory;</li>
 *   <li>write the parameter files, the topic-similarity file, the entropy output
 *       and the various top-K rankings through the corresponding print helpers.</li>
 * </ol>
 *
 * @param filePrefix path prefix to which every output file and directory name is appended
 */
public void debugOutput(String filePrefix) {
    // Directory suffixes and the notice printed when each one has to be created.
    final String[] folderSuffixes = {"topicAssignment", "topKStn", "topKChild"};
    final String[] creationNotices = {
        "creating directory",
        "creating top K stn directory\t",
        "creating top K child directory\t"
    };

    final File[] folders = new File[folderSuffixes.length];
    for (int idx = 0; idx < folders.length; idx++) {
        folders[idx] = new File(filePrefix + folderSuffixes[idx]);
        if (folders[idx].exists()) {
            continue;
        }
        System.out.println(creationNotices[idx] + folders[idx]);
        folders[idx].mkdir();
    }
    final File topicFolder = folders[0];

    final int topKStn = 10;
    final int topKChild = 10;

    for (_Doc doc : m_trainSet) {
        if (doc instanceof _ParentDoc) {
            printParentTopicAssignment(doc, topicFolder);
            continue;
        }
        if (doc instanceof _ChildDoc) {
            printChildTopicAssignment(doc, topicFolder);
        }
    }

    printParameter(filePrefix + "parentParameter.txt", filePrefix + "childParameter.txt", m_trainSet);

    discoverSpecificComments(filePrefix + "topicSimilarity.txt");

    printEntropy(filePrefix);

    printTopKChild4Parent(filePrefix, topKChild);
    printTopKChild4Stn(filePrefix, topKChild);
    printTopKChild4StnWithHybrid(filePrefix, topKChild);
    printTopKChild4StnWithHybridPro(filePrefix, topKChild);
    printTopKStn4Child(filePrefix, topKStn);
}
use of structures._ParentDoc in project IR_Base by Linda-sunshine.
the class LDAGibbs4AC_test method rankChild4StnByHybrid.
/**
 * Scores every child document of {@code pDoc} against one sentence using a mixture
 * of a smoothed language-model likelihood and a topic-model likelihood.
 *
 * <p>For each feature (word) of the sentence the per-word likelihood is
 * {@code m_tau * LM + (1 - m_tau) * TM}, where
 * <ul>
 *   <li>{@code LM = (1 - alphaDoc) * docVal / cDocLen + alphaDoc * m_LM.getReferenceProb(wid)}
 *       with {@code alphaDoc = mu / (mu + cDocLen)} and {@code docVal} the word's
 *       value in the child document (0 when absent);</li>
 *   <li>{@code TM = sum_k (word_topic_sstat[k][wid] / m_sstat[k])
 *       * (topicInDocProb(k, cDoc) / (d_alpha * number_of_topics + cDocLen))}.</li>
 * </ul>
 * The sentence score is the sum over its features of
 * {@code stnVal * log(per-word likelihood)}.
 *
 * @param stnObj sentence whose sparse feature vector ({@code getFv()}) is scored
 * @param pDoc parent document whose {@code m_childDocs} are ranked
 * @return map from child document name to the sentence's log-likelihood under that child
 */
protected HashMap<String, Double> rankChild4StnByHybrid(_Stn stnObj, _ParentDoc pDoc) {
    HashMap<String, Double> childLikelihoodMap = new HashMap<String, Double>();
    double smoothingMu = m_LM.m_smoothingMu;

    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        double cDocLen = cDoc.getTotalDocLength();
        _SparseFeature[] fv = cDoc.getSparse();
        // NOTE(review): assumes smoothingMu > 0; with mu == 0 and an empty child
        // document alphaDoc itself would be 0/0.
        double alphaDoc = smoothingMu / (smoothingMu + cDocLen);
        // Same for every word and topic of this child, so compute it once.
        double topicNormalizer = d_alpha * number_of_topics + cDocLen;

        double stnLogLikelihood = 0;
        for (_SparseFeature svWord : stnObj.getFv()) {
            int wid = svWord.getIndex();
            double stnVal = svWord.getValue();

            int featureIndex = Utils.indexOf(fv, wid);
            double docVal = 0;
            if (featureIndex != -1) {
                docVal = fv[featureIndex].getValue();
            }

            // An empty child document would make the document term 0 * 0 / 0 = NaN and
            // poison the whole score; it contributes nothing, so fall back to the
            // reference-model term alone.
            double LMLikelihood = cDocLen > 0 ? (1 - alphaDoc) * docVal / cDocLen : 0;
            LMLikelihood += alphaDoc * m_LM.getReferenceProb(wid);

            double TMLikelihood = 0;
            for (int k = 0; k < number_of_topics; k++) {
                TMLikelihood += (word_topic_sstat[k][wid] / m_sstat[k])
                        * (topicInDocProb(k, cDoc) / topicNormalizer);
            }

            double featureLikelihood = m_tau * LMLikelihood + (1 - m_tau) * TMLikelihood;
            stnLogLikelihood += stnVal * Math.log(featureLikelihood);
        }
        childLikelihoodMap.put(cDoc.getName(), stnLogLikelihood);
    }
    return childLikelihoodMap;
}
Aggregations