use of structures._Stn in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method printParentTopicAssignment.
/**
 * Writes the per-word topic assignment of a parent document to
 * {@code <document name>.txt} inside {@code topicFolder}.
 *
 * <p>Each output line corresponds to one sentence: the sentence index,
 * a tab, then one {@code featureName:topic} pair per word, each followed
 * by a tab.
 *
 * @param d           document to dump; it is cast to {@code _ParentDoc}
 * @param topicFolder directory in which the output file is created
 */
protected void printParentTopicAssignment(_Doc d, File topicFolder) {
    _ParentDoc pDoc = (_ParentDoc) d;
    String topicAssignmentFile = pDoc.getName() + ".txt";

    // try-with-resources closes (and therefore flushes) the writer even when
    // a runtime exception is thrown while the sentences are being written.
    try (PrintWriter pw = new PrintWriter(new File(topicFolder, topicAssignmentFile))) {
        for (_Stn stnObj : pDoc.getSentences()) {
            pw.print(stnObj.getIndex() + "\t");
            for (_Word w : stnObj.getWords()) {
                String featureName = m_corpus.getFeature(w.getIndex());
                pw.print(featureName + ":" + w.getTopic() + "\t");
            }
            pw.println();
        }
    } catch (FileNotFoundException e) {
        // The output file could not be created; report it and carry on.
        e.printStackTrace();
    }
}
use of structures._Stn in project IR_Base by Linda-sunshine.
the class ACCTM_C_test method printTopKChild4Stn.
/**
 * Writes, for every sentence of every {@code _ParentDoc} in the training set,
 * the child documents and their scores as returned by
 * {@code rankChild4StnByLikelihood}.
 *
 * <p>The output file is {@code filePrefix + "topChild4Stn.txt"}. For each
 * parent document a header line {@code name<TAB>sentenceCount} is written,
 * followed by one line per sentence: the 1-based sentence index, a tab, and
 * one {@code childName:score} pair per map entry, each followed by a tab.
 *
 * @param filePrefix prefix prepended to the output file name
 * @param topK       currently not applied: every entry of the likelihood map
 *                   is printed, in the map's own iteration order
 */
protected void printTopKChild4Stn(String filePrefix, int topK) {
    String topKChild4StnFile = filePrefix + "topChild4Stn.txt";

    // try-with-resources guarantees the writer is flushed and closed even if
    // ranking or printing fails part-way through.
    try (PrintWriter pw = new PrintWriter(new File(topKChild4StnFile))) {
        for (_Doc d : m_trainSet) {
            if (!(d instanceof _ParentDoc)) {
                continue;
            }
            _ParentDoc pDoc = (_ParentDoc) d;
            pw.println(pDoc.getName() + "\t" + pDoc.getSenetenceSize());

            for (_Stn stnObj : pDoc.getSentences()) {
                HashMap<String, Double> likelihoodMap = rankChild4StnByLikelihood(stnObj, pDoc);
                pw.print((stnObj.getIndex() + 1) + "\t");
                for (String childDocName : likelihoodMap.keySet()) {
                    pw.print(childDocName);
                    pw.print(":" + likelihoodMap.get(childDocName));
                    pw.print("\t");
                }
                pw.println();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
use of structures._Stn in project IR_Base by Linda-sunshine.
the class ACCTM_test method printParameter.
/**
 * Dumps the topic proportions of parent documents (and of their sentences)
 * to one file, and the topic proportions of their child documents to another.
 *
 * <p>Parent file: one line per parent document holding its name, the literal
 * {@code topicProportion}, its {@code number_of_topics} topic values, and then
 * for each sentence the label {@code sentence<1-based index>} followed by that
 * sentence's topic values. All fields are tab-terminated.
 *
 * <p>Child file: one line per child document holding the parent name, the
 * child name, the literal {@code topicProportion}, and the child's
 * {@code number_of_topics} topic values, all tab-terminated.
 *
 * <p>Documents in {@code docList} that are not {@code _ParentDoc} instances
 * are skipped.
 *
 * @param parentParameterFile path of the parent output file
 * @param childParameterFile  path of the child output file
 * @param docList             documents to inspect
 */
public void printParameter(String parentParameterFile, String childParameterFile, ArrayList<_Doc> docList) {
    System.out.println("printing parameter");
    System.out.println(parentParameterFile);
    System.out.println(childParameterFile);

    // Both writers are closed (and flushed) automatically, including when the
    // second file cannot be opened or an exception occurs while writing.
    try (PrintWriter parentParaOut = new PrintWriter(new File(parentParameterFile));
            PrintWriter childParaOut = new PrintWriter(new File(childParameterFile))) {
        for (_Doc d : docList) {
            if (!(d instanceof _ParentDoc)) {
                continue;
            }

            parentParaOut.print(d.getName() + "\t");
            parentParaOut.print("topicProportion\t");
            for (int k = 0; k < number_of_topics; k++) {
                parentParaOut.print(d.m_topics[k] + "\t");
            }
            for (_Stn stnObj : d.getSentences()) {
                parentParaOut.print("sentence" + (stnObj.getIndex() + 1) + "\t");
                for (int k = 0; k < number_of_topics; k++) {
                    parentParaOut.print(stnObj.m_topics[k] + "\t");
                }
            }
            parentParaOut.println();

            for (_ChildDoc cDoc : ((_ParentDoc) d).m_childDocs) {
                childParaOut.print(d.getName() + "\t");
                childParaOut.print(cDoc.getName() + "\t");
                childParaOut.print("topicProportion\t");
                for (int k = 0; k < number_of_topics; k++) {
                    // Print the k-th proportion; printing the array itself would
                    // emit its identity string instead of the values.
                    childParaOut.print(cDoc.m_topics[k] + "\t");
                }
                childParaOut.println();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
use of structures._Stn in project IR_Base by Linda-sunshine.
the class DCMCorrLDA method initTest.
/**
 * Initialises a parent document and all of its child documents for test-time
 * sampling and appends them to {@code sampleTestSet}.
 *
 * <p>The parent is added first, followed by each child in the order of
 * {@code m_childDocs}. While the children are initialised, the topic
 * assigned to each of their words is added to the parent's
 * {@code m_wordTopic_stat} and {@code m_topic_stat} counters.
 *
 * @param sampleTestSet list that receives the parent and its children
 * @param d             document to initialise; it is cast to {@code _ParentDoc4DCM}
 */
protected void initTest(ArrayList<_Doc> sampleTestSet, _Doc d) {
    _ParentDoc4DCM parent = (_ParentDoc4DCM) d;

    // Give every sentence of the parent a topic vector of the model's size.
    for (_Stn sentence : parent.getSentences()) {
        sentence.setTopicsVct(number_of_topics);
    }

    // The parent itself is initialised with a test length of zero.
    int parentTestLength = 0;
    parent.setTopics4GibbsTest(number_of_topics, 0, parentTestLength, vocabulary_size);
    sampleTestSet.add(parent);

    for (_ChildDoc child : parent.m_childDocs) {
        // Test length is a fixed proportion of the child's total length.
        int childTestLength = (int) (m_testWord4PerplexityProportion * child.getTotalDocLength());
        child.setTopics4GibbsTest(number_of_topics, d_alpha, childTestLength);

        // Accumulate the child's word/topic assignments on the parent.
        for (_Word word : child.getWords()) {
            int wordId = word.getIndex();
            int topicId = word.getTopic();
            parent.m_wordTopic_stat[topicId][wordId]++;
            parent.m_topic_stat[topicId]++;
        }

        sampleTestSet.add(child);
        child.createSparseVct4Infer();
        computeTestMu4Doc(child);
    }
}
use of structures._Stn in project IR_Base by Linda-sunshine.
the class DCMCorrLDA_multi_E_test method printTopKChild4Stn.
/**
 * Writes, for every sentence of every {@code _ParentDoc4DCM} in the training
 * set, the child documents and their scores as returned by
 * {@code rankChild4StnByLikelihood}.
 *
 * <p>The output file is {@code filePrefix + "topChild4Stn.txt"}. For each
 * parent document a header line {@code name<TAB>sentenceCount} is written,
 * followed by one line per sentence: the 1-based sentence index, a tab, and
 * one {@code childName:score} pair per map entry, each followed by a tab.
 *
 * @param filePrefix prefix prepended to the output file name
 * @param topK       currently not applied: every entry of the likelihood map
 *                   is printed, in the map's own iteration order
 */
protected void printTopKChild4Stn(String filePrefix, int topK) {
    String topKChild4StnFile = filePrefix + "topChild4Stn.txt";

    // The writer must be closed so that buffered output actually reaches the
    // file; try-with-resources does this on both normal and exceptional exit.
    try (PrintWriter pw = new PrintWriter(new File(topKChild4StnFile))) {
        for (_Doc d : m_trainSet) {
            if (!(d instanceof _ParentDoc4DCM)) {
                continue;
            }
            _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;
            pw.println(pDoc.getName() + "\t" + pDoc.getSenetenceSize());

            for (_Stn stnObj : pDoc.getSentences()) {
                HashMap<String, Double> likelihoodMap = rankChild4StnByLikelihood(stnObj, pDoc);
                pw.print((stnObj.getIndex() + 1) + "\t");
                for (String childDocName : likelihoodMap.keySet()) {
                    pw.print(childDocName);
                    pw.print(":" + likelihoodMap.get(childDocName));
                    pw.print("\t");
                }
                pw.println();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Aggregations