use of structures._Stn in project IR_Base by Linda-sunshine.
the class corrLDA_Gibbs method initTest4Dynamical.
/**
 * Prepares a parent document and at most {@code commentNum} of its child documents
 * for test-time sampling and appends them to {@code sampleTestSet}.
 *
 * The parent's dynamic child list is reset to a fresh empty list, its Gibbs topic
 * state and each sentence's topic vector are (re)initialised, and the parent is added
 * to the sample set first. Child documents are then taken in the iteration order of
 * {@code m_childDocs}; each selected child is initialised, added to the sample set and
 * registered on the parent through {@code addChildDoc4Dynamics}.
 *
 * @param sampleTestSet list that receives the parent followed by the selected children
 * @param d             document to prepare; it is cast to {@code _ParentDoc} unconditionally
 * @param commentNum    upper bound on the number of child documents to include
 */
public void initTest4Dynamical(ArrayList<_Doc> sampleTestSet, _Doc d, int commentNum) {
    _ParentDoc parent = (_ParentDoc) d;
    parent.m_childDocs4Dynamic = new ArrayList<_ChildDoc>();
    parent.setTopics4Gibbs(number_of_topics, 0);

    for (_Stn sentence : parent.getSentences()) {
        sentence.setTopicsVct(number_of_topics);
    }
    sampleTestSet.add(parent);

    // Count down the remaining budget instead of counting up to commentNum.
    int remaining = commentNum;
    for (_ChildDoc child : parent.m_childDocs) {
        if (remaining <= 0) {
            break;
        }
        remaining--;

        child.setTopics4Gibbs_LDA(number_of_topics, 0);
        sampleTestSet.add(child);
        parent.addChildDoc4Dynamics(child);
    }
}
use of structures._Stn in project IR_Base by Linda-sunshine.
the class weightedCorrespondenceModel method initialize_probability.
/**
 * Initialises the variational state of every {@code _ParentDoc4DCM} in the collection
 * and accumulates the per-parent lambda sufficient statistics from its child documents.
 *
 * For each parent: the parent counter is incremented, its variational topic state and
 * the topic vector of each sentence are set up, and every child document is initialised
 * and counted. Each child feature {@code n} then contributes
 * {@code m_phi[n][k] * value} to {@code m_lambda_stat[k][wordIndex]} for every topic
 * {@code k}. Finally {@code m_lambda_topicStat[k]} is set to the row sum of
 * {@code m_lambda_stat[k]}. Documents that are not {@code _ParentDoc4DCM} are skipped.
 * {@code init()} is called before the loop and {@code imposePrior()} after it.
 *
 * @param collection documents to initialise
 */
@Override
protected void initialize_probability(Collection<_Doc> collection) {
    init();

    for (_Doc d : collection) {
        if (!(d instanceof _ParentDoc4DCM)) {
            continue;
        }

        m_parentDocNum += 1;
        _ParentDoc4DCM pDoc = (_ParentDoc4DCM) d;
        pDoc.setTopics4Variational(number_of_topics, d_alpha, vocabulary_size, d_beta);

        for (_Stn stnObj : pDoc.getSentences()) {
            stnObj.setTopicsVct(number_of_topics);
        }

        for (_ChildDoc cDoc : pDoc.m_childDocs) {
            m_childDocNum += 1;
            cDoc.setTopics4Variational(number_of_topics, d_alpha);

            // Update the article-thread sufficient statistics. The feature array is
            // fetched once per child instead of once per loop test and body.
            _SparseFeature[] features = cDoc.getSparse();
            for (int n = 0; n < features.length; n++) {
                int wID = features[n].getIndex();
                double wVal = features[n].getValue();
                for (int k = 0; k < number_of_topics; k++) {
                    pDoc.m_lambda_stat[k][wID] += cDoc.m_phi[n][k] * wVal;
                }
            }
        }

        for (int k = 0; k < number_of_topics; k++) {
            pDoc.m_lambda_topicStat[k] = Utils.sumOfArray(pDoc.m_lambda_stat[k]);
        }
    }

    imposePrior();
}
use of structures._Stn in project IR_Base by Linda-sunshine.
the class languageModelBaseLine method printTopChild4Stn.
/**
 * Writes, for every {@code _ParentDoc} in the corpus, the child documents scored
 * against each of its sentences to {@code filePrefix + "/topChild4Stn.txt"}.
 *
 * Output layout:
 * <pre>
 * parentName \t sentenceCount
 * (sentenceIndex + 1) \t childName:score \t childName:score \t ...
 * </pre>
 * Scores come from {@code rankChild4StnByLanguageModel}; entries are written in the
 * order produced by {@code sortHashMap4String(likelihoodMap, true)}.
 *
 * Any exception is reported with {@code printStackTrace()} and not rethrown. The writer
 * is closed in all cases so the file handle is released and already-buffered output is
 * flushed even when ranking or writing fails part-way through.
 *
 * @param filePrefix directory (path prefix) under which the output file is created
 */
protected void printTopChild4Stn(String filePrefix) {
    String topChild4StnFile = filePrefix + "/topChild4Stn.txt";
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(topChild4StnFile));
        for (_Doc d : m_corpus.getCollection()) {
            if (!(d instanceof _ParentDoc)) {
                continue;
            }
            _ParentDoc pDoc = (_ParentDoc) d;
            pw.println(pDoc.getName() + "\t" + pDoc.getSenetenceSize());

            for (_Stn stnObj : pDoc.getSentences()) {
                HashMap<String, Double> likelihoodMap = rankChild4StnByLanguageModel(stnObj, pDoc);

                // Sentence indices are written 1-based.
                pw.print((stnObj.getIndex() + 1) + "\t");
                for (Map.Entry<String, Double> e : sortHashMap4String(likelihoodMap, true)) {
                    pw.print(e.getKey());
                    pw.print(":" + e.getValue());
                    pw.print("\t");
                }
                pw.println();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // close() also flushes; doing it here covers both the success and failure paths.
        if (pw != null) {
            pw.close();
        }
    }
}
use of structures._Stn in project IR_Base by Linda-sunshine.
the class languageModelBaseLine method rankChild4StnByLikelihood.
/**
 * Scores every child document of {@code pDoc} against a single sentence and returns
 * the scores keyed by child document name.
 *
 * For each child, only sentence features that also occur in the child's sparse vector
 * (as located by {@code Utils.indexOf}) contribute. A matching feature adds
 * {@code sentenceValue * log(smoothed / (alphaDoc * m_wordSstat.get(wid)))}, where
 * {@code alphaDoc = m_smoothingMu / (m_smoothingMu + childLength)} and
 * {@code smoothed = (childValue + m_smoothingMu * m_wordSstat.get(wid)) / (m_smoothingMu + childLength)}.
 * After all features, {@code stnObj.getLength() * log(alphaDoc)} is added.
 * NOTE(review): {@code m_wordSstat} looks like a collection-level word statistic used
 * as the smoothing background; confirm against its definition.
 *
 * @param stnObj sentence whose features are scored
 * @param pDoc   parent document whose {@code m_childDocs} are ranked
 * @return map from child document name to its log-likelihood score for the sentence
 */
protected HashMap<String, Double> rankChild4StnByLikelihood(_Stn stnObj, _ParentDoc pDoc) {
    HashMap<String, Double> scoreByChild = new HashMap<String, Double>();

    for (_ChildDoc child : pDoc.m_childDocs) {
        int childLength = child.getTotalDocLength();
        _SparseFeature[] childFeatures = child.getSparse();
        double logLikelihood = 0;
        double alphaDoc = m_smoothingMu / (m_smoothingMu + childLength);

        _SparseFeature[] sentenceFeatures = stnObj.getFv();
        for (int s = 0; s < sentenceFeatures.length; s++) {
            _SparseFeature sentenceWord = sentenceFeatures[s];
            int wid = sentenceWord.getIndex();
            double sentenceValue = sentenceWord.getValue();

            int childPos = Utils.indexOf(childFeatures, wid);
            if (childPos != -1) {
                // Only words present in the child document contribute a per-word term.
                double childValue = childFeatures[childPos].getValue();
                double smoothed = childValue / (m_smoothingMu + childLength);
                smoothed += m_smoothingMu * m_wordSstat.get(wid) / (m_smoothingMu + childLength);
                logLikelihood += sentenceValue * Math.log(smoothed / (alphaDoc * m_wordSstat.get(wid)));
            }
        }

        // Length-dependent term applied once per (sentence, child) pair.
        logLikelihood += stnObj.getLength() * Math.log(alphaDoc);
        scoreByChild.put(child.getName(), logLikelihood);
    }

    return scoreByChild;
}
use of structures._Stn in project IR_Base by Linda-sunshine.
the class HTMM method ComputeEmissionProbsForDoc.
// Build the emission table for one document: emission[s][t] becomes the sum, over the
// sparse features of sentence s, of featureValue * topic_term_probabilty[t][featureIndex].
// Each sentence row is zeroed before accumulation. The original comment states that the
// values are in log-space.
void ComputeEmissionProbsForDoc(_Doc d) {
    for (int sentenceIdx = 0; sentenceIdx < d.getSenetenceSize(); sentenceIdx++) {
        _Stn sentence = d.getSentence(sentenceIdx);
        Arrays.fill(emission[sentenceIdx], 0);

        for (int topic = 0; topic < number_of_topics; topic++) {
            for (_SparseFeature word : sentence.getFv()) {
                double weight = word.getValue();
                int wid = word.getIndex();
                // all in log-space
                emission[sentenceIdx][topic] += weight * topic_term_probabilty[topic][wid];
            }
        }
    }
}
Aggregations