use of structures._ChildDoc in project IR_Base by Linda-sunshine.
the class LDAGibbs4AC_test method printTopKStn4Child.
/**
 * Writes one ranking file per child document of {@code pDoc} into
 * {@code <topKStnFolder>/<pDoc name>/<cDoc name>.txt}.
 *
 * <p>Each file holds at most {@code topK} lines of the form {@code key<TAB>value}, taken in
 * the order returned by {@code sortHashMap4Integer(stnSimMap, true)}. The map comes from
 * {@code rankStn4ChildBySim(pDoc, cDoc)}; presumably it maps a sentence index of the parent
 * to its similarity with the child (confirm against that helper).
 *
 * <p>Failures for one child are reported via {@code printStackTrace()} and do not stop the
 * processing of the remaining children. The writer is always closed, even when writing fails.
 *
 * @param topK          maximum number of entries written per child document
 * @param pDoc          parent document whose child documents are processed
 * @param topKStnFolder folder under which the per-parent sub-folder is created
 */
protected void printTopKStn4Child(int topK, _ParentDoc pDoc, File topKStnFolder) {
    File topKStn4PDocFolder = new File(topKStnFolder, pDoc.getName());
    if (!topKStn4PDocFolder.exists()) {
        topKStn4PDocFolder.mkdir();
    }
    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        String topKStn4ChildFile = cDoc.getName() + ".txt";
        HashMap<Integer, Double> stnSimMap = rankStn4ChildBySim(pDoc, cDoc);
        PrintWriter pw = null;
        try {
            pw = new PrintWriter(new File(topKStn4PDocFolder, topKStn4ChildFile));
            int i = 0;
            for (Map.Entry<Integer, Double> e : sortHashMap4Integer(stnSimMap, true)) {
                if (i == topK) {
                    break;
                }
                pw.print(e.getKey());
                pw.print("\t" + e.getValue());
                pw.println();
                i++;
            }
            pw.flush();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the file handle even if sorting or printing threw.
            if (pw != null) {
                pw.close();
            }
        }
    }
}
use of structures._ChildDoc in project IR_Base by Linda-sunshine.
the class corrLDA_Gibbs method calculate_log_likelihood4Child.
/**
 * Computes the log-likelihood of a child document.
 *
 * <p>For every sparse feature the per-topic terms
 * {@code childWordByTopicProb(k, wid) * childTopicInDoc(k, cDoc) / (smoothingSum + docTopicSum)}
 * are summed over all topics; the logarithm of that sum, weighted by the feature value, is
 * added to the document total. Sums below {@code 1e-10} are bumped by {@code 1e-10} before the
 * logarithm is taken, and a notice is printed to standard output.
 *
 * @param d document to score; it is cast to {@code _ChildDoc}
 * @return the accumulated log-likelihood over all sparse features of the document
 */
protected double calculate_log_likelihood4Child(_Doc d) {
    _ChildDoc cDoc = (_ChildDoc) d;
    double total = 0;

    _SparseFeature[] features = cDoc.getSparse();
    double docTopicSum = Utils.sumOfArray(cDoc.m_sstat);
    double smoothingSum = m_smoothingParam * number_of_topics;
    double denominator = smoothingSum + docTopicSum;

    for (_SparseFeature feature : features) {
        int wid = feature.getIndex();
        double weight = feature.getValue();

        // Mixture probability of this word under the document's topic proportions.
        double wordLikelihood = 0;
        int k = 0;
        while (k < number_of_topics) {
            wordLikelihood += childWordByTopicProb(k, wid) * childTopicInDoc(k, cDoc) / denominator;
            k++;
        }

        // Keep the argument of the logarithm away from zero.
        if (wordLikelihood < 1e-10) {
            wordLikelihood += 1e-10;
            System.out.println("small likelihood in child");
        }

        total += weight * Math.log(wordLikelihood);
    }

    return total;
}
use of structures._ChildDoc in project IR_Base by Linda-sunshine.
the class corrLDA_Gibbs method initTest4Dynamical.
/**
 * Prepares a parent document and a limited number of its child documents for testing.
 *
 * <p>The parent gets a fresh {@code m_childDocs4Dynamic} list, its Gibbs topic state and the
 * topic vectors of all its sentences are initialised, and it is appended to
 * {@code sampleTestSet}. Then, in iteration order of {@code m_childDocs}, at most
 * {@code commentNum} children are initialised, appended to {@code sampleTestSet} and
 * registered on the parent via {@code addChildDoc4Dynamics}.
 *
 * @param sampleTestSet list receiving the parent followed by the selected children
 * @param d             document to prepare; it is cast to {@code _ParentDoc}
 * @param commentNum    maximum number of child documents to include
 */
public void initTest4Dynamical(ArrayList<_Doc> sampleTestSet, _Doc d, int commentNum) {
    final _ParentDoc parent = (_ParentDoc) d;

    parent.m_childDocs4Dynamic = new ArrayList<_ChildDoc>();
    parent.setTopics4Gibbs(number_of_topics, 0);
    for (_Stn sentence : parent.getSentences())
        sentence.setTopicsVct(number_of_topics);
    sampleTestSet.add(parent);

    // Count down the number of children that may still be taken.
    int remaining = commentNum;
    for (_ChildDoc child : parent.m_childDocs) {
        if (remaining <= 0)
            break;
        remaining--;

        child.setTopics4Gibbs_LDA(number_of_topics, 0);
        sampleTestSet.add(child);
        parent.addChildDoc4Dynamics(child);
    }
}
use of structures._ChildDoc in project IR_Base by Linda-sunshine.
the class weightedCorrespondenceModel method collectStats.
/**
 * Accumulates digamma-based statistics of one parent document and its children into the
 * model-level accumulators.
 *
 * <p>For each topic {@code k}: {@code digamma(m_sstat[k]) - digamma(sum(m_sstat))} of the
 * parent is added to {@code m_alpha_stat[k]}, and for each word {@code v} the analogous
 * difference over {@code m_lambda_stat[k]} is added to {@code m_beta_stat[k][v]}. For each
 * child document the same difference over the child's {@code m_sstat} is added to
 * {@code m_alpha_c_stat[k]}.
 *
 * @param pDoc parent document whose statistics (and those of its children) are collected
 */
protected void collectStats(_ParentDoc4DCM pDoc) {
    final double parentTotal = Utils.sumOfArray(pDoc.m_sstat);

    for (int topic = 0; topic < number_of_topics; topic++) {
        double alphaTerm = Utils.digamma(pDoc.m_sstat[topic]) - Utils.digamma(parentTotal);
        m_alpha_stat[topic] += alphaTerm;

        double[] lambdaRow = pDoc.m_lambda_stat[topic];
        final double lambdaTotal = Utils.sumOfArray(lambdaRow);
        for (int word = 0; word < vocabulary_size; word++) {
            double betaTerm = Utils.digamma(lambdaRow[word]) - Utils.digamma(lambdaTotal);
            m_beta_stat[topic][word] += betaTerm;
        }
    }

    for (_ChildDoc child : pDoc.m_childDocs) {
        final double childTotal = Utils.sumOfArray(child.m_sstat);
        for (int topic = 0; topic < number_of_topics; topic++) {
            double childTerm = Utils.digamma(child.m_sstat[topic]) - Utils.digamma(childTotal);
            m_alpha_c_stat[topic] += childTerm;
        }
    }
}
use of structures._ChildDoc in project IR_Base by Linda-sunshine.
the class weightedCorrespondenceModel method updatePi4Child.
/**
 * Re-estimates {@code m_sstat} of every child document of {@code pDoc} with L-BFGS.
 *
 * <p>The optimisation runs on the element-wise logarithm of the child's {@code m_sstat}.
 * Each round evaluates {@code piFuncGradientVal} (objective value, with the gradient array
 * passed along) and hands the result to {@code LBFGS.lbfgs}. Iteration stops when the largest
 * absolute change of any coordinate since the previous round drops below
 * {@code m_lbfgsConverge}, or when the optimiser sets {@code iflag[0]} to 0. Afterwards the
 * exponentiated values are written back into {@code m_sstat}; this also happens when the
 * optimiser threw, in which case the stack trace is printed first.
 *
 * @param pDoc parent document whose child documents are updated
 */
public void updatePi4Child(_ParentDoc4DCM pDoc) {
    final double parentGammaSum = Utils.sumOfArray(pDoc.m_sstat);

    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        final int dim = cDoc.m_sstat.length;

        int[] iflag = { 0 };
        int[] iprint = { -1, 3 };

        // Freshly allocated double arrays are already zero-filled.
        double[] gradient = new double[dim];
        double[] diag = new double[dim];

        double[] logPi = new double[dim];
        for (int k = 0; k < dim; k++) {
            logPi[k] = Math.log(cDoc.m_sstat[k]);
        }
        double[] previousLogPi = logPi.clone();

        try {
            while (true) {
                double objective = piFuncGradientVal(pDoc, parentGammaSum, cDoc, logPi, gradient);
                LBFGS.lbfgs(dim, 4, logPi, objective, gradient, false, diag, iprint, 1e-2, 1e-10, iflag);

                // Largest coordinate-wise movement in this round.
                double maxShift = 0;
                for (int k = 0; k < dim; k++) {
                    double shift = Math.abs(logPi[k] - previousLogPi[k]);
                    if (shift > maxShift) {
                        maxShift = shift;
                    }
                    previousLogPi[k] = logPi[k];
                }

                if (maxShift < m_lbfgsConverge || iflag[0] == 0) {
                    break;
                }
            }
        } catch (LBFGS.ExceptionWithIflag e) {
            e.printStackTrace();
        }

        // Map the optimised log-parameters back to their original scale.
        for (int k = 0; k < dim; k++) {
            cDoc.m_sstat[k] = Math.exp(logPi[k]);
        }
    }
}
Aggregations