Search in sources :

Example 86 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

Defined in class LDAGibbs4AC_test, method printTopKStn4Child.

/**
 * Writes one text file per child document of {@code pDoc}, each holding at most
 * {@code topK} lines of the form {@code key<TAB>value}.
 *
 * <p>The entries come from {@code rankStn4ChildBySim(pDoc, cDoc)} and are emitted in the
 * order produced by {@code sortHashMap4Integer(map, true)}.
 * NOTE(review): the keys look like sentence indices and the values like similarity
 * scores, sorted best-first - confirm against those helpers.
 *
 * <p>Files are named {@code <child name>.txt} and placed in a sub-folder of
 * {@code topKStnFolder} named after the parent document. The sub-folder is created with
 * {@code mkdir()} when it does not exist, so {@code topKStnFolder} itself must already exist.
 *
 * <p>Failures are handled per child: the stack trace is printed and processing continues
 * with the next child. The writer is always closed, even when sorting or printing fails.
 *
 * @param topK          maximum number of entries written per child; a negative value
 *                      never matches the counter, so every entry is written
 * @param pDoc          parent document whose {@code m_childDocs} are processed
 * @param topKStnFolder existing directory under which the per-parent folder is created
 */
protected void printTopKStn4Child(int topK, _ParentDoc pDoc, File topKStnFolder) {
    File topKStn4PDocFolder = new File(topKStnFolder, pDoc.getName());
    if (!topKStn4PDocFolder.exists()) {
        // Result intentionally not checked: if creation fails, opening each child file
        // below fails and is reported through the catch block.
        topKStn4PDocFolder.mkdir();
    }
    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        String topKStn4ChildFile = cDoc.getName() + ".txt";
        HashMap<Integer, Double> stnSimMap = rankStn4ChildBySim(pDoc, cDoc);
        PrintWriter pw = null;
        try {
            pw = new PrintWriter(new File(topKStn4PDocFolder, topKStn4ChildFile));
            int written = 0;
            for (Map.Entry<Integer, Double> entry : sortHashMap4Integer(stnSimMap, true)) {
                if (written == topK) {
                    break;
                }
                pw.print(entry.getKey());
                pw.print("\t" + entry.getValue());
                pw.println();
                written++;
            }
        } catch (Exception e) {
            // Best effort: report the failure for this child and move on to the next one.
            e.printStackTrace();
        } finally {
            // close() also flushes; closing here releases the file handle on every path.
            if (pw != null) {
                pw.close();
            }
        }
    }
}
Also used : structures._ChildDoc(structures._ChildDoc) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) FileNotFoundException(java.io.FileNotFoundException) PrintWriter(java.io.PrintWriter)

Example 87 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

Defined in class corrLDA_Gibbs, method calculate_log_likelihood4Child.

/**
 * Computes the log-likelihood of a child document.
 *
 * <p>For every sparse feature, the per-topic terms
 * {@code childWordByTopicProb(k, wid) * childTopicInDoc(k, cDoc) / (smoothing + topic total)}
 * are summed over all topics. The log of that sum, weighted by the feature value, is
 * added to the document total.
 *
 * @param d document to score; must be a {@code _ChildDoc} (it is cast unconditionally)
 * @return the accumulated, value-weighted log-likelihood over the document's features
 */
protected double calculate_log_likelihood4Child(_Doc d) {
    _ChildDoc child = (_ChildDoc) d;
    _SparseFeature[] features = child.getSparse();
    double topicTotal = Utils.sumOfArray(child.m_sstat);
    double smoothingTotal = m_smoothingParam * number_of_topics;
    // Shared denominator of every per-topic term below.
    double normalizer = smoothingTotal + topicTotal;

    double total = 0;
    for (_SparseFeature feature : features) {
        int wordId = feature.getIndex();
        double weight = feature.getValue();

        double likelihood = 0;
        for (int topic = 0; topic < number_of_topics; topic++) {
            likelihood += childWordByTopicProb(topic, wordId) * childTopicInDoc(topic, child) / normalizer;
        }

        // Keep the argument of log() away from zero and report when that happens.
        if (likelihood < 1e-10) {
            likelihood += 1e-10;
            System.out.println("small likelihood in child");
        }

        total += weight * Math.log(likelihood);
    }
    return total;
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._SparseFeature(structures._SparseFeature)

Example 88 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

Defined in class corrLDA_Gibbs, method initTest4Dynamical.

/**
 * Prepares a parent document and at most {@code commentNum} of its child documents for
 * testing, appending each prepared document to {@code sampleTestSet}.
 *
 * <p>The parent's {@code m_childDocs4Dynamic} list is replaced with a fresh empty list,
 * its topic state is reset via {@code setTopics4Gibbs}, and every sentence gets
 * {@code setTopicsVct(number_of_topics)}. The first {@code commentNum} children (in
 * {@code m_childDocs} iteration order) are then reset via {@code setTopics4Gibbs_LDA},
 * added to the test set, and registered through {@code addChildDoc4Dynamics}.
 *
 * @param sampleTestSet list that receives the parent followed by the selected children
 * @param d             document to prepare; must be a {@code _ParentDoc} (cast unconditionally)
 * @param commentNum    maximum number of children to include; zero or negative selects none
 */
public void initTest4Dynamical(ArrayList<_Doc> sampleTestSet, _Doc d, int commentNum) {
    _ParentDoc parent = (_ParentDoc) d;

    parent.m_childDocs4Dynamic = new ArrayList<_ChildDoc>();
    parent.setTopics4Gibbs(number_of_topics, 0);
    for (_Stn sentence : parent.getSentences()) {
        sentence.setTopicsVct(number_of_topics);
    }
    sampleTestSet.add(parent);

    // Count down the remaining quota instead of counting up the children taken.
    int remaining = commentNum;
    for (_ChildDoc child : parent.m_childDocs) {
        if (remaining <= 0) {
            break;
        }
        remaining--;

        child.setTopics4Gibbs_LDA(number_of_topics, 0);
        sampleTestSet.add(child);
        parent.addChildDoc4Dynamics(child);
    }
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._Stn(structures._Stn) structures._ParentDoc(structures._ParentDoc)

Example 89 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

Defined in class weightedCorrespondenceModel, method collectStats.

/**
 * Accumulates digamma-based statistics from one parent document and its children.
 *
 * <p>For each topic {@code k}:
 * <ul>
 *   <li>{@code m_alpha_stat[k]} gains {@code digamma(pDoc.m_sstat[k]) - digamma(sum of pDoc.m_sstat)};</li>
 *   <li>{@code m_beta_stat[k][v]} gains, for every vocabulary index {@code v},
 *       {@code digamma(pDoc.m_lambda_stat[k][v]) - digamma(sum of pDoc.m_lambda_stat[k])}.</li>
 * </ul>
 * For each child document, {@code m_alpha_c_stat[k]} gains
 * {@code digamma(cDoc.m_sstat[k]) - digamma(sum of cDoc.m_sstat)}.
 *
 * @param pDoc parent document whose statistics (and whose children's) are folded in
 */
protected void collectStats(_ParentDoc4DCM pDoc) {
    // Parent-level contributions: topic proportions and per-topic word statistics.
    double parentTotal = Utils.sumOfArray(pDoc.m_sstat);
    for (int topic = 0; topic < number_of_topics; topic++) {
        m_alpha_stat[topic] += Utils.digamma(pDoc.m_sstat[topic]) - Utils.digamma(parentTotal);

        double topicWordTotal = Utils.sumOfArray(pDoc.m_lambda_stat[topic]);
        for (int word = 0; word < vocabulary_size; word++) {
            m_beta_stat[topic][word] += Utils.digamma(pDoc.m_lambda_stat[topic][word]) - Utils.digamma(topicWordTotal);
        }
    }

    // Child-level contributions: one pass over every child of this parent.
    for (_ChildDoc child : pDoc.m_childDocs) {
        double childTotal = Utils.sumOfArray(child.m_sstat);
        for (int topic = 0; topic < number_of_topics; topic++) {
            m_alpha_c_stat[topic] += Utils.digamma(child.m_sstat[topic]) - Utils.digamma(childTotal);
        }
    }
}
Also used : structures._ChildDoc(structures._ChildDoc)

Example 90 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

Defined in class weightedCorrespondenceModel, method updatePi4Child.

/**
 * Re-estimates {@code m_sstat} of every child document of {@code pDoc} by running an
 * L-BFGS optimization in log space.
 *
 * <p>For each child, the optimization variable {@code pi} starts as the element-wise
 * natural log of {@code cDoc.m_sstat}. {@code piFuncGradientVal} supplies the objective
 * value and fills the gradient; {@code LBFGS.lbfgs} then updates {@code pi} in place.
 * Iteration stops when the largest absolute per-coordinate change of {@code pi} in one
 * step falls below {@code m_lbfgsConverge}, or when the optimizer sets {@code iflag[0]}
 * to 0. Afterwards {@code cDoc.m_sstat[k]} is overwritten with {@code exp(pi[k])}.
 *
 * <p>If {@code LBFGS.ExceptionWithIflag} is thrown, the stack trace is printed and the
 * write-back still happens using whatever {@code pi} holds at that point.
 *
 * @param pDoc parent document whose children are updated; the sum of its
 *             {@code m_sstat} is passed to {@code piFuncGradientVal}
 */
public void updatePi4Child(_ParentDoc4DCM pDoc) {
    // Computed once per parent and shared by all of its children.
    double gammaSum = Utils.sumOfArray(pDoc.m_sstat);
    for (_ChildDoc cDoc : pDoc.m_childDocs) {
        // Optimizer control arrays, re-created for every child.
        // NOTE(review): iprint = {-1, 3} presumably silences optimizer output - confirm
        // against the LBFGS library documentation.
        int[] iflag = { 0 }, iprint = { -1, 3 };
        double fValue = 0;
        int fSize = cDoc.m_sstat.length;
        // The explicit zero fills are redundant (new double[] is already zeroed) but harmless.
        double[] piGradient = new double[fSize];
        Arrays.fill(piGradient, 0);
        double[] piDiag = new double[fSize];
        Arrays.fill(piDiag, 0);
        double[] pi = new double[fSize];
        double[] oldPi = new double[fSize];
        // Work in log space; oldPi tracks the previous iterate for the convergence test.
        for (int k = 0; k < fSize; k++) {
            pi[k] = Math.log(cDoc.m_sstat[k]);
            oldPi[k] = Math.log(cDoc.m_sstat[k]);
        }
        try {
            do {
                // Largest absolute coordinate change of pi in this iteration.
                double diff = 0;
                fValue = piFuncGradientVal(pDoc, gammaSum, cDoc, pi, piGradient);
                // NOTE(review): arguments assumed to follow the RISO L-BFGS signature
                // (n, m, x, f, g, diagco, diag, iprint, eps, xtol, iflag) - confirm.
                LBFGS.lbfgs(fSize, 4, pi, fValue, piGradient, false, piDiag, iprint, 1e-2, 1e-10, iflag);
                for (int k = 0; k < fSize; k++) {
                    double tempDiff = 0;
                    tempDiff = pi[k] - oldPi[k];
                    if (Math.abs(tempDiff) > diff) {
                        diff = Math.abs(tempDiff);
                    }
                    oldPi[k] = pi[k];
                }
                // Early exit once the step is small enough, independent of the optimizer's own flag.
                if (diff < m_lbfgsConverge) {
                    // System.out.print("diff\t"+diff+"finish update pi");
                    break;
                }
            } while (iflag[0] != 0);
        } catch (LBFGS.ExceptionWithIflag e) {
            // Reported only; the write-back below still runs with the current pi.
            e.printStackTrace();
        }
        // Map the optimized log-space values back and store them on the child.
        for (int k = 0; k < fSize; k++) {
            cDoc.m_sstat[k] = Math.exp(pi[k]);
        // System.out.println(cDoc.getName()+"\tcDoc.m_sstat[]"+cDoc.m_sstat[k]);
        }
    }
}
Also used : structures._ChildDoc(structures._ChildDoc) LBFGS(LBFGS.LBFGS)

Aggregations

structures._ChildDoc (structures._ChildDoc)77 structures._ParentDoc (structures._ParentDoc)47 structures._Doc (structures._Doc)35 structures._Stn (structures._Stn)25 structures._Word (structures._Word)22 File (java.io.File)18 structures._ParentDoc4DCM (structures._ParentDoc4DCM)16 structures._SparseFeature (structures._SparseFeature)16 HashMap (java.util.HashMap)14 PrintWriter (java.io.PrintWriter)12 FileNotFoundException (java.io.FileNotFoundException)11 structures._ChildDoc4BaseWithPhi (structures._ChildDoc4BaseWithPhi)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)2 Feature (Classifier.supervised.liblinear.Feature)1 FeatureNode (Classifier.supervised.liblinear.FeatureNode)1 Model (Classifier.supervised.liblinear.Model)1 Parameter (Classifier.supervised.liblinear.Parameter)1 Problem (Classifier.supervised.liblinear.Problem)1 SolverType (Classifier.supervised.liblinear.SolverType)1