Search in sources :

Example 81 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class LDAGibbs4AC_test method printTopKChild4Parent.

/**
 * Writes one line per parent document of the training set, listing every child document of
 * that parent together with the score returned by {@code rankChild4ParentBySim}.
 *
 * <p>Line layout: {@code parentName \t childName:score \t childName:score \t ...}. The output
 * file is {@code filePrefix + "topChild4Parent.txt"}. Failures are reported on stderr via
 * {@code printStackTrace()} and are not propagated to the caller.
 *
 * @param filePrefix path prefix to which the fixed file name is appended
 * @param topK       not used by this method; all children are written in the order of
 *                   {@code m_childDocs}
 */
protected void printTopKChild4Parent(String filePrefix, int topK) {
    String topKChild4StnFile = filePrefix + "topChild4Parent.txt";
    // try-with-resources closes (and thereby flushes) the writer even when scoring or
    // iteration throws, so the file handle is never leaked.
    try (PrintWriter pw = new PrintWriter(new File(topKChild4StnFile))) {
        for (_Doc d : m_trainSet) {
            if (d instanceof _ParentDoc) {
                _ParentDoc pDoc = (_ParentDoc) d;
                pw.print(pDoc.getName() + "\t");
                for (_ChildDoc cDoc : pDoc.m_childDocs) {
                    double docScore = rankChild4ParentBySim(cDoc, pDoc);
                    pw.print(cDoc.getName() + ":" + docScore + "\t");
                }
                pw.println();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._Doc(structures._Doc) structures._ParentDoc(structures._ParentDoc) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) PrintWriter(java.io.PrintWriter)

Example 82 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class LDAGibbs4AC_test method separateTrainTest4Dynamic.

/**
 * Splits the corpus into train and test sets using a hard-coded list of parent names.
 *
 * <p>Parent documents whose name appears in the list go to {@code m_testSet}. Every other
 * parent goes to {@code m_trainSet}, followed by each of its children; those children are
 * also registered on the parent through {@code addChildDoc4Dynamics} after its
 * {@code m_childDocs4Dynamic} list has been reset. Documents in the corpus collection that
 * are not {@code _ParentDoc} instances are ignored by the first pass.
 *
 * <p>Progress information (held-out names, average child count of the test parents, and
 * the two set sizes) is printed to stdout.
 */
public void separateTrainTest4Dynamic() {
    // Alternative held-out list that was left commented out by the original author:
    // "448 348 294 329 317 212 327 127 262 148 307 139 40 325 224 234 233 430 357 78 191 150 424 206 125 484 293 73 456 111 141 68 106 183 215 402 209 159 34 156 280 265 458 65 32 118 352 105 404 66"
    String heldOutNames = "37 198 90 84 358 468 381 361 452 164 323 386 276 285 277 206 402 293 354 62 451 161 287 232 337 471 143 93 217 263 260 175 79 237 95 387 391 193 470 196 190 43 135 458 244 464 266 25 303 211";

    ArrayList<String> heldOutParents = new ArrayList<String>();
    for (String name : heldOutNames.split(" ")) {
        heldOutParents.add(name);
        System.out.println("parent Name\t" + name);
    }

    m_trainSet = new ArrayList<_Doc>();
    m_testSet = new ArrayList<_Doc>();

    // First pass: route each parent either to the test set or to a staging list.
    ArrayList<_Doc> trainParents = new ArrayList<_Doc>();
    double commentTotal = 0;
    for (_Doc doc : m_corpus.getCollection()) {
        if (!(doc instanceof _ParentDoc)) {
            continue;
        }
        if (heldOutParents.contains(doc.getName())) {
            m_testSet.add(doc);
            commentTotal += ((_ParentDoc) doc).m_childDocs.size();
        } else {
            trainParents.add(doc);
        }
    }
    System.out.println("avg comments for parent doc in testSet\t" + commentTotal / m_testSet.size());

    // Second pass: emit each training parent followed by its children.
    for (_Doc doc : trainParents) {
        _ParentDoc parent = (_ParentDoc) doc;
        m_trainSet.add(doc);
        parent.m_childDocs4Dynamic = new ArrayList<_ChildDoc>();
        for (_ChildDoc child : parent.m_childDocs) {
            m_trainSet.add(child);
            parent.addChildDoc4Dynamics(child);
        }
    }

    System.out.println("m_testSet size\t" + m_testSet.size());
    System.out.println("m_trainSet size\t" + m_trainSet.size());
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._Doc(structures._Doc) structures._ParentDoc(structures._ParentDoc) ArrayList(java.util.ArrayList)

Example 83 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class LDAGibbs4AC_test method mixTest4Spam.

/**
 * For every document in the test set, picks a different test document at random and adds
 * one of that document's children, chosen at random, to the current document via
 * {@code addChildDoc}.
 *
 * <p>Every element of {@code m_testSet} is cast to {@code _ParentDoc}. Children added in
 * earlier iterations are visible to later ones, so a later draw can pick a child that was
 * itself added by this method.
 *
 * <p>The method does nothing when the test set has fewer than two documents, because no
 * distinct donor exists. A donor without children is skipped for that iteration.
 */
public void mixTest4Spam() {
    int testSize = m_testSet.size();
    // With fewer than two documents the "pick a different index" loop below could never
    // terminate, so bail out early.
    if (testSize < 2) {
        return;
    }
    for (int i = 0; i < testSize; i++) {
        // Resample until the donor index differs from the receiver index.
        int t = m_rand.nextInt(testSize);
        while (t == i) {
            t = m_rand.nextInt(testSize);
        }
        _ParentDoc pDoc1 = (_ParentDoc) m_testSet.get(i);
        _ParentDoc pDoc2 = (_ParentDoc) m_testSet.get(t);
        int pDocCDocSize2 = pDoc2.m_childDocs.size();
        // NOTE(review): assumes m_rand is a java.util.Random, whose nextInt(0) throws;
        // a donor with no children has nothing to contribute, so skip it.
        if (pDocCDocSize2 == 0) {
            continue;
        }
        int j1 = m_rand.nextInt(pDocCDocSize2);
        _ChildDoc tmpDoc1 = (_ChildDoc) pDoc2.m_childDocs.get(j1);
        pDoc1.addChildDoc(tmpDoc1);
    }
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._ParentDoc(structures._ParentDoc)

Example 84 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class LDAGibbs4AC_test method printParameter.

/**
 * Dumps the topic vectors of parent documents, their sentences, and their child documents
 * to two tab-separated text files.
 *
 * <p>For each {@code _ParentDoc} in {@code docList} one line is written to the parent file:
 * the document name, the literal {@code topicProportion}, the first {@code number_of_topics}
 * entries of {@code m_topics}, and then for each sentence the label
 * {@code sentence<index+1>} followed by that sentence's {@code m_topics} entries. Each child
 * of the parent gets one line in the child file with its name, the literal
 * {@code topicProportion}, and its {@code m_topics} entries. Documents that are not
 * {@code _ParentDoc} instances are skipped.
 *
 * <p>Failures are reported on stderr via {@code printStackTrace()} and are not propagated.
 *
 * @param parentParameterFile path of the file receiving parent and sentence lines
 * @param childParameterFile  path of the file receiving child lines
 * @param docList             documents to scan for parents
 */
protected void printParameter(String parentParameterFile, String childParameterFile, ArrayList<_Doc> docList) {
    System.out.println("printing parameter");
    System.out.println(parentParameterFile);
    System.out.println(childParameterFile);
    // try-with-resources guarantees both writers are closed (and flushed) even if the
    // second file cannot be opened or an exception is thrown while writing.
    try (PrintWriter parentParaOut = new PrintWriter(new File(parentParameterFile));
            PrintWriter childParaOut = new PrintWriter(new File(childParameterFile))) {
        for (_Doc d : docList) {
            if (d instanceof _ParentDoc) {
                parentParaOut.print(d.getName() + "\t");
                parentParaOut.print("topicProportion\t");
                for (int k = 0; k < number_of_topics; k++) {
                    parentParaOut.print(d.m_topics[k] + "\t");
                }
                for (_Stn stnObj : d.getSentences()) {
                    // Sentence labels are 1-based in the output.
                    parentParaOut.print("sentence" + (stnObj.getIndex() + 1) + "\t");
                    for (int k = 0; k < number_of_topics; k++) {
                        parentParaOut.print(stnObj.m_topics[k] + "\t");
                    }
                }
                parentParaOut.println();
                for (_ChildDoc cDoc : ((_ParentDoc) d).m_childDocs) {
                    childParaOut.print(cDoc.getName() + "\t");
                    childParaOut.print("topicProportion\t");
                    for (int k = 0; k < number_of_topics; k++) {
                        childParaOut.print(cDoc.m_topics[k] + "\t");
                    }
                    childParaOut.println();
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Also used : structures._Stn(structures._Stn) structures._ChildDoc(structures._ChildDoc) structures._Doc(structures._Doc) structures._ParentDoc(structures._ParentDoc) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) PrintWriter(java.io.PrintWriter)

Example 85 with structures._ChildDoc

use of structures._ChildDoc in project IR_Base by Linda-sunshine.

the class LDAGibbs4AC_test method rankChild4StnByLikelihood.

/**
 * Scores every child document of {@code pDoc} against a sentence used as the query.
 *
 * <p>For each child, the score is the sum over the sentence's words of the log of a
 * per-word likelihood. That likelihood sums, over all topics {@code k}, the product of
 * {@code word_topic_sstat[k][wid] / m_sstat[k]} and
 * {@code topicInDocProb(k, child) / (d_alpha * number_of_topics + childLength)}.
 *
 * @param stnObj sentence whose words form the query
 * @param pDoc   parent document whose children are scored
 * @return map from child document name to its log-likelihood score
 */
protected HashMap<String, Double> rankChild4StnByLikelihood(_Stn stnObj, _ParentDoc pDoc) {
    HashMap<String, Double> scoreByChild = new HashMap<String, Double>();
    for (_ChildDoc child : pDoc.m_childDocs) {
        int childLength = child.getTotalDocLength();
        double logLikelihood = 0;
        for (_Word word : stnObj.getWords()) {
            int wid = word.getIndex();
            double mixture = 0;
            for (int k = 0; k < number_of_topics; k++) {
                // Alternative left by the original author:
                // topic_term_probabilty[k][wid] * child.m_topics[k]
                double wordGivenTopic = word_topic_sstat[k][wid] / m_sstat[k];
                double topicGivenChild = topicInDocProb(k, child) / (d_alpha * number_of_topics + childLength);
                mixture += wordGivenTopic * topicGivenChild;
            }
            logLikelihood += Math.log(mixture);
        }
        scoreByChild.put(child.getName(), logLikelihood);
    }
    return scoreByChild;
}
Also used : structures._ChildDoc(structures._ChildDoc) HashMap(java.util.HashMap) structures._Word(structures._Word)

Aggregations

structures._ChildDoc (structures._ChildDoc)77 structures._ParentDoc (structures._ParentDoc)47 structures._Doc (structures._Doc)35 structures._Stn (structures._Stn)25 structures._Word (structures._Word)22 File (java.io.File)18 structures._ParentDoc4DCM (structures._ParentDoc4DCM)16 structures._SparseFeature (structures._SparseFeature)16 HashMap (java.util.HashMap)14 PrintWriter (java.io.PrintWriter)12 FileNotFoundException (java.io.FileNotFoundException)11 structures._ChildDoc4BaseWithPhi (structures._ChildDoc4BaseWithPhi)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)2 Feature (Classifier.supervised.liblinear.Feature)1 FeatureNode (Classifier.supervised.liblinear.FeatureNode)1 Model (Classifier.supervised.liblinear.Model)1 Parameter (Classifier.supervised.liblinear.Parameter)1 Problem (Classifier.supervised.liblinear.Problem)1 SolverType (Classifier.supervised.liblinear.SolverType)1