Search in sources :

Example 76 with structures._ParentDoc

use of structures._ParentDoc in project IR_Base by Linda-sunshine.

the class LDAGibbs4AC_test method printTopKChild4Stn.

/**
 * Writes one ranking file per sentence of the given parent document.
 *
 * <p>The files are placed in a sub-folder of {@code topKChildFolder} named
 * after {@code pDoc.getName()}; the sub-folder is created when missing. The
 * file for a sentence is named {@code <sentence index + 1>.txt} and holds one
 * {@code key<TAB>value} line per entry, following the iteration order of
 * {@code sortHashMap4String(likelihoodMap, true)}.
 *
 * <p>Failures while writing one sentence file are reported with a stack trace
 * and do not stop the remaining sentences from being processed.
 *
 * @param topK number of entries after which writing stops for a sentence; a
 *        negative value never matches the counter, so every entry is written
 * @param pDoc parent document whose sentences are ranked
 * @param topKChildFolder folder under which the per-document folder is created
 */
protected void printTopKChild4Stn(int topK, _ParentDoc pDoc, File topKChildFolder) {
    File topKChild4PDocFolder = new File(topKChildFolder, pDoc.getName());
    if (!topKChild4PDocFolder.exists()) {
        topKChild4PDocFolder.mkdir();
    }
    for (_Stn stnObj : pDoc.getSentences()) {
        HashMap<String, Double> likelihoodMap = rankChild4StnByLikelihood(stnObj, pDoc);
        String topChild4StnFile = (stnObj.getIndex() + 1) + ".txt";
        PrintWriter pw = null;
        try {
            pw = new PrintWriter(new File(topKChild4PDocFolder, topChild4StnFile));
            int written = 0;
            for (Map.Entry<String, Double> entry : sortHashMap4String(likelihoodMap, true)) {
                if (written == topK) {
                    break;
                }
                pw.print(entry.getKey());
                pw.print("\t" + entry.getValue());
                pw.println();
                written++;
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the file handle is released even when
            // sorting or printing throws; close() also flushes the writer.
            if (pw != null) {
                pw.close();
            }
        }
    }
}
Also used : structures._Stn(structures._Stn) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) FileNotFoundException(java.io.FileNotFoundException) PrintWriter(java.io.PrintWriter)

Example 77 with structures._ParentDoc

use of structures._ParentDoc in project IR_Base by Linda-sunshine.

the class LDAGibbs4AC_test method printTopKChild4Parent.

/**
 * Writes, for every parent document in {@code m_trainSet}, one line with the
 * score of each of its child documents.
 *
 * <p>The output file is {@code filePrefix + "topChild4Parent.txt"}. Each line
 * starts with the parent name followed by a tab, then one
 * {@code childName:score} entry per child, each followed by a tab. The score
 * comes from {@code rankChild4ParentBySim(cDoc, pDoc)}. Children are written
 * in the order of {@code pDoc.m_childDocs}; no sorting or truncation is done.
 *
 * <p>Any exception is reported with a stack trace and ends the writing.
 *
 * @param filePrefix prefix prepended to the output file name
 * @param topK not used by this method; kept for interface compatibility
 */
protected void printTopKChild4Parent(String filePrefix, int topK) {
    String outputPath = filePrefix + "topChild4Parent.txt";
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(outputPath));
        for (_Doc d : m_trainSet) {
            if (!(d instanceof _ParentDoc)) {
                continue;
            }
            _ParentDoc pDoc = (_ParentDoc) d;
            pw.print(pDoc.getName() + "\t");
            for (_ChildDoc cDoc : pDoc.m_childDocs) {
                double docScore = rankChild4ParentBySim(cDoc, pDoc);
                pw.print(cDoc.getName() + ":" + docScore + "\t");
            }
            pw.println();
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the file handle is released even when scoring
        // or printing throws; close() also flushes the writer.
        if (pw != null) {
            pw.close();
        }
    }
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._Doc(structures._Doc) structures._ParentDoc(structures._ParentDoc) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) PrintWriter(java.io.PrintWriter)

Example 78 with structures._ParentDoc

use of structures._ParentDoc in project IR_Base by Linda-sunshine.

the class LDAGibbs4AC_test method separateTrainTest4Dynamic.

/**
 * Splits the corpus into {@code m_trainSet} and {@code m_testSet} using a
 * hard-coded list of parent document names.
 *
 * <p>Parent documents whose name appears in the list go to the test set. All
 * other parent documents go to the training set together with each of their
 * child documents; for those parents {@code m_childDocs4Dynamic} is reset and
 * refilled through {@code addChildDoc4Dynamics}. Documents in the corpus that
 * are not {@code _ParentDoc} instances are ignored by the first pass.
 *
 * <p>Progress information is printed to standard output.
 */
public void separateTrainTest4Dynamic() {
    // Names of the parent documents held out for testing.
    String parentFakeString = "37 198 90 84 358 468 381 361 452 164 323 386 276 285 277 206 402 293 354 62 451 161 287 232 337 471 143 93 217 263 260 175 79 237 95 387 391 193 470 196 190 43 135 458 244 464 266 25 303 211";
    // Alternative held-out list, kept for reference:
    // "448 348 294 329 317 212 327 127 262 148 307 139 40 325 224 234 233 430 357 78 191 150 424 206 125 484 293 73 456 111 141 68 106 183 215 402 209 159 34 156 280 265 458 65 32 118 352 105 404 66"
    ArrayList<String> parentFakeList = new ArrayList<String>();
    for (String heldOutName : parentFakeString.split(" ")) {
        parentFakeList.add(heldOutName);
        System.out.println("parent Name\t" + heldOutName);
    }

    m_trainSet = new ArrayList<_Doc>();
    m_testSet = new ArrayList<_Doc>();
    ArrayList<_Doc> parentTrainSet = new ArrayList<_Doc>();
    // Running total of child documents over the held-out parents.
    double totalTestComments = 0;
    for (_Doc doc : m_corpus.getCollection()) {
        if (!(doc instanceof _ParentDoc)) {
            continue;
        }
        if (!parentFakeList.contains(doc.getName())) {
            parentTrainSet.add(doc);
            continue;
        }
        m_testSet.add(doc);
        totalTestComments += ((_ParentDoc) doc).m_childDocs.size();
    }
    System.out.println("avg comments for parent doc in testSet\t" + totalTestComments / m_testSet.size());

    // Each training parent is followed in m_trainSet by its own children.
    for (_Doc doc : parentTrainSet) {
        m_trainSet.add(doc);
        _ParentDoc parent = (_ParentDoc) doc;
        parent.m_childDocs4Dynamic = new ArrayList<_ChildDoc>();
        for (_ChildDoc child : parent.m_childDocs) {
            m_trainSet.add(child);
            parent.addChildDoc4Dynamics(child);
        }
    }
    System.out.println("m_testSet size\t" + m_testSet.size());
    System.out.println("m_trainSet size\t" + m_trainSet.size());
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._Doc(structures._Doc) structures._ParentDoc(structures._ParentDoc) ArrayList(java.util.ArrayList)

Example 79 with structures._ParentDoc

use of structures._ParentDoc in project IR_Base by Linda-sunshine.

the class LDAGibbs4AC_test method mixTest4Spam.

/**
 * Adds to every parent document in {@code m_testSet} one child document
 * drawn at random from a different, randomly chosen parent in the test set.
 *
 * <p>Every element of {@code m_testSet} is cast to {@code _ParentDoc}. The
 * donor's child list is read at the time of the draw, so it may already
 * contain a child added by an earlier iteration of this method.
 *
 * <p>Nothing is done when the test set holds fewer than two documents,
 * because no distinct donor exists. A donor without child documents
 * contributes nothing for that iteration.
 */
public void mixTest4Spam() {
    int testSize = m_testSet.size();
    // With a single document the search for a donor index different from i
    // could never terminate.
    if (testSize < 2) {
        return;
    }
    for (int i = 0; i < testSize; i++) {
        // Redraw until the donor differs from the receiving document.
        int donorIndex = m_rand.nextInt(testSize);
        while (donorIndex == i) {
            donorIndex = m_rand.nextInt(testSize);
        }
        _ParentDoc receiver = (_ParentDoc) m_testSet.get(i);
        _ParentDoc donor = (_ParentDoc) m_testSet.get(donorIndex);
        int donorChildCount = donor.m_childDocs.size();
        // Random.nextInt rejects a non-positive bound, so a donor with no
        // children is skipped instead of aborting the whole pass.
        if (donorChildCount == 0) {
            continue;
        }
        _ChildDoc borrowed = donor.m_childDocs.get(m_rand.nextInt(donorChildCount));
        receiver.addChildDoc(borrowed);
    }
}
Also used : structures._ChildDoc(structures._ChildDoc) structures._ParentDoc(structures._ParentDoc)

Example 80 with structures._ParentDoc

use of structures._ParentDoc in project IR_Base by Linda-sunshine.

the class LDAGibbs4AC_test method printTopKChild4StnWithHybrid.

/**
 * Writes the hybrid ranking for every sentence of every parent document in
 * {@code m_trainSet} to a single file.
 *
 * <p>The output file is {@code filePrefix + "topChild4Stn_hybrid.txt"}.
 * {@code m_LM.generateReferenceModel()} is called once before any ranking.
 * For each parent a header line {@code name<TAB>sentenceCount} is written,
 * followed by one line per sentence: the sentence index plus one, a tab, and
 * then {@code key:value} entries each followed by a tab, in the iteration
 * order of {@code sortHashMap4String(likelihoodMap, true)}. The map comes from
 * {@code rankChild4StnByHybrid(stnObj, pDoc)}.
 *
 * <p>Any exception is reported with a stack trace and ends the writing.
 *
 * @param filePrefix prefix prepended to the output file name
 * @param topK not applied by this method, every ranked entry is written;
 *        kept for interface compatibility
 */
protected void printTopKChild4StnWithHybrid(String filePrefix, int topK) {
    String outputPath = filePrefix + "topChild4Stn_hybrid.txt";
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new File(outputPath));
        m_LM.generateReferenceModel();
        for (_Doc d : m_trainSet) {
            if (!(d instanceof _ParentDoc)) {
                continue;
            }
            _ParentDoc pDoc = (_ParentDoc) d;
            pw.println(pDoc.getName() + "\t" + pDoc.getSenetenceSize());
            for (_Stn stnObj : pDoc.getSentences()) {
                HashMap<String, Double> likelihoodMap = rankChild4StnByHybrid(stnObj, pDoc);
                pw.print((stnObj.getIndex() + 1) + "\t");
                for (Map.Entry<String, Double> entry : sortHashMap4String(likelihoodMap, true)) {
                    pw.print(entry.getKey());
                    pw.print(":" + entry.getValue());
                    pw.print("\t");
                }
                pw.println();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the file handle is released even when the
        // reference model, ranking or printing throws; close() also flushes.
        if (pw != null) {
            pw.close();
        }
    }
}
Also used : structures._Stn(structures._Stn) structures._Doc(structures._Doc) structures._ParentDoc(structures._ParentDoc) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) FileNotFoundException(java.io.FileNotFoundException) PrintWriter(java.io.PrintWriter)

Aggregations

structures._ParentDoc (structures._ParentDoc)72 structures._ChildDoc (structures._ChildDoc)50 structures._Doc (structures._Doc)39 structures._Stn (structures._Stn)30 File (java.io.File)29 PrintWriter (java.io.PrintWriter)22 FileNotFoundException (java.io.FileNotFoundException)20 HashMap (java.util.HashMap)17 structures._Word (structures._Word)17 structures._SparseFeature (structures._SparseFeature)14 structures._ChildDoc4BaseWithPhi (structures._ChildDoc4BaseWithPhi)8 Map (java.util.Map)7 ArrayList (java.util.ArrayList)6 structures._ParentDoc4DCM (structures._ParentDoc4DCM)4 IOException (java.io.IOException)2 ParseException (java.text.ParseException)2 JSONObject (json.JSONObject)2 Feature (Classifier.supervised.liblinear.Feature)1 FeatureNode (Classifier.supervised.liblinear.FeatureNode)1 Model (Classifier.supervised.liblinear.Model)1