Search in sources :

Example 46 with structures._Word

use of structures._Word in project IR_Base by Linda-sunshine.

the class ACCTM_CHard method cal_logLikelihood_partial4Child.

/**
 * Sums the log-likelihood of every test word of a child document.
 *
 * <p>For a word whose index is found in the parent document's sparse vector
 * (i.e. {@code Utils.indexOf(...) != -1}) the word likelihood is the sum over
 * topics of {@code childWordByTopicProb * childTopicInDocProb}. For any other
 * word each topic term is additionally multiplied by
 * {@code childXInDocProb(0, doc)} and divided by
 * {@code sum(m_xSstat) + sum(m_gamma)}, and one extra term built from
 * {@code childLocalWordByTopicProb} and {@code childXInDocProb(1, doc)} (divided
 * by the same normaliser) is added.
 *
 * <p>A word likelihood whose magnitude is below 1e-10 is reported on stdout and
 * shifted by 1e-10 before the logarithm is taken.
 *
 * @param d the child document; it is cast to {@code _ChildDoc4BaseWithPhi_Hard}
 * @return the sum of {@code Math.log(wordLikelihood)} over the test words
 */
@Override
protected double cal_logLikelihood_partial4Child(_Doc d) {
    _ChildDoc4BaseWithPhi_Hard child = (_ChildDoc4BaseWithPhi_Hard) d;
    double gammaTotal = Utils.sumOfArray(m_gamma);
    double xTotal = Utils.sumOfArray(child.m_xSstat);
    // Shared denominator of every term for words absent from the parent.
    double normaliser = xTotal + gammaTotal;

    double total = 0.0;
    for (_Word word : child.getTestWords()) {
        int wid = word.getIndex();
        boolean inParent = Utils.indexOf(child.m_parentDoc.getSparse(), wid) != -1;

        double likelihood = 0;
        for (int topic = 0; topic < number_of_topics; topic++) {
            double term = childWordByTopicProb(topic, wid) * childTopicInDocProb(topic, child);
            if (!inParent) {
                // Kept as ((a * b) * c) / d so rounding matches a single chained expression.
                term = term * childXInDocProb(0, child) / normaliser;
            }
            likelihood += term;
        }
        if (!inParent) {
            likelihood += childLocalWordByTopicProb(wid, child) * childXInDocProb(1, child) / normaliser;
        }

        if (Math.abs(likelihood) < 1e-10) {
            System.out.println("wordLoglikelihood\t" + likelihood);
            likelihood += 1e-10;
        }
        total += Math.log(likelihood);
    }
    return total;
}
Also used : structures._ChildDoc4BaseWithPhi_Hard(structures._ChildDoc4BaseWithPhi_Hard) structures._Word(structures._Word)

Example 47 with structures._Word

use of structures._Word in project IR_Base by Linda-sunshine.

the class ACCTM_CZLR method setFeatures4Word.

/**
 * Copies, for every word of every non-parent document, the values of the
 * matching sparse feature onto the word via {@code _Word.setFeatures}.
 *
 * <p>Documents that are instances of {@code _ParentDoc} are skipped.
 *
 * @param docList the documents to process
 */
protected void setFeatures4Word(ArrayList<_Doc> docList) {
    for (_Doc doc : docList) {
        if (!(doc instanceof _ParentDoc)) {
            _SparseFeature[] sparse = doc.getSparse();
            for (_Word word : doc.getWords()) {
                // NOTE(review): the lookup result is used as an array index without a
                // check; presumably every word index is present in the document's own
                // sparse vector - confirm, otherwise this line throws.
                word.setFeatures(sparse[Utils.indexOf(sparse, word.getIndex())].getValues());
            }
        }
    }
}
Also used : structures._Doc(structures._Doc) structures._ParentDoc(structures._ParentDoc) structures._SparseFeature(structures._SparseFeature) structures._Word(structures._Word)

Example 48 with structures._Word

use of structures._Word in project IR_Base by Linda-sunshine.

the class ACCTM_CZLR method updateFeatureWeight.

/**
 * Trains an L2-regularised logistic regression (liblinear {@code L2R_LR},
 * C = 1.0, eps = 0.01) on the words of all child documents of {@code pDoc},
 * stores the learned coefficients in {@code pDoc.m_featureWeight} and saves the
 * model to {@code <pDoc name>.txt} inside {@code weightIterFolder}.
 *
 * <p>Each word contributes one training row: its feature values (1-based
 * feature indices) with {@code w.getX()} as the target value.
 *
 * @param pDoc             parent document whose child documents supply the data
 * @param iter             current iteration; not read by this method at present
 * @param weightIterFolder directory the model file is written to
 */
public void updateFeatureWeight(_ParentDoc pDoc, int iter, File weightIterFolder) {
    ArrayList<Double> labels = new ArrayList<Double>();
    ArrayList<Feature[]> rows = new ArrayList<Feature[]>();
    // Ends up holding the feature count of the last word visited (0 if none).
    int featureLen = 0;

    for (_ChildDoc child : pDoc.m_childDocs) {
        for (_Word word : child.getWords()) {
            double[] values = word.getFeatures();
            double label = word.getX();
            featureLen = values.length;

            Feature[] row = new Feature[featureLen];
            int pos = 0;
            while (pos < featureLen) {
                // liblinear feature indices start at 1.
                row[pos] = new FeatureNode(pos + 1, values[pos]);
                pos++;
            }
            rows.add(row);
            labels.add(label);
        }
    }

    int sampleCount = rows.size();
    Feature[][] featureMatrix = new Feature[sampleCount][];
    double[] targetVal = new double[sampleCount];
    for (int r = 0; r < sampleCount; r++) {
        featureMatrix[r] = rows.get(r);
    }
    for (int r = 0; r < sampleCount; r++) {
        targetVal[r] = labels.get(r);
    }

    Problem problem = new Problem();
    problem.l = sampleCount;
    // Number of features reported to the solver: one more than the row width.
    problem.n = featureLen + 1;
    problem.x = featureMatrix;
    problem.y = targetVal;

    Parameter param = new Parameter(SolverType.L2R_LR, 1.0, 0.01);
    Model model = Linear.train(problem, param);

    int weightCount = model.getNrFeature();
    for (int f = 0; f < weightCount; f++) {
        pDoc.m_featureWeight[f] = model.getDecfunCoef(f, 0);
    }

    File modelFile = new File(weightIterFolder, pDoc.getName() + ".txt");
    try {
        model.save(modelFile);
    } catch (Exception e) {
        // Saving is best-effort: only the message is reported.
        System.out.println(e.getMessage());
    }
}
Also used : ArrayList(java.util.ArrayList) structures._Word(structures._Word) SolverType(Classifier.supervised.liblinear.SolverType) Feature(Classifier.supervised.liblinear.Feature) structures._SparseFeature(structures._SparseFeature) structures._ChildDoc(structures._ChildDoc) FeatureNode(Classifier.supervised.liblinear.FeatureNode) Model(Classifier.supervised.liblinear.Model) Parameter(Classifier.supervised.liblinear.Parameter) Problem(Classifier.supervised.liblinear.Problem) File(java.io.File)

Example 49 with structures._Word

use of structures._Word in project IR_Base by Linda-sunshine.

the class ACCTM_C_test method printParentTopicAssignment.

/**
 * Writes the per-word topic assignments of a parent document to
 * {@code <document name>.txt} inside {@code topicFolder}.
 *
 * <p>One line is written per sentence: the sentence index, a tab, then a
 * tab-terminated {@code featureName:topic} entry for each word of the sentence.
 *
 * <p>If the output file cannot be opened the stack trace is printed and nothing
 * is written.
 *
 * @param d           the document to dump; it is cast to {@code _ParentDoc}
 * @param topicFolder directory the output file is created in
 */
protected void printParentTopicAssignment(_Doc d, File topicFolder) {
    _ParentDoc pDoc = (_ParentDoc) d;
    String topicAssignmentFile = pDoc.getName() + ".txt";
    try {
        PrintWriter pw = new PrintWriter(new File(topicFolder, topicAssignmentFile));
        try {
            for (_Stn stnObj : pDoc.getSentences()) {
                pw.print(stnObj.getIndex() + "\t");
                for (_Word w : stnObj.getWords()) {
                    int index = w.getIndex();
                    int topic = w.getTopic();
                    String featureName = m_corpus.getFeature(index);
                    pw.print(featureName + ":" + topic + "\t");
                }
                pw.println();
            }
            pw.flush();
        } finally {
            // Release the file handle even if a lookup above throws at runtime.
            pw.close();
        }
    } catch (FileNotFoundException e) {
        // Output is best-effort: report the failure and carry on.
        e.printStackTrace();
    }
}
Also used : structures._Stn(structures._Stn) structures._ParentDoc(structures._ParentDoc) FileNotFoundException(java.io.FileNotFoundException) structures._Word(structures._Word) File(java.io.File) PrintWriter(java.io.PrintWriter)

Example 50 with structures._Word

use of structures._Word in project IR_Base by Linda-sunshine.

the class ACCTM_C_test method printChildTopicAssignment.

/**
 * Writes the per-word topic assignments of a document to
 * {@code <document name>.txt} inside {@code topicFolder}.
 *
 * <p>All words are written on a single line, each as a tab-terminated
 * {@code featureName:topic} entry; no trailing newline is emitted.
 *
 * <p>If the output file cannot be opened the stack trace is printed and nothing
 * is written.
 *
 * @param d           the document whose words are dumped
 * @param topicFolder directory the output file is created in
 */
protected void printChildTopicAssignment(_Doc d, File topicFolder) {
    String topicAssignmentFile = d.getName() + ".txt";
    try {
        PrintWriter pw = new PrintWriter(new File(topicFolder, topicAssignmentFile));
        try {
            for (_Word w : d.getWords()) {
                int index = w.getIndex();
                int topic = w.getTopic();
                String featureName = m_corpus.getFeature(index);
                pw.print(featureName + ":" + topic + "\t");
            }
            pw.flush();
        } finally {
            // Release the file handle even if a lookup above throws at runtime.
            pw.close();
        }
    } catch (FileNotFoundException e) {
        // Output is best-effort: report the failure and carry on.
        e.printStackTrace();
    }
}
Also used : FileNotFoundException(java.io.FileNotFoundException) structures._Word(structures._Word) File(java.io.File) PrintWriter(java.io.PrintWriter)

Aggregations

structures._Word (structures._Word)69 structures._ChildDoc (structures._ChildDoc)18 File (java.io.File)16 FileNotFoundException (java.io.FileNotFoundException)15 PrintWriter (java.io.PrintWriter)15 structures._ParentDoc (structures._ParentDoc)14 structures._Doc (structures._Doc)12 structures._Stn (structures._Stn)11 structures._ParentDoc4DCM (structures._ParentDoc4DCM)10 structures._ChildDoc4BaseWithPhi (structures._ChildDoc4BaseWithPhi)9 HashMap (java.util.HashMap)5 structures._Doc4DCMLDA (structures._Doc4DCMLDA)4 structures._Doc4SparseDCMLDA (structures._Doc4SparseDCMLDA)4 structures._SparseFeature (structures._SparseFeature)3 Feature (Classifier.supervised.liblinear.Feature)1 FeatureNode (Classifier.supervised.liblinear.FeatureNode)1 Model (Classifier.supervised.liblinear.Model)1 Parameter (Classifier.supervised.liblinear.Parameter)1 Problem (Classifier.supervised.liblinear.Problem)1 SolverType (Classifier.supervised.liblinear.SolverType)1