Search in sources :

Example 1 with TestDiscrete

use of edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete in project cogcomp-nlp by CogComp.

In the class LearningCurveMultiDataset, the method getLearningCurve:

/**
 * Trains the level 1 and level 2 NE taggers, tracking the best-scoring training
 * round on the test set for each level and checkpointing the best models.
 * <p>
 * use fixedNumIterations=-1 if you want to use the automatic convergence criterion
 * <p>
 * NB: assuming column format
 *
 * @param trainDataSet the training documents
 * @param testDataSet the evaluation documents used to pick the best round
 * @param fixedNumIterations a fixed number of training rounds, or -1 to stop
 *        automatically once F1 has not improved for 10 rounds (capped at 200)
 * @throws Exception if the model directory path exists as a non-directory file,
 *         or if training/feature-extraction I/O fails
 */
public static void getLearningCurve(Vector<Data> trainDataSet, Vector<Data> testDataSet, int fixedNumIterations) throws Exception {
    double bestF1Level1 = -1;
    int bestRoundLevel1 = 0;
    // Get the directory name (<configname>.model is appended in LbjTagger/Parameters.java:139)
    String modelPath = ParametersForLbjCode.currentParameters.pathToModelFile;
    String modelPathDir = modelPath.substring(0, modelPath.lastIndexOf("/"));
    if (IOUtils.exists(modelPathDir)) {
        if (!IOUtils.isDirectory(modelPathDir)) {
            String msg = "ERROR: " + NAME + ".getLearningCurve(): model directory '" + modelPathDir + "' already exists as a (non-directory) file.";
            logger.error(msg);
            throw new IOException(msg);
        } else
            logger.warn(NAME + ".getLearningCurve(): writing to existing model path '" + modelPathDir + "'...");
    } else {
        IOUtils.mkdir(modelPathDir);
    }
    NETaggerLevel1.Parameters paramLevel1 = new NETaggerLevel1.Parameters();
    paramLevel1.baseLTU = new SparseAveragedPerceptron(ParametersForLbjCode.currentParameters.learningRatePredictionsLevel1, 0, ParametersForLbjCode.currentParameters.thicknessPredictionsLevel1);
    logger.info("Level 1 classifier learning rate = " + ParametersForLbjCode.currentParameters.learningRatePredictionsLevel1 + ", thickness = " + ParametersForLbjCode.currentParameters.thicknessPredictionsLevel1);
    NETaggerLevel1 tagger1 = new NETaggerLevel1(paramLevel1, modelPath + ".level1", modelPath + ".level1.lex");
    tagger1.forget();
    // When level 1 predictions feed level 2 as features, mark the gold entities
    // and precompute the aggregation features over the training data.
    for (int dataId = 0; dataId < trainDataSet.size(); dataId++) {
        Data trainData = trainDataSet.elementAt(dataId);
        if (ParametersForLbjCode.currentParameters.featuresToUse.containsKey("PredictionsLevel1")) {
            PredictionsAndEntitiesConfidenceScores.getAndMarkEntities(trainData, NEWord.LabelToLookAt.GoldLabel);
            TwoLayerPredictionAggregationFeatures.setLevel1AggregationFeatures(trainData, true);
        }
    }
    // Pre-extract the L1 test and train data, removing any stale caches first.
    String path = ParametersForLbjCode.currentParameters.pathToModelFile;
    String trainPathL1 = path + ".level1.prefetchedTrainData";
    deleteIfExists(trainPathL1);
    String testPathL1 = path + ".level1.prefetchedTestData";
    deleteIfExists(testPathL1);
    logger.info("Pre-extracting the training data for Level 1 classifier, saving to " + trainPathL1);
    BatchTrainer bt1train = prefetchAndGetBatchTrainer(tagger1, trainDataSet, trainPathL1);
    logger.info("Pre-extracting the testing data for Level 1 classifier, saving to " + testPathL1);
    BatchTrainer bt1test = prefetchAndGetBatchTrainer(tagger1, testDataSet, testPathL1);
    Parser testParser1 = bt1test.getParser();
    // Train level 1 one round at a time; evaluate on the test set after each
    // round and save the model whenever F1 improves. With automatic convergence
    // (-1), stop after 10 rounds without improvement, or at 200 rounds.
    for (int i = 0; (fixedNumIterations == -1 && i < 200 && i - bestRoundLevel1 < 10) || (fixedNumIterations > 0 && i <= fixedNumIterations); ++i) {
        bt1train.train(1);
        testParser1.reset();
        TestDiscrete simpleTest = new TestDiscrete();
        simpleTest.addNull("O");
        TestDiscrete.testDiscrete(simpleTest, tagger1, null, testParser1, true, 0);
        // getOverallStats()[2] is used as F1 here (see variable naming below).
        double f1Level1 = simpleTest.getOverallStats()[2];
        if (f1Level1 > bestF1Level1) {
            bestF1Level1 = f1Level1;
            bestRoundLevel1 = i;
            tagger1.save();
        }
        logger.info(i + " rounds.  Best so far for Level1 : (" + bestRoundLevel1 + ")=" + bestF1Level1);
    }
    logger.info("Level 1; best round : " + bestRoundLevel1 + "\tbest F1 : " + bestF1Level1);
    // trash the l2 prefetch data
    String trainPathL2 = path + ".level2.prefetchedTrainData";
    deleteIfExists(trainPathL2);
    String testPathL2 = path + ".level2.prefetchedTestData";
    // BUGFIX: this previously deleted testPathL1 again, leaving any stale L2
    // test cache on disk.
    deleteIfExists(testPathL2);
    NETaggerLevel2.Parameters paramLevel2 = new NETaggerLevel2.Parameters();
    paramLevel2.baseLTU = new SparseAveragedPerceptron(ParametersForLbjCode.currentParameters.learningRatePredictionsLevel2, 0, ParametersForLbjCode.currentParameters.thicknessPredictionsLevel2);
    NETaggerLevel2 tagger2 = new NETaggerLevel2(paramLevel2, ParametersForLbjCode.currentParameters.pathToModelFile + ".level2", ParametersForLbjCode.currentParameters.pathToModelFile + ".level2.lex");
    tagger2.forget();
    // Previously checked if PatternFeatures was in featuresToUse.
    if (ParametersForLbjCode.currentParameters.featuresToUse.containsKey("PredictionsLevel1")) {
        logger.info("Level 2 classifier learning rate = " + ParametersForLbjCode.currentParameters.learningRatePredictionsLevel2 + ", thickness = " + ParametersForLbjCode.currentParameters.thicknessPredictionsLevel2);
        double bestF1Level2 = -1;
        int bestRoundLevel2 = 0;
        logger.info("Pre-extracting the training data for Level 2 classifier, saving to " + trainPathL2);
        BatchTrainer bt2train = prefetchAndGetBatchTrainer(tagger2, trainDataSet, trainPathL2);
        logger.info("Pre-extracting the testing data for Level 2 classifier, saving to " + testPathL2);
        BatchTrainer bt2test = prefetchAndGetBatchTrainer(tagger2, testDataSet, testPathL2);
        Parser testParser2 = bt2test.getParser();
        // Same train/evaluate/checkpoint loop as level 1, now for level 2.
        for (int i = 0; (fixedNumIterations == -1 && i < 200 && i - bestRoundLevel2 < 10) || (fixedNumIterations > 0 && i <= fixedNumIterations); ++i) {
            logger.info("Learning level 2 classifier; round " + i);
            bt2train.train(1);
            logger.info("Testing level 2 classifier;  on prefetched data, round: " + i);
            testParser2.reset();
            TestDiscrete simpleTest = new TestDiscrete();
            simpleTest.addNull("O");
            TestDiscrete.testDiscrete(simpleTest, tagger2, null, testParser2, true, 0);
            double f1Level2 = simpleTest.getOverallStats()[2];
            if (f1Level2 > bestF1Level2) {
                bestF1Level2 = f1Level2;
                bestRoundLevel2 = i;
                tagger2.save();
            }
            logger.info(i + " rounds.  Best so far for Level2 : (" + bestRoundLevel2 + ") " + bestF1Level2);
        }
        // trash the l2 prefetch data
        deleteIfExists(trainPathL2);
        // BUGFIX: previously deleted testPathL1 here instead of the L2 test cache.
        deleteIfExists(testPathL2);
        logger.info("Level1: bestround=" + bestRoundLevel1 + "\t F1=" + bestF1Level1 + "\t Level2: bestround=" + bestRoundLevel2 + "\t F1=" + bestF1Level2);
    }
    /*
         * This will override the models forcing to save the iteration we're interested in- the
         * fixedNumIterations iteration, the last one. But note - both layers will be saved for this
         * iteration. If the best performance for one of the layers came before the final iteration,
         * we're in a small trouble- the performance will decrease
         */
    if (fixedNumIterations > -1) {
        tagger1.save();
        tagger2.save();
    }
}

/** Deletes the prefetched-feature cache file at {@code path} if it exists. */
private static void deleteIfExists(String path) {
    File stale = new File(path);
    if (stale.exists())
        stale.delete();
}
Also used : NETaggerLevel2(edu.illinois.cs.cogcomp.ner.LbjFeatures.NETaggerLevel2) NETaggerLevel1(edu.illinois.cs.cogcomp.ner.LbjFeatures.NETaggerLevel1) TestDiscrete(edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete) IOException(java.io.IOException) Parser(edu.illinois.cs.cogcomp.lbjava.parse.Parser) BatchTrainer(edu.illinois.cs.cogcomp.lbjava.learn.BatchTrainer) File(java.io.File) SparseAveragedPerceptron(edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron)

Example 2 with TestDiscrete

use of edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete in project cogcomp-nlp by CogComp.

In the class StanfordParser, the method main:

/**
 * Runs the pretrained Stanford 4-class CRF NER model over every CoNLL-format
 * file in the directory named by {@code args[0]}, rewrites Stanford's flat
 * annotations (LOCATION, PERSON, ORGANIZATION, MISC) into BIO tags, and prints
 * token-level and phrase-level accuracy against the gold labels.
 *
 * @param args args[0] names a directory of CoNLL column-format files
 * @throws Exception if the classifier or the input files cannot be read
 */
public static void main(String[] args) throws Exception {
    // use the 4 class classifier.
    String serializedClassifier = "edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz";
    AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier.getClassifier(serializedClassifier);
    File[] files = new File(args[0]).listFiles();
    List<List<CoreLabel>> result = new ArrayList<List<CoreLabel>>();
    if (files == null || files.length == 0) {
        System.err.println("Either the directory did not exist, or there were no files within.");
        System.exit(-1);
    }
    // both the gold standard label, AND the prediction, which is handy.
    for (File file : files) {
        String fileContents = IOUtils.slurpFile(file.getAbsolutePath());
        CoNLLColumnReaderAndWriter t = new CoNLLColumnReaderAndWriter();
        SeqClassifierFlags flags = new SeqClassifierFlags();
        flags.deleteBlankLines = true;
        t.init(flags);
        List<List<CoreLabel>> out = classifier.classifyRaw(fileContents, t);
        // translate LOCATION labels to B-LOC, I-LOC and so on. Same for ORGANIZATION , PEOPLE and MISC
        for (List<CoreLabel> sentence : out) {
            CoreLabel previousWord = null;
            for (CoreLabel word : sentence) {
                String currentAnnotation = word.get(CoreAnnotations.AnswerAnnotation.class);
                if (!currentAnnotation.equals("O")) {
                    // previousWord's annotation has already been rewritten to BIO
                    // form on the prior iteration, hence the contains() check in
                    // toBioTag works on tags like "B-LOC"/"I-LOC".
                    String prevAnnotation = previousWord == null ? "" : previousWord.get(CoreAnnotations.AnswerAnnotation.class);
                    String bioTag = toBioTag(currentAnnotation, prevAnnotation);
                    if (bioTag != null)
                        word.set(CoreAnnotations.AnswerAnnotation.class, bioTag);
                    else
                        System.err.println("WHAT KIND OF ANNOTATION IS " + currentAnnotation);
                }
                previousWord = word;
            // System.out.print(word.word() + '\t' + word.get(CoreAnnotations.AnswerAnnotation.class) + '\t' + word.get(CoreAnnotations.GoldAnswerAnnotation.class) +'\n');
            }
        }
        result.addAll(out);
    }
    // produce token level accuracy.
    System.out.println("Token level accuracy:");
    TestDiscrete td = TestDiscrete.testDiscrete(new AnswerClassifier(), new GoldClassifier(), new StanfordParser(result, false));
    td.addNull("O");
    td.printPerformance(System.out);
    // produce phrase level accuracy.
    System.out.println("\nPhrase level accuracy:");
    td = TestDiscrete.testDiscrete(new AnswerClassifier(), new GoldClassifier(), new StanfordParser(result, true));
    td.addNull("O");
    td.printPerformance(System.out);
}

/**
 * Maps a flat Stanford annotation type ("LOCATION", "PERSON", "ORGANIZATION",
 * "MISC") to a BIO tag: "I-" + short type when the previous token's tag
 * contains the same short type (phrase continuation), otherwise "B-" + short
 * type (phrase start).
 *
 * @param annotation the current token's flat annotation
 * @param prevAnnotation the previous token's (already BIO-converted) tag, or ""
 * @return the BIO tag, or null if the annotation type is unrecognized
 */
private static String toBioTag(String annotation, String prevAnnotation) {
    String shortType;
    if (annotation.equals("LOCATION"))
        shortType = "LOC";
    else if (annotation.equals("PERSON"))
        shortType = "PER";
    else if (annotation.equals("ORGANIZATION"))
        shortType = "ORG";
    else if (annotation.equals("MISC"))
        shortType = "MISC";
    else
        return null;
    return (prevAnnotation.contains(shortType) ? "I-" : "B-") + shortType;
}
Also used : TestDiscrete(edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete) AnswerAnnotation(edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation) GoldAnswerAnnotation(edu.stanford.nlp.ling.CoreAnnotations.GoldAnswerAnnotation) ArrayList(java.util.ArrayList) SeqClassifierFlags(edu.stanford.nlp.sequences.SeqClassifierFlags) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File)

Example 3 with TestDiscrete

use of edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete in project cogcomp-nlp by CogComp.

In the class BIOTester, the method test:

/**
 * This method runs the tester, packaging the results in a
 * <code>TestDiscrete</code> object.
 * <p>
 * Tokens are consumed sentence-by-sentence from {@code parser} (a sentence
 * ends at the token whose {@code next} is null). For each sentence, predicted
 * and gold BIO tags are collected, segmented into phrases, and reported to the
 * evaluator at the phrase level with "O" as the null label.
 *
 * @return The performance results.
 */
public TestDiscrete test() {
    TestDiscrete results = new TestDiscrete();
    // "O" (outside) is the null label: excluded from precision/recall/F1.
    results.addNull("O");
    for (Token t = (Token) parser.next(); t != null; t = (Token) parser.next()) {
        // Gather one sentence; the last token of a sentence has next == null.
        Vector<Token> vector = new Vector<>();
        for (; t.next != null; t = (Token) parser.next()) vector.add(t);
        vector.add(t);
        int N = vector.size();
        String[] predictions = new String[N], labels = new String[N];
        // Cache predicted and gold tags for every token in the sentence.
        for (int i = 0; i < N; ++i) {
            predictions[i] = classifier.discreteValue(vector.get(i));
            labels[i] = labeler.discreteValue(vector.get(i));
        }
        for (int i = 0; i < N; ++i) {
            String p = "O", l = "O";
            int pEnd = -1, lEnd = -1;
            // A predicted phrase starts at i on any "B-" tag, or on an "I-" tag
            // that does not continue the previous token's type. NOTE: && binds
            // tighter than ||, so the continuation check applies only to "I-".
            if (predictions[i].startsWith("B-") || predictions[i].startsWith("I-") && (i == 0 || !predictions[i - 1].endsWith(predictions[i].substring(2)))) {
                p = predictions[i].substring(2);
                pEnd = i;
                // Extend the phrase over subsequent "I-<type>" tags of the same type.
                while (pEnd + 1 < N && predictions[pEnd + 1].equals("I-" + p)) ++pEnd;
            }
            // Identical segmentation for the gold label sequence.
            if (labels[i].startsWith("B-") || labels[i].startsWith("I-") && (i == 0 || !labels[i - 1].endsWith(labels[i].substring(2)))) {
                l = labels[i].substring(2);
                lEnd = i;
                while (lEnd + 1 < N && labels[lEnd + 1].equals("I-" + l)) ++lEnd;
            }
            if (!p.equals("O") || !l.equals("O")) {
                // Phrases whose spans end at the same index are scored against
                // each other; otherwise each side is scored as a miss vs "O".
                if (pEnd == lEnd)
                    results.reportPrediction(p, l);
                else {
                    if (!p.equals("O"))
                        results.reportPrediction(p, "O");
                    if (!l.equals("O"))
                        results.reportPrediction("O", l);
                }
            }
        }
    }
    return results;
}
Also used : TestDiscrete(edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete) Vector(java.util.Vector)

Example 4 with TestDiscrete

use of edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete in project cogcomp-nlp by CogComp.

In the class Quantifier, the method test:

/**
 * Evaluates the trained quantities classifier on the test split, reporting
 * performance with "O" treated as the null label.
 */
public void test() {
    // Evaluation harness; "O" tokens are excluded from P/R/F1 via addNull.
    TestDiscrete evaluator = new TestDiscrete();
    evaluator.addNull("O");
    // Load the trained model and the test data, then score predictions against
    // gold labels, reporting progress every 1000 examples.
    QuantitiesClassifier model = new QuantitiesClassifier(modelName + ".lc", modelName + ".lex");
    QuantitiesDataReader reader = new QuantitiesDataReader(dataDir + "/test.txt", "test");
    TestDiscrete.testDiscrete(evaluator, model, new QuantitiesLabel(), reader, true, 1000);
}
Also used : TestDiscrete(edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete)

Example 5 with TestDiscrete

use of edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete in project cogcomp-nlp by CogComp.

In the class Main, the method test:

/**
 * Runs the constrained preposition-SRL classifier over the test split and
 * prints its performance via {@code TestDiscrete}.
 */
public void test() {
    TestDiscrete evaluator = new TestDiscrete();
    ConstrainedPrepSRLClassifier model = new ConstrainedPrepSRLClassifier();
    Parser reader = new PrepSRLDataReader(dataDir, "test");
    // Score predictions against gold labels, reporting progress every 100 examples.
    TestDiscrete.testDiscrete(evaluator, model, new PrepSRLClassifier.Label(), reader, true, 100);
    reader.close();
}
Also used : TestDiscrete(edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete) PrepSRLDataReader(edu.illinois.cs.cogcomp.prepsrl.data.PrepSRLDataReader) ConstrainedPrepSRLClassifier(edu.illinois.cs.cogcomp.prepsrl.inference.ConstrainedPrepSRLClassifier) ConstrainedPrepSRLClassifier(edu.illinois.cs.cogcomp.prepsrl.inference.ConstrainedPrepSRLClassifier) Parser(edu.illinois.cs.cogcomp.lbjava.parse.Parser)

Aggregations

TestDiscrete (edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete)11 Parser (edu.illinois.cs.cogcomp.lbjava.parse.Parser)3 File (java.io.File)3 Vector (java.util.Vector)3 FeatureVector (edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector)2 BatchTrainer (edu.illinois.cs.cogcomp.lbjava.learn.BatchTrainer)2 SparseAveragedPerceptron (edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron)2 LinkedVector (edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector)2 NETaggerLevel1 (edu.illinois.cs.cogcomp.ner.LbjFeatures.NETaggerLevel1)2 NETaggerLevel2 (edu.illinois.cs.cogcomp.ner.LbjFeatures.NETaggerLevel2)2 IOException (java.io.IOException)2 ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)1 POSBracketToToken (edu.illinois.cs.cogcomp.lbjava.nlp.seg.POSBracketToToken)1 PrepSRLDataReader (edu.illinois.cs.cogcomp.prepsrl.data.PrepSRLDataReader)1 ConstrainedPrepSRLClassifier (edu.illinois.cs.cogcomp.prepsrl.inference.ConstrainedPrepSRLClassifier)1 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)1 AnswerAnnotation (edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation)1 GoldAnswerAnnotation (edu.stanford.nlp.ling.CoreAnnotations.GoldAnswerAnnotation)1 CoreLabel (edu.stanford.nlp.ling.CoreLabel)1 SeqClassifierFlags (edu.stanford.nlp.sequences.SeqClassifierFlags)1