Search in sources:

Example 11 with Parser

use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.

In class TestDiff, method testDiff.

/**
 * Tags every token the parser yields from {@code testFile} and compares the bracketed
 * rendering of each sentence with the matching entry of {@code refSentences}.
 *
 * <p>Each token is rendered as {@code "(POS form) "}. A chunk is opened with
 * {@code "[TYPE "} and closed with {@code "] "}. The test fails on the first sentence whose
 * trimmed rendering differs from the trimmed reference sentence.
 */
@Test
public void testDiff() {
    Chunker tagger = new Chunker();
    Parser parser = new PlainToTokenParser(new WordSplitter(new SentenceSplitter(testFile)));
    String previous = "";
    // A builder avoids re-copying the whole sentence on every appended fragment.
    StringBuilder sentence = new StringBuilder();
    int sentenceCounter = 0;
    for (Token w = (Token) parser.next(); w != null; w = (Token) parser.next()) {
        String prediction = tagger.discreteValue(w);

        // "B-" always opens a chunk; "I-" opens one only when the previous label does not
        // end with the same chunk type. Parentheses spell out the && / || precedence.
        boolean opensChunk = prediction.startsWith("B-")
                || (prediction.startsWith("I-") && !previous.endsWith(prediction.substring(2)));
        if (opensChunk) {
            sentence.append("[").append(prediction.substring(2)).append(" ");
        }

        sentence.append("(").append(w.partOfSpeech).append(" ").append(w.form).append(") ");

        if (!prediction.equals("O")) {
            // The chunk ends at the last token of the sentence, or when the next token is
            // outside any chunk, begins a new chunk, or carries a different chunk type.
            boolean closesChunk = w.next == null;
            if (!closesChunk) {
                // Classify the next token once instead of once per sub-condition.
                String nextPrediction = tagger.discreteValue(w.next);
                closesChunk = nextPrediction.equals("O")
                        || nextPrediction.startsWith("B-")
                        || !nextPrediction.endsWith(prediction.substring(2));
            }
            if (closesChunk) {
                sentence.append("] ");
            }
        }

        if (w.next == null) {
            // End of sentence: compare with the reference and start a fresh sentence.
            String produced = sentence.toString().trim();
            String refSentence = refSentences.get(sentenceCounter).trim();
            if (!produced.equals(refSentence)) {
                fail("Produced output doesn't match reference: " + "\nProduced: " + produced + "\nExpected: " + refSentence);
            }
            sentence.setLength(0);
            sentenceCounter++;
        }
        // NOTE(review): previous is not reset at a sentence boundary, so an "I-" label at the
        // start of a sentence is compared with the last label of the preceding sentence —
        // confirm this is intended.
        previous = prediction;
    }
}
Also used : SentenceSplitter(edu.illinois.cs.cogcomp.lbjava.nlp.SentenceSplitter) PlainToTokenParser(edu.illinois.cs.cogcomp.lbjava.nlp.seg.PlainToTokenParser) Chunker(edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token) WordSplitter(edu.illinois.cs.cogcomp.lbjava.nlp.WordSplitter) Parser(edu.illinois.cs.cogcomp.lbjava.parse.Parser) PlainToTokenParser(edu.illinois.cs.cogcomp.lbjava.nlp.seg.PlainToTokenParser) Test(org.junit.Test)

Example 12 with Parser

use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.

In class ChunkerTrain, method trainModels.

/**
 * Trains the chunker models from labeled data, which must be in CoNLL2000 format.
 *
 * <p>The data is wrapped in a {@code CoNLL2000Parser} and handed to
 * {@code trainModelsWithParser}.
 *
 * @param trainingData The labeled training data
 */
public void trainModels(String trainingData) {
    trainModelsWithParser(new CoNLL2000Parser(trainingData));
}
Also used : CoNLL2000Parser(edu.illinois.cs.cogcomp.chunker.utils.CoNLL2000Parser) Parser(edu.illinois.cs.cogcomp.lbjava.parse.Parser) CoNLL2000Parser(edu.illinois.cs.cogcomp.chunker.utils.CoNLL2000Parser)

Example 13 with Parser

use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.

In class ChunkerTrain, method trainModels.

/**
 * Wraps the training data in a {@code CoNLL2000Parser} and delegates to
 * {@code trainModelsWithParser}, forwarding the remaining arguments unchanged.
 *
 * @param trainingData the labeled training data given to {@code CoNLL2000Parser}
 * @param modeldir forwarded as-is; presumably the directory for the trained model (confirm)
 * @param modelname forwarded as-is; presumably the name of the trained model (confirm)
 * @param dev_ratio forwarded as-is; presumably the share of data held out for development
 *        (confirm against {@code trainModelsWithParser})
 */
public void trainModels(String trainingData, String modeldir, String modelname, double dev_ratio) {
    trainModelsWithParser(new CoNLL2000Parser(trainingData), modeldir, modelname, dev_ratio);
}
Also used : CoNLL2000Parser(edu.illinois.cs.cogcomp.chunker.utils.CoNLL2000Parser) Parser(edu.illinois.cs.cogcomp.lbjava.parse.Parser) CoNLL2000Parser(edu.illinois.cs.cogcomp.chunker.utils.CoNLL2000Parser)

Example 14 with Parser

use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.

In class ChunksAndPOSTags, method main.

/**
 * Tags every word of the input file with the chunker and logs a bracketed rendering: each
 * word as {@code "(POS form) "}, chunks opened with {@code "[TYPE "} and closed with
 * {@code "] "}, and a {@code "\n"} after the last word of each sentence.
 *
 * <p>Exactly one argument is required. Otherwise a usage message is written to standard
 * error and the JVM exits with status 1.
 *
 * @param args a single element: the name of the input file
 */
public static void main(String[] args) {
    // Validate the arguments directly rather than throwing and catching an exception.
    if (args == null || args.length != 1) {
        System.err.println("usage: java edu.illinois.cs.cogcomp.chunker.main.ChunksAndPOSTags <input file>");
        System.exit(1);
    }
    String filename = args[0];

    Chunker chunker = new Chunker();
    Parser parser = new PlainToTokenParser(new WordSplitter(new SentenceSplitter(filename)));
    String previous = "";
    // NOTE(review): every fragment goes through its own logger.info call; whether a sentence
    // ends up on one output line depends on the logger configuration — confirm.
    for (Word w = (Word) parser.next(); w != null; w = (Word) parser.next()) {
        String prediction = chunker.discreteValue(w);

        // "B-" always opens a chunk; "I-" opens one only when the previous label does not
        // end with the same chunk type. Parentheses spell out the && / || precedence.
        boolean opensChunk = prediction.startsWith("B-")
                || (prediction.startsWith("I-") && !previous.endsWith(prediction.substring(2)));
        if (opensChunk) {
            logger.info("[" + prediction.substring(2) + " ");
        }

        logger.info("(" + w.partOfSpeech + " " + w.form + ") ");

        if (!prediction.equals("O")) {
            // The chunk ends at the last word of the sentence, or when the next word is
            // outside any chunk, begins a new chunk, or carries a different chunk type.
            boolean closesChunk = w.next == null;
            if (!closesChunk) {
                // Classify the next word once instead of once per sub-condition.
                String nextPrediction = chunker.discreteValue(w.next);
                closesChunk = nextPrediction.equals("O")
                        || nextPrediction.startsWith("B-")
                        || !nextPrediction.endsWith(prediction.substring(2));
            }
            if (closesChunk) {
                logger.info("] ");
            }
        }

        if (w.next == null) {
            logger.info("\n");
        }
        previous = prediction;
    }
}
Also used : Word(edu.illinois.cs.cogcomp.lbjava.nlp.Word) SentenceSplitter(edu.illinois.cs.cogcomp.lbjava.nlp.SentenceSplitter) PlainToTokenParser(edu.illinois.cs.cogcomp.lbjava.nlp.seg.PlainToTokenParser) Chunker(edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker) WordSplitter(edu.illinois.cs.cogcomp.lbjava.nlp.WordSplitter) Parser(edu.illinois.cs.cogcomp.lbjava.parse.Parser) PlainToTokenParser(edu.illinois.cs.cogcomp.lbjava.nlp.seg.PlainToTokenParser)

Example 15 with Parser

use of edu.illinois.cs.cogcomp.lbjava.parse.Parser in project cogcomp-nlp by CogComp.

In class TestChunkerModels, method testAccuracy.

/**
 * Runs {@code tagger} over every token parsed from {@code labeledData} and logs the
 * percentage of tokens whose predicted label equals the label read from the token's string
 * form.
 *
 * <p>The expected label is the text between the first {@code '('} and the first
 * {@code ' '} of {@code Token.toString()}.
 */
public void testAccuracy() {
    Parser tokenSource = new ChildrenFromVectors(new CoNLL2000Parser(labeledData));
    int total = 0;
    int correct = 0;
    Token token;
    while ((token = (Token) tokenSource.next()) != null) {
        String predicted = tagger.discreteValue(token);
        String text = token.toString();
        int labelStart = text.indexOf('(') + 1;
        int labelEnd = text.indexOf(' ');
        String gold = text.substring(labelStart, labelEnd);
        if (predicted.equals(gold)) {
            correct++;
        }
        total++;
    }
    // Both counters are widened to double before dividing, so no integer division occurs.
    double percentage = 100.0 * (double) correct / (double) total;
    String formatted = String.format("%.2f", percentage);
    logger.info("Total accuracy over " + total + " items: " + formatted + "%");
}
Also used : ChildrenFromVectors(edu.illinois.cs.cogcomp.lbjava.parse.ChildrenFromVectors) CoNLL2000Parser(edu.illinois.cs.cogcomp.chunker.utils.CoNLL2000Parser) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token) Parser(edu.illinois.cs.cogcomp.lbjava.parse.Parser) CoNLL2000Parser(edu.illinois.cs.cogcomp.chunker.utils.CoNLL2000Parser)

Aggregations

Parser (edu.illinois.cs.cogcomp.lbjava.parse.Parser)16 SentenceSplitter (edu.illinois.cs.cogcomp.lbjava.nlp.SentenceSplitter)5 WordSplitter (edu.illinois.cs.cogcomp.lbjava.nlp.WordSplitter)5 CoNLL2000Parser (edu.illinois.cs.cogcomp.chunker.utils.CoNLL2000Parser)4 Token (edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token)4 Chunker (edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker)3 Word (edu.illinois.cs.cogcomp.lbjava.nlp.Word)3 PlainToTokenParser (edu.illinois.cs.cogcomp.lbjava.nlp.seg.PlainToTokenParser)3 Classifier (edu.illinois.cs.cogcomp.lbjava.classify.Classifier)2 TestDiscrete (edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete)2 BatchTrainer (edu.illinois.cs.cogcomp.lbjava.learn.BatchTrainer)2 POSBracketToToken (edu.illinois.cs.cogcomp.lbjava.nlp.seg.POSBracketToToken)2 ChildrenFromVectors (edu.illinois.cs.cogcomp.lbjava.parse.ChildrenFromVectors)2 PrepSRLDataReader (edu.illinois.cs.cogcomp.prepsrl.data.PrepSRLDataReader)2 ConstrainedPrepSRLClassifier (edu.illinois.cs.cogcomp.prepsrl.inference.ConstrainedPrepSRLClassifier)2 Test (org.junit.Test)2 ChunkLabel (edu.illinois.cs.cogcomp.chunker.main.lbjava.ChunkLabel)1 FeatureVector (edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector)1 Learner (edu.illinois.cs.cogcomp.lbjava.learn.Learner)1 SparseAveragedPerceptron (edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron)1