Search in sources :

Example 1 with Chunker

use of edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker in project cogcomp-nlp by CogComp.

the class ChunkerTrain method init.

/**
 * Loads the default chunker configuration and constructs the chunker from the
 * model and lexicon locations stored under the {@code "modelPath"} and
 * {@code "modelLexPath"} keys.
 */
public void init() {
    rm = new ChunkerConfigurator().getDefaultConfig();
    // Both locations come straight from the configuration; no local copies are needed.
    chunker = new Chunker(rm.getString("modelPath"), rm.getString("modelLexPath"));
}
Also used : Chunker(edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker)

Example 2 with Chunker

use of edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker in project cogcomp-nlp by CogComp.

the class ChunkerTrain method trainModelsWithParser.

/**
 * Trains the chunker on the examples supplied by {@code parser}, using the tail of the
 * data as a development set to choose the number of training rounds.
 *
 * <p>The first {@code round(total * (1 - dev_ratio))} examples (clamped to
 * {@code [0, total]}) are used for training; the examples after that boundary are
 * handed to a {@link BIOTester} as the development set. For each of {@code iter}
 * rounds the model is written to disk, reloaded and scored, and the round with the
 * highest F1 is selected. The learner is then run over the whole data set for that
 * many rounds and finalized.
 *
 * @param parser    source of examples; every item is cast to {@link LinkedVector}
 * @param modeldir  directory part of the model file paths
 * @param modelname base name of the model files; {@code .lc} and {@code .lex} are appended
 * @param dev_ratio fraction of the examples held out as the development set
 */
public void trainModelsWithParser(Parser parser, String modeldir, String modelname, double dev_ratio) {
    Chunker.isTraining = true;
    double tmpF1 = 0;
    double bestF1 = 0;
    int bestIter = 0;
    double[] F1array = new double[iter];
    String lcpath = modeldir + File.separator + modelname + ".lc";
    String lexpath = modeldir + File.separator + modelname + ".lex";
    // Get the total number of examples
    int cnt = 0;
    LinkedVector ex;
    while ((ex = (LinkedVector) parser.next()) != null) {
        cnt++;
    }
    parser.reset();
    // Get the boundary between train and dev, clamped to [0, cnt]
    long idx = Math.round(cnt * (1 - dev_ratio));
    if (idx < 0) {
        idx = 0;
    }
    if (idx > cnt) {
        idx = cnt;
    }
    // Run the learner and save F1 for each iteration
    for (int i = 1; i <= iter; i++) {
        // Consume exactly idx examples for training. The bound is checked BEFORE the
        // next example is pulled from the parser, so the example at the boundary is
        // neither learned from nor dropped: it stays in the parser for the dev test.
        int trained = 0;
        while (trained < idx && (ex = (LinkedVector) parser.next()) != null) {
            for (int j = 0; j < ex.size(); j++) {
                chunker.learn(ex.get(j));
            }
            trained++;
        }
        chunker.doneWithRound();
        writeModelsToDisk(modeldir, modelname);
        // Test on dev set: the parser is positioned just past the training portion,
        // so the tester reads only the remaining examples.
        BIOTester tester = new BIOTester(new Chunker(lcpath, lexpath), new ChunkLabel(), new ChildrenFromVectors(parser));
        double[] result = tester.test().getOverallStats();
        // Index 2 of the overall stats is reported below as the F1 score.
        tmpF1 = result[2];
        F1array[i - 1] = tmpF1;
        System.out.println("Iteration number : " + i + ". F1 score on devset: " + tmpF1);
        parser.reset();
    }
    // Get the best F1 score and corresponding iter (first round wins on ties;
    // bestIter stays 0 if no round scores above 0)
    for (int i = 0; i < iter; i++) {
        if (F1array[i] > bestF1) {
            bestF1 = F1array[i];
            bestIter = i + 1;
        }
    }
    System.out.println("Best #Iter = " + bestIter + " (F1=" + bestF1 + ")");
    System.out.println("Rerun the learner using best #Iter...");
    // Rerun the learner on the full data set.
    // NOTE(review): the learner is not reset before this loop, so these rounds are
    // applied on top of the rounds already run above -- confirm whether retraining
    // is meant to start from a fresh model.
    for (int i = 1; i <= bestIter; i++) {
        while ((ex = (LinkedVector) parser.next()) != null) {
            for (int j = 0; j < ex.size(); j++) {
                chunker.learn(ex.get(j));
            }
        }
        parser.reset();
        chunker.doneWithRound();
        System.out.println("Iteration number : " + i);
    }
    chunker.doneLearning();
}
Also used : ChildrenFromVectors(edu.illinois.cs.cogcomp.lbjava.parse.ChildrenFromVectors) LinkedVector(edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector) BIOTester(edu.illinois.cs.cogcomp.lbjava.nlp.seg.BIOTester) Chunker(edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker) ChunkLabel(edu.illinois.cs.cogcomp.chunker.main.lbjava.ChunkLabel)

Example 3 with Chunker

use of edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker in project cogcomp-nlp by CogComp.

the class ChunkTester method chunkTester.

/**
 * Evaluates a stored chunker model on a test file and prints the performance
 * report to standard output.
 *
 * @param testFile  file handed to {@link CoNLL2000Parser}
 * @param modeldir  directory part of the model file paths
 * @param modelname base name of the model files; {@code .lc} and {@code .lex} are appended
 */
public static void chunkTester(String testFile, String modeldir, String modelname) {
    // Both model files share the same directory and base name.
    final String modelBase = modeldir + File.separator + modelname;
    final String classifierPath = modelBase + ".lc";
    final String lexiconPath = modelBase + ".lex";

    Parser testParser = new CoNLL2000Parser(testFile);
    Chunker model = new Chunker(classifierPath, lexiconPath);
    BIOTester evaluator = new BIOTester(model, new ChunkLabel(), new ChildrenFromVectors(testParser));
    evaluator.test().printPerformance(System.out);
}
Also used : ChildrenFromVectors(edu.illinois.cs.cogcomp.lbjava.parse.ChildrenFromVectors) BIOTester(edu.illinois.cs.cogcomp.lbjava.nlp.seg.BIOTester) Chunker(edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker) ChunkLabel(edu.illinois.cs.cogcomp.chunker.main.lbjava.ChunkLabel) CoNLL2000Parser(edu.illinois.cs.cogcomp.chunker.utils.CoNLL2000Parser) Parser(edu.illinois.cs.cogcomp.lbjava.parse.Parser)

Example 4 with Chunker

use of edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker in project cogcomp-nlp by CogComp.

the class TestDiff method testDiff.

/**
 * Runs the chunker over {@code testFile}, renders each sentence as bracketed
 * chunks of {@code (POS form)} pairs, and fails as soon as a rendered sentence
 * differs from the corresponding entry of {@code refSentences}.
 */
@Test
public void testDiff() {
    Chunker tagger = new Chunker();
    Parser parser = new PlainToTokenParser(new WordSplitter(new SentenceSplitter(testFile)));
    String previousLabel = "";
    String produced = "";
    int sentenceIndex = 0;

    Token token = (Token) parser.next();
    while (token != null) {
        String label = tagger.discreteValue(token);

        // A chunk opens on any B- label, or on an I- label whose type differs
        // from the type the previous label ended with.
        boolean opensChunk = label.startsWith("B-")
                || (label.startsWith("I-") && !previousLabel.endsWith(label.substring(2)));
        if (opensChunk) {
            produced = produced + "[" + label.substring(2) + " ";
        }

        produced = produced + "(" + token.partOfSpeech + " " + token.form + ") ";

        // A chunk closes when the current token is inside one and the next token
        // is missing, outside any chunk, starts a new chunk, or has another type.
        boolean closesChunk = !label.equals("O")
                && (token.next == null
                        || tagger.discreteValue(token.next).equals("O")
                        || tagger.discreteValue(token.next).startsWith("B-")
                        || !tagger.discreteValue(token.next).endsWith(label.substring(2)));
        if (closesChunk) {
            produced = produced + "] ";
        }

        // No successor means the sentence is complete: compare and start over.
        if (token.next == null) {
            produced = produced.trim();
            String refSentence = refSentences.get(sentenceIndex).trim();
            if (!produced.equals(refSentence)) {
                fail("Produced output doesn't match reference: " + "\nProduced: " + produced + "\nExpected: " + refSentence);
            }
            produced = "";
            sentenceIndex++;
        }

        previousLabel = label;
        token = (Token) parser.next();
    }
}
Also used : SentenceSplitter(edu.illinois.cs.cogcomp.lbjava.nlp.SentenceSplitter) PlainToTokenParser(edu.illinois.cs.cogcomp.lbjava.nlp.seg.PlainToTokenParser) Chunker(edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token) WordSplitter(edu.illinois.cs.cogcomp.lbjava.nlp.WordSplitter) Parser(edu.illinois.cs.cogcomp.lbjava.parse.Parser) Test(org.junit.Test)

Example 5 with Chunker

use of edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker in project cogcomp-nlp by CogComp.

the class ChunksAndPOSTags method main.

/**
 * Command-line entry point: chunks the given plain-text file and logs every
 * token as {@code (POS form)}, wrapping each predicted chunk in
 * {@code [TYPE ... ]} brackets.
 *
 * <p>Exactly one argument is required. With any other argument count a usage
 * message is printed to standard error and the JVM exits with status 1.
 *
 * @param args a single element naming the input file
 */
public static void main(String[] args) {
    // Validate the argument count directly instead of raising and catching an
    // exception just to reach the usage message.
    if (args.length != 1) {
        System.err.println("usage: java edu.illinois.cs.cogcomp.chunker.main.ChunksAndPOSTags <input file>");
        System.exit(1);
    }
    String filename = args[0];
    Chunker chunker = new Chunker();
    Parser parser = new PlainToTokenParser(new WordSplitter(new SentenceSplitter(filename)));
    String previous = "";
    for (Word w = (Word) parser.next(); w != null; w = (Word) parser.next()) {
        String prediction = chunker.discreteValue(w);
        // A chunk opens on any B- label, or on an I- label whose type differs
        // from the type the previous label ended with.
        if (prediction.startsWith("B-")
                || (prediction.startsWith("I-") && !previous.endsWith(prediction.substring(2)))) {
            logger.info("[" + prediction.substring(2) + " ");
        }
        logger.info("(" + w.partOfSpeech + " " + w.form + ") ");
        // A chunk closes when the current word is inside one and the next word is
        // missing, outside any chunk, starts a new chunk, or has another type.
        if (!prediction.equals("O")
                && (w.next == null
                        || chunker.discreteValue(w.next).equals("O")
                        || chunker.discreteValue(w.next).startsWith("B-")
                        || !chunker.discreteValue(w.next).endsWith(prediction.substring(2)))) {
            logger.info("] ");
        }
        // No successor: the sentence is finished.
        if (w.next == null) {
            logger.info("\n");
        }
        previous = prediction;
    }
}
Also used : Word(edu.illinois.cs.cogcomp.lbjava.nlp.Word) SentenceSplitter(edu.illinois.cs.cogcomp.lbjava.nlp.SentenceSplitter) PlainToTokenParser(edu.illinois.cs.cogcomp.lbjava.nlp.seg.PlainToTokenParser) Chunker(edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker) WordSplitter(edu.illinois.cs.cogcomp.lbjava.nlp.WordSplitter) Parser(edu.illinois.cs.cogcomp.lbjava.parse.Parser)

Aggregations

Chunker (edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker): 5, Parser (edu.illinois.cs.cogcomp.lbjava.parse.Parser): 3, ChunkLabel (edu.illinois.cs.cogcomp.chunker.main.lbjava.ChunkLabel): 2, SentenceSplitter (edu.illinois.cs.cogcomp.lbjava.nlp.SentenceSplitter): 2, WordSplitter (edu.illinois.cs.cogcomp.lbjava.nlp.WordSplitter): 2, BIOTester (edu.illinois.cs.cogcomp.lbjava.nlp.seg.BIOTester): 2, PlainToTokenParser (edu.illinois.cs.cogcomp.lbjava.nlp.seg.PlainToTokenParser): 2, ChildrenFromVectors (edu.illinois.cs.cogcomp.lbjava.parse.ChildrenFromVectors): 2, CoNLL2000Parser (edu.illinois.cs.cogcomp.chunker.utils.CoNLL2000Parser): 1, Word (edu.illinois.cs.cogcomp.lbjava.nlp.Word): 1, Token (edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token): 1, LinkedVector (edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector): 1, Test (org.junit.Test): 1