
Example 1 with Tagger

Use of edu.stanford.nlp.tagger.common.Tagger in the CoreNLP project by stanfordnlp.

Class ShiftReduceParser, method train:

private void train(List<Pair<String, FileFilter>> trainTreebankPath, Pair<String, FileFilter> devTreebankPath, String serializedPath) {
    log.info("Training method: " + op.trainOptions().trainingMethod);
    List<Tree> binarizedTrees = Generics.newArrayList();
    for (Pair<String, FileFilter> treebank : trainTreebankPath) {
        binarizedTrees.addAll(readBinarizedTreebank(treebank.first(), treebank.second()));
    }
    int nThreads = op.trainOptions.trainingThreads;
    nThreads = nThreads <= 0 ? Runtime.getRuntime().availableProcessors() : nThreads;
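    // If preTag is set, retag the binarized training trees with the serialized POS tagger so training sees the same automatic tags used at parse time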
    Tagger tagger = null;
    if (op.testOptions.preTag) {
        Timing retagTimer = new Timing();
        tagger = Tagger.loadModel(op.testOptions.taggerSerializedFile);
        redoTags(binarizedTrees, tagger, nThreads);
        retagTimer.done("Retagging");
    }
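    // Collect the constituent labels seen in training, and which of them occur at (or only at) the root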
    Set<String> knownStates = findKnownStates(binarizedTrees);
    Set<String> rootStates = findRootStates(binarizedTrees);
    Set<String> rootOnlyStates = findRootOnlyStates(binarizedTrees, rootStates);
    log.info("Known states: " + knownStates);
    log.info("States which occur at the root: " + rootStates);
    log.info("States which only occur at the root: " + rootStates);
    Timing transitionTimer = new Timing();
    List<List<Transition>> transitionLists = CreateTransitionSequence.createTransitionSequences(binarizedTrees, op.compoundUnaries, rootStates, rootOnlyStates);
    Index<Transition> transitionIndex = new HashIndex<>();
    for (List<Transition> transitions : transitionLists) {
        transitionIndex.addAll(transitions);
    }
    transitionTimer.done("Converting trees into transition lists");
    log.info("Number of transitions: " + transitionIndex.size());
    Random random = new Random(op.trainOptions.randomSeed);
    Treebank devTreebank = null;
    if (devTreebankPath != null) {
        devTreebank = readTreebank(devTreebankPath.first(), devTreebankPath.second());
    }
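    // Build a perceptron model over the transition index, train it, and install it as this parser's model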
    PerceptronModel newModel = new PerceptronModel(this.op, transitionIndex, knownStates, rootStates, rootOnlyStates);
    newModel.trainModel(serializedPath, tagger, random, binarizedTrees, transitionLists, devTreebank, nThreads);
    this.model = newModel;
}
Also used: Tagger (edu.stanford.nlp.tagger.common.Tagger), Treebank (edu.stanford.nlp.trees.Treebank), EvaluateTreebank (edu.stanford.nlp.parser.lexparser.EvaluateTreebank), HashIndex (edu.stanford.nlp.util.HashIndex), Random (java.util.Random), Tree (edu.stanford.nlp.trees.Tree), List (java.util.List), Timing (edu.stanford.nlp.util.Timing), FileFilter (java.io.FileFilter)
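Once train() has written a serialized model, parsing follows the pattern of the standard CoreNLP shift-reduce demo: load the model and a POS tagger, tag each sentence, and apply the parser to the tagged words. A minimal sketch (the class name, the example sentence, and the model/tagger paths are placeholders):

import java.io.StringReader;
import java.util.List;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.Tree;

public class ShiftReduceUsageSketch {
    public static void main(String[] args) {
        // Placeholder paths: the model written by train() above and a serialized POS tagger
        String modelPath = "model.ser.gz";
        String taggerPath = "english-left3words-distsim.tagger";
        ShiftReduceParser parser = ShiftReduceParser.loadModel(modelPath);
        MaxentTagger tagger = new MaxentTagger(taggerPath);
        // The shift-reduce parser expects pre-tagged input
        DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader("The quick brown fox jumps over the lazy dog."));
        for (List<HasWord> sentence : preprocessor) {
            List<TaggedWord> tagged = tagger.tagSentence(sentence);
            Tree tree = parser.apply(tagged);
            System.out.println(tree);
        }
    }
}

The parser is applied to tagged words rather than raw tokens, which is the same contract the preTag branch in train() prepares the training trees for.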
