Use of edu.stanford.nlp.tagger.common.Tagger in project CoreNLP by stanfordnlp.
The train method of the class ShiftReduceParser.
/**
 * Trains a new {@code PerceptronModel} from the given treebanks and installs it as
 * this parser's model.
 *
 * <p>Steps, in order: read and binarize every training treebank, optionally retag the
 * trees (when {@code op.testOptions.preTag} is set), collect the known / root /
 * root-only state sets, convert the trees to transition sequences, index the
 * transitions, optionally read a dev treebank, then hand everything to
 * {@code PerceptronModel.trainModel}.
 *
 * @param trainTreebankPath (path, file filter) pairs, each passed to
 *     {@code readBinarizedTreebank}; all resulting trees are pooled into one training set
 * @param devTreebankPath (path, file filter) pair for a development treebank, or
 *     {@code null} to train without one
 * @param serializedPath forwarded unchanged to {@code PerceptronModel.trainModel}
 *     (presumably the location where the trained model is written; confirm against
 *     that method)
 */
private void train(List<Pair<String, FileFilter>> trainTreebankPath, Pair<String, FileFilter> devTreebankPath, String serializedPath) {
  log.info("Training method: " + op.trainOptions().trainingMethod);

  // Pool the binarized trees from every training treebank into a single list.
  List<Tree> binarizedTrees = Generics.newArrayList();
  for (Pair<String, FileFilter> treebank : trainTreebankPath) {
    binarizedTrees.addAll(readBinarizedTreebank(treebank.first(), treebank.second()));
  }

  // A non-positive thread count means "use every available processor".
  int nThreads = op.trainOptions.trainingThreads;
  nThreads = nThreads <= 0 ? Runtime.getRuntime().availableProcessors() : nThreads;

  // The tagger stays null unless pre-tagging is enabled; it is passed to trainModel either way.
  Tagger tagger = null;
  if (op.testOptions.preTag) {
    Timing retagTimer = new Timing();
    tagger = Tagger.loadModel(op.testOptions.taggerSerializedFile);
    redoTags(binarizedTrees, tagger, nThreads);
    retagTimer.done("Retagging");
  }

  Set<String> knownStates = findKnownStates(binarizedTrees);
  Set<String> rootStates = findRootStates(binarizedTrees);
  Set<String> rootOnlyStates = findRootOnlyStates(binarizedTrees, rootStates);

  log.info("Known states: " + knownStates);
  log.info("States which occur at the root: " + rootStates);
  // Report the root-only set here; the previous code printed rootStates a second time.
  log.info("States which only occur at the root: " + rootOnlyStates);

  // Convert each tree to its transition sequence and index every transition seen.
  Timing transitionTimer = new Timing();
  List<List<Transition>> transitionLists = CreateTransitionSequence.createTransitionSequences(binarizedTrees, op.compoundUnaries, rootStates, rootOnlyStates);
  Index<Transition> transitionIndex = new HashIndex<>();
  for (List<Transition> transitions : transitionLists) {
    transitionIndex.addAll(transitions);
  }
  transitionTimer.done("Converting trees into transition lists");
  log.info("Number of transitions: " + transitionIndex.size());

  Random random = new Random(op.trainOptions.randomSeed);

  // The dev treebank is optional; null is passed through to trainModel when absent.
  Treebank devTreebank = null;
  if (devTreebankPath != null) {
    devTreebank = readTreebank(devTreebankPath.first(), devTreebankPath.second());
  }

  // Assign this.model only after trainModel returns, so an exception during training leaves the existing model in place.
  PerceptronModel newModel = new PerceptronModel(this.op, transitionIndex, knownStates, rootStates, rootOnlyStates);
  newModel.trainModel(serializedPath, tagger, random, binarizedTrees, transitionLists, devTreebank, nThreads);
  this.model = newModel;
}
Aggregations