Example use of edu.stanford.nlp.trees.Treebank in the CoreNLP project by stanfordnlp.
From the class ChineseMaxentLexicon, method main.
/**
 * Trains a {@code ChineseMaxentLexicon} on one section of a treebank and
 * evaluates tagging accuracy on another, printing the result to stdout.
 *
 * <p>Usage: {@code main treebankPath trainFileRanges testFileRanges [featureLevel]}
 * where the ranges are number-range strings understood by
 * {@link NumberRangesFileFilter} and {@code featureLevel} defaults to
 * {@code DEFAULT_FEATURE_LEVEL}.
 *
 * @param args treebank path, training file ranges, test file ranges,
 *             and an optional integer feature level
 */
public static void main(String[] args) {
  TreebankLangParserParams tlpParams = new ChineseTreebankParserParams();
  Options op = new Options(tlpParams);
  TreeAnnotator ta = new TreeAnnotator(tlpParams.headFinder(), tlpParams, op);

  log.info("Reading Trees...");
  // args[1] selects which file-number ranges of the treebank to train on
  FileFilter trainFilter = new NumberRangesFileFilter(args[1], true);
  Treebank trainTreebank = tlpParams.memoryTreebank();
  trainTreebank.loadPath(args[0], trainFilter);

  log.info("Annotating trees...");
  Collection<Tree> trainTrees = new ArrayList<>();
  for (Tree tree : trainTreebank) {
    trainTrees.add(ta.transformTree(tree));
  }
  // Drop the raw treebank; only the annotated trees are needed from here on.
  trainTreebank = null;

  log.info("Training lexicon...");
  Index<String> wordIndex = new HashIndex<>();
  Index<String> tagIndex = new HashIndex<>();
  int featureLevel = DEFAULT_FEATURE_LEVEL;
  if (args.length > 3) {
    featureLevel = Integer.parseInt(args[3]);
  }
  ChineseMaxentLexicon lex = new ChineseMaxentLexicon(op, wordIndex, tagIndex, featureLevel);
  lex.initializeTraining(trainTrees.size());
  lex.train(trainTrees);
  lex.finishTraining();

  log.info("Testing");
  // args[2] selects the held-out file ranges used for evaluation
  FileFilter testFilter = new NumberRangesFileFilter(args[2], true);
  Treebank testTreebank = tlpParams.memoryTreebank();
  testTreebank.loadPath(args[0], testFilter);
  List<TaggedWord> testWords = new ArrayList<>();
  for (Tree t : testTreebank) {
    testWords.addAll(t.taggedYield());
  }
  int[] totalAndCorrect = lex.testOnTreebank(testWords);
  log.info("done.");
  System.out.println(totalAndCorrect[1] + " correct out of " + totalAndCorrect[0] + " -- ACC: " + ((double) totalAndCorrect[1]) / totalAndCorrect[0]);
}
Example use of edu.stanford.nlp.trees.Treebank in the CoreNLP project by stanfordnlp.
From the class ShiftReduceParser, method readBinarizedTreebank.
/**
 * Reads a treebank from {@code treebankPath} (restricted by
 * {@code treebankFilter}) and returns its trees converted to the
 * parser's binarized format.
 *
 * @param treebankPath   path to load trees from
 * @param treebankFilter filter selecting which treebank files to read
 * @return the binarized trees
 */
public List<Tree> readBinarizedTreebank(String treebankPath, FileFilter treebankFilter) {
  final Treebank rawTrees = readTreebank(treebankPath, treebankFilter);
  final List<Tree> binarizedTrees = binarizeTreebank(rawTrees, op);
  log.info("Converted trees to binarized format");
  return binarizedTrees;
}
Example use of edu.stanford.nlp.trees.Treebank in the CoreNLP project by stanfordnlp.
From the class ShiftReduceParser, method main.
/**
 * Command-line entry point for training, continuing training, loading,
 * and/or evaluating a {@code ShiftReduceParser}.
 *
 * <p>Recognized flags: {@code -trainTreebank} (repeatable),
 * {@code -testTreebank}, {@code -devTreebank}, {@code -serializedPath} /
 * {@code -model}, {@code -tlpp}, {@code -continueTraining}. All other
 * arguments are passed through to option parsing / model loading.
 *
 * @param args command-line arguments as described above
 * @throws IllegalArgumentException if neither a training treebank nor a
 *         serialized model is specified
 */
public static void main(String[] args) {
  List<String> remainingArgs = Generics.newArrayList();
  List<Pair<String, FileFilter>> trainTreebankPath = null;
  Pair<String, FileFilter> testTreebankPath = null;
  Pair<String, FileFilter> devTreebankPath = null;
  String serializedPath = null;
  String tlppClass = null;
  String continueTraining = null;
  // Consume recognized flags (each advances argIndex past its sub-args);
  // anything unrecognized is collected into remainingArgs.
  for (int argIndex = 0; argIndex < args.length; ) {
    if (args[argIndex].equalsIgnoreCase("-trainTreebank")) {
      if (trainTreebankPath == null) {
        trainTreebankPath = Generics.newArrayList();
      }
      trainTreebankPath.add(ArgUtils.getTreebankDescription(args, argIndex, "-trainTreebank"));
      argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
    } else if (args[argIndex].equalsIgnoreCase("-testTreebank")) {
      testTreebankPath = ArgUtils.getTreebankDescription(args, argIndex, "-testTreebank");
      argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
    } else if (args[argIndex].equalsIgnoreCase("-devTreebank")) {
      devTreebankPath = ArgUtils.getTreebankDescription(args, argIndex, "-devTreebank");
      argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
    } else if (args[argIndex].equalsIgnoreCase("-serializedPath") || args[argIndex].equalsIgnoreCase("-model")) {
      serializedPath = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-tlpp")) {
      tlppClass = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-continueTraining")) {
      continueTraining = args[argIndex + 1];
      argIndex += 2;
    } else {
      remainingArgs.add(args[argIndex]);
      ++argIndex;
    }
  }
  // Idiomatic one-step conversion (was a two-step allocate-then-fill).
  String[] newArgs = remainingArgs.toArray(new String[0]);
  if (trainTreebankPath == null && serializedPath == null) {
    throw new IllegalArgumentException("Must specify a treebank to train from with -trainTreebank or a parser to load with -serializedPath");
  }
  ShiftReduceParser parser = null;
  if (trainTreebankPath != null) {
    log.info("Training ShiftReduceParser");
    log.info("Initial arguments:");
    log.info("   " + StringUtils.join(args));
    if (continueTraining != null) {
      // Resume training from an existing model, forcing gold tags.
      parser = ShiftReduceParser.loadModel(continueTraining, ArrayUtils.concatenate(FORCE_TAGS, newArgs));
    } else {
      ShiftReduceOptions op = buildTrainingOptions(tlppClass, newArgs);
      parser = new ShiftReduceParser(op);
    }
    parser.train(trainTreebankPath, devTreebankPath, serializedPath);
    parser.saveModel(serializedPath);
  }
  // Load a serialized model only if we did not just train one.
  if (serializedPath != null && parser == null) {
    parser = ShiftReduceParser.loadModel(serializedPath, ArrayUtils.concatenate(FORCE_TAGS, newArgs));
  }
  if (testTreebankPath != null) {
    log.info("Loading test trees from " + testTreebankPath.first());
    Treebank testTreebank = parser.op.tlpParams.memoryTreebank();
    testTreebank.loadPath(testTreebankPath.first(), testTreebankPath.second());
    log.info("Loaded " + testTreebank.size() + " trees");
    EvaluateTreebank evaluator = new EvaluateTreebank(parser.op, null, parser);
    evaluator.testOnTreebank(testTreebank);
    // log.info("Input tree: " + tree);
    // log.info("Debinarized tree: " + query.getBestParse());
    // log.info("Parsed binarized tree: " + query.getBestBinarizedParse());
    // log.info("Predicted transition sequence: " + query.getBestTransitionSequence());
  }
}
Example use of edu.stanford.nlp.trees.Treebank in the CoreNLP project by stanfordnlp.
From the class ShiftReduceParser, method readTreebank.
/**
 * Loads an in-memory treebank from {@code treebankPath}, reading only the
 * files accepted by {@code treebankFilter}.
 *
 * @param treebankPath   path to load trees from
 * @param treebankFilter filter selecting which treebank files to read
 * @return the loaded treebank
 */
public Treebank readTreebank(String treebankPath, FileFilter treebankFilter) {
  log.info("Loading trees from " + treebankPath);
  Treebank loaded = op.tlpParams.memoryTreebank();
  loaded.loadPath(treebankPath, treebankFilter);
  log.info("Read in " + loaded.size() + " trees from " + treebankPath);
  return loaded;
}
Example use of edu.stanford.nlp.trees.Treebank in the CoreNLP project by stanfordnlp.
From the class ReorderingOracleTest, method setUp.
/**
 * Test fixture setup: builds a treebank from the shared {@code correctTrees}
 * and populates {@code binarizedTrees} with their binarized form.
 */
public void setUp() {
  Options options = new Options();
  Treebank fixture = options.tlpParams.memoryTreebank();
  fixture.addAll(Arrays.asList(correctTrees));
  binarizedTrees = ShiftReduceParser.binarizeTreebank(fixture, options);
}
Aggregations