Use of edu.stanford.nlp.ling.CategoryWordTagFactory in the CoreNLP project by stanfordnlp.
In class JointParsingModel, method run:
/**
 * Sets up the parser options (using the {@code "-arabicFactored"} option string), reads the
 * training treebank, builds the parser data and parsers from it, and then parses the input.
 *
 * @param trainTreebankFile path that is loaded into the training treebank
 * @param testTreebankFile not referenced anywhere in this method
 * @param inputStream input that is handed to {@code parse}
 * @return the value returned by {@code parse(inputStream)}
 */
public boolean run(File trainTreebankFile, File testTreebankFile, InputStream inputStream) {
  // --- Option setup ---
  op = new Options();
  op.tlpParams = new ArabicTreebankParserParams();
  op.setOptions("-arabicFactored");

  op.testOptions.maxLength = maxSentLen;
  // The Arabic default is 500000; it is raised here because there are substantially more edges now.
  op.testOptions.MAX_ITEMS = 5000000;
  op.testOptions.outputFormatOptions = "removeTopBracket,includePunctuationDependencies";
  // WSG: a deliberately high value so that extractBestParse() actually calls the lattice
  // reader (i.e., no word may be longer than 80 characters, which seems sensible for Arabic).
  op.testOptions.maxSpanForTags = 80;

  // --- Helpers derived from the options ---
  treePrint = op.testOptions.treePrint(op.tlpParams);
  debinarizer = new Debinarizer(op.forceCNF, new CategoryWordTagFactory());
  subcategoryStripper = op.tlpParams.subcategoryStripper();

  // --- Training ---
  Timing.startTime();
  final Treebank trainingData = op.tlpParams.diskTreebank();
  trainingData.loadPath(trainTreebankFile);
  lp = getParserDataFromTreebank(trainingData);
  makeParsers();

  if (!VERBOSE) {
    return parse(inputStream);
  }

  // --- Verbose diagnostics: options, grammar statistics, and implementation classes ---
  op.display();

  // Rule counts are only reported when pparser exists; otherwise the columns stay empty.
  final boolean haveParser = (pparser != null);
  String lexRuleCount = "";
  if (haveParser) {
    lexRuleCount = Integer.toString(lp.lex.numRules());
  }
  log.info("Grammar\tStates\tTags\tWords\tUnaryR\tBinaryR\tTaggings");

  StringBuilder statsRow = new StringBuilder("Grammar\t");
  statsRow.append(lp.stateIndex.size()).append('\t');
  statsRow.append(lp.tagIndex.size()).append('\t');
  statsRow.append(lp.wordIndex.size()).append('\t');
  if (haveParser) {
    statsRow.append(lp.ug.numRules());
  }
  statsRow.append('\t');
  if (haveParser) {
    statsRow.append(lp.bg.numRules());
  }
  statsRow.append('\t').append(lexRuleCount);
  log.info(statsRow.toString());

  log.info("ParserPack is " + op.tlpParams.getClass().getName());
  log.info("Lexicon is " + lp.lex.getClass().getName());

  return parse(inputStream);
}
Use of edu.stanford.nlp.ling.CategoryWordTagFactory in the CoreNLP project by stanfordnlp.
In class FastFactoredParser, method depScoreTree:
/**
 * Scores a tree with the DependencyGrammar.
 *
 * <p>The tree is deep-copied using a {@code LabeledScoredTreeFactory} and a
 * {@code CategoryWordTagFactory}, heads are percolated through the copy with
 * {@code binHeadFinder}, the copy is converted to a list of dependencies, and that list
 * is scored by {@code dg}.
 *
 * @param tr A binarized tree (as returned by the PCFG parser)
 * @return The score for the tree according to the grammar
 */
private double depScoreTree(Tree tr) {
  // Work on a copy; head percolation is applied to the copy, not to tr itself.
  LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory();
  CategoryWordTagFactory labelFactory = new CategoryWordTagFactory();
  Tree headedCopy = tr.deepCopy(treeFactory, labelFactory);
  headedCopy.percolateHeads(binHeadFinder);

  List<IntDependency> dependencies =
      MLEDependencyGrammar.treeToDependencyList(headedCopy, wordIndex, tagIndex);
  double score = dg.scoreAll(dependencies);
  return score;
}
Aggregations