Search in sources :

Example 1 with CategoryWordTagFactory

use of edu.stanford.nlp.ling.CategoryWordTagFactory in project CoreNLP by stanfordnlp.

In the class JointParsingModel, method run:

/**
 * Sets up the parser options with {@link ArabicTreebankParserParams} and the
 * {@code -arabicFactored} option, loads the training treebank from disk, obtains the
 * parser data via {@code getParserDataFromTreebank}, calls {@code makeParsers()}, and
 * finally hands the input stream to {@code parse}.
 *
 * @param trainTreebankFile path passed to {@code loadPath} on the training treebank
 * @param testTreebankFile not referenced anywhere in this method body
 * @param inputStream stream forwarded unchanged to {@code parse}
 * @return whatever {@code parse(inputStream)} returns
 */
public boolean run(File trainTreebankFile, File testTreebankFile, InputStream inputStream) {
    // --- Parser options ---
    op = new Options();
    op.tlpParams = new ArabicTreebankParserParams();
    op.setOptions("-arabicFactored");
    op.testOptions.maxLength = maxSentLen;
    // The Arabic default is 500000; raised here because there are substantially more edges now.
    op.testOptions.MAX_ITEMS = 5000000;
    op.testOptions.outputFormatOptions = "removeTopBracket,includePunctuationDependencies";
    // WSG: a deliberately high value so that extractBestParse() actually calls the
    // lattice reader (i.e., no word may be longer than 80 characters, which seems
    // sensible for Arabic).
    op.testOptions.maxSpanForTags = 80;

    // --- Output helpers derived from the options ---
    treePrint = op.testOptions.treePrint(op.tlpParams);
    debinarizer = new Debinarizer(op.forceCNF, new CategoryWordTagFactory());
    subcategoryStripper = op.tlpParams.subcategoryStripper();

    Timing.startTime();

    // --- Training data and parsers ---
    final Treebank trainingBank = op.tlpParams.diskTreebank();
    trainingBank.loadPath(trainTreebankFile);
    lp = getParserDataFromTreebank(trainingBank);
    makeParsers();

    if (VERBOSE) {
        op.display();

        // Rule counts are only reported when pparser is set; otherwise the columns stay empty.
        final boolean havePparser = (pparser != null);
        final String taggingCount = havePparser ? Integer.toString(lp.lex.numRules()) : "";

        log.info("Grammar\tStates\tTags\tWords\tUnaryR\tBinaryR\tTaggings");

        final String statsRow = String.join("\t",
                "Grammar",
                String.valueOf(lp.stateIndex.size()),
                String.valueOf(lp.tagIndex.size()),
                String.valueOf(lp.wordIndex.size()),
                havePparser ? String.valueOf(lp.ug.numRules()) : "",
                havePparser ? String.valueOf(lp.bg.numRules()) : "",
                taggingCount);
        log.info(statsRow);

        log.info("ParserPack is " + op.tlpParams.getClass().getName());
        log.info("Lexicon is " + lp.lex.getClass().getName());
    }

    return parse(inputStream);
}
Also used : CategoryWordTagFactory(edu.stanford.nlp.ling.CategoryWordTagFactory) Treebank(edu.stanford.nlp.trees.Treebank)

Example 2 with CategoryWordTagFactory

use of edu.stanford.nlp.ling.CategoryWordTagFactory in project CoreNLP by stanfordnlp.

In the class FastFactoredParser, method depScoreTree:

/**
 * Scores a tree with the dependency grammar.
 *
 * <p>The tree is first deep-copied using a {@link LabeledScoredTreeFactory} and a
 * {@link CategoryWordTagFactory}, heads are percolated on the copy with
 * {@code binHeadFinder}, and the copy is then converted to a list of
 * dependencies which {@code dg.scoreAll} scores.
 *
 * @param tr A binarized tree (as returned by the PCFG parser)
 * @return The score for the tree according to the grammar
 */
private double depScoreTree(Tree tr) {
    LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory();
    CategoryWordTagFactory labelFactory = new CategoryWordTagFactory();

    // Head percolation is applied to the copy, not to the caller's tree reference.
    Tree headAnnotated = tr.deepCopy(treeFactory, labelFactory);
    headAnnotated.percolateHeads(binHeadFinder);

    return dg.scoreAll(
            MLEDependencyGrammar.treeToDependencyList(headAnnotated, wordIndex, tagIndex));
}
Also used : CategoryWordTagFactory(edu.stanford.nlp.ling.CategoryWordTagFactory) Tree(edu.stanford.nlp.trees.Tree) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory)

Aggregations

CategoryWordTagFactory (edu.stanford.nlp.ling.CategoryWordTagFactory)2 LabeledScoredTreeFactory (edu.stanford.nlp.trees.LabeledScoredTreeFactory)1 Tree (edu.stanford.nlp.trees.Tree)1 Treebank (edu.stanford.nlp.trees.Treebank)1