Examples with EnglishTreebankParserParams - edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams

Example 6 with EnglishTreebankParserParams

use of edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams in project CoreNLP by stanfordnlp.

the class TaggingEval method main.

/**
 * Runs the tagging evaluation metric on a gold/guess pair of treebank files.
 * Both trees of each pair are passed through the language's collinizer before
 * being scored. The default language is English.
 *
 * <p>Recognized options: {@code -l} (language), {@code -y} (maximum gold yield),
 * {@code -v} (verbose), {@code -c} (category-level evaluation) and {@code -e}
 * (input encoding). Any other option prints the usage message and exits.
 * The non-option arguments are the gold file followed by the guess file.
 *
 * @param args command-line options followed by the gold and guess file paths
 */
public static void main(String[] args) {
    if (args.length < minArgs) {
        System.out.println(usage.toString());
        System.exit(-1);
    }
    TreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    int maxGoldYield = Integer.MAX_VALUE;
    boolean VERBOSE = false;
    String encoding = "UTF-8";
    Map<String, String[]> argsMap = StringUtils.argsToMap(args, optionArgDefs);
    for (Map.Entry<String, String[]> opt : argsMap.entrySet()) {
        // The null key holds the non-option arguments; they are handled after the loop.
        if (opt.getKey() == null)
            continue;
        if (opt.getKey().equals("-l")) {
            Language lang = Language.valueOf(opt.getValue()[0].trim());
            tlpp = lang.params;
        } else if (opt.getKey().equals("-y")) {
            maxGoldYield = Integer.parseInt(opt.getValue()[0].trim());
        } else if (opt.getKey().equals("-v")) {
            VERBOSE = true;
        } else if (opt.getKey().equals("-c")) {
            TaggingEval.doCatLevelEval = true;
        } else if (opt.getKey().equals("-e")) {
            encoding = opt.getValue()[0];
        } else {
            log.info(usage.toString());
            System.exit(-1);
        }
    }
    // Non-option arguments are located at key null. This must be read outside the
    // option loop: when no option flags are given the loop body never gets past the
    // null-key guard, and the file names would otherwise remain unset.
    final String[] rest = argsMap.get(null);
    if (rest == null || rest.length < minArgs) {
        log.info(usage.toString());
        System.exit(-1);
    }
    final String goldFile = rest[0];
    final String guessFile = rest[1];
    tlpp.setInputEncoding(encoding);
    final PrintWriter pwOut = tlpp.pw();
    final Treebank guessTreebank = tlpp.diskTreebank();
    guessTreebank.loadPath(guessFile);
    pwOut.println("GUESS TREEBANK:");
    pwOut.println(guessTreebank.textualSummary());
    final Treebank goldTreebank = tlpp.diskTreebank();
    goldTreebank.loadPath(goldFile);
    pwOut.println("GOLD TREEBANK:");
    pwOut.println(goldTreebank.textualSummary());
    final TaggingEval metric = new TaggingEval("Tagging LP/LR");
    final TreeTransformer tc = tlpp.collinizer();
    //The evalb ref implementation assigns status for each tree pair as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    final Iterator<Tree> goldItr = goldTreebank.iterator();
    final Iterator<Tree> guessItr = guessTreebank.iterator();
    int goldLineId = 0;
    int guessLineId = 0;
    int skippedGuessTrees = 0;
    // Walk both treebanks in lock step; stop as soon as either one is exhausted.
    while (guessItr.hasNext() && goldItr.hasNext()) {
        Tree guessTree = guessItr.next();
        List<Label> guessYield = guessTree.yield();
        guessLineId++;
        Tree goldTree = goldItr.next();
        List<Label> goldYield = goldTree.yield();
        goldLineId++;
        // Check that we should evaluate this tree
        if (goldYield.size() > maxGoldYield) {
            skippedGuessTrees++;
            continue;
        }
        // Only trees with equal yields can be evaluated
        if (goldYield.size() != guessYield.size()) {
            pwOut.printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.size(), guessYield.size(), goldLineId, guessLineId);
            skippedGuessTrees++;
            continue;
        }
        final Tree evalGuess = tc.transformTree(guessTree);
        final Tree evalGold = tc.transformTree(goldTree);
        metric.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
    }
    // One iterator still having trees means the two files differ in length.
    if (guessItr.hasNext() || goldItr.hasNext()) {
        System.err.printf("Guess/gold files do not have equal lengths (guess: %d gold: %d).%n", guessLineId, goldLineId);
    }
    pwOut.println("================================================================================");
    if (skippedGuessTrees != 0)
        pwOut.printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
    metric.display(true, pwOut);
    pwOut.println();
    pwOut.close();
}

Also used : Treebank(edu.stanford.nlp.trees.Treebank) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Label(edu.stanford.nlp.ling.Label) TreebankLangParserParams(edu.stanford.nlp.parser.lexparser.TreebankLangParserParams) EnglishTreebankParserParams(edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams) Language(edu.stanford.nlp.international.Language) Tree(edu.stanford.nlp.trees.Tree) TreeMap(java.util.TreeMap) Map(java.util.Map) TreeTransformer(edu.stanford.nlp.trees.TreeTransformer) PrintWriter(java.io.PrintWriter)

Example 7 with EnglishTreebankParserParams

use of edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams in project CoreNLP by stanfordnlp.

the class UnlabeledAttachmentEval method main.

/**
 * Runs the unlabeled attachment scoring metric on a gold/guess pair of treebank
 * files. Both trees of each pair are passed through the language's collinizer and
 * have their leaves indexed before being scored. The default language is English.
 *
 * <p>Recognized options: {@code -l} (language), {@code -y} (maximum gold yield),
 * {@code -v} (verbose) and {@code -e} (input encoding). Any other option prints
 * the usage message and exits. The non-option arguments are the gold file
 * followed by the guess file.
 *
 * @param args command-line options followed by the gold and guess file paths
 */
public static void main(String[] args) {
    TreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    int maxGoldYield = Integer.MAX_VALUE;
    boolean VERBOSE = false;
    String encoding = "UTF-8";
    Map<String, String[]> argsMap = StringUtils.argsToMap(args, optionArgDefs);
    for (Map.Entry<String, String[]> opt : argsMap.entrySet()) {
        // The null key holds the non-option arguments; they are handled after the loop.
        if (opt.getKey() == null)
            continue;
        if (opt.getKey().equals("-l")) {
            Language lang = Language.valueOf(opt.getValue()[0].trim());
            tlpp = lang.params;
        } else if (opt.getKey().equals("-y")) {
            maxGoldYield = Integer.parseInt(opt.getValue()[0].trim());
        } else if (opt.getKey().equals("-v")) {
            VERBOSE = true;
        } else if (opt.getKey().equals("-e")) {
            encoding = opt.getValue()[0];
        } else {
            log.info(usage.toString());
            System.exit(-1);
        }
    }
    // Non-option arguments are located at key null. This must be read outside the
    // option loop: when no option flags are given the loop body never gets past the
    // null-key guard, and the file names would otherwise remain unset.
    final String[] rest = argsMap.get(null);
    if (rest == null || rest.length < minArgs) {
        log.info(usage.toString());
        System.exit(-1);
    }
    final String goldFile = rest[0];
    final String guessFile = rest[1];
    tlpp.setInputEncoding(encoding);
    final PrintWriter pwOut = tlpp.pw();
    final Treebank guessTreebank = tlpp.diskTreebank();
    guessTreebank.loadPath(guessFile);
    pwOut.println("GUESS TREEBANK:");
    pwOut.println(guessTreebank.textualSummary());
    final Treebank goldTreebank = tlpp.diskTreebank();
    goldTreebank.loadPath(goldFile);
    pwOut.println("GOLD TREEBANK:");
    pwOut.println(goldTreebank.textualSummary());
    final UnlabeledAttachmentEval metric = new UnlabeledAttachmentEval("UAS LP/LR", true, tlpp.headFinder());
    final TreeTransformer tc = tlpp.collinizer();
    //The evalb ref implementation assigns status for each tree pair as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    final Iterator<Tree> goldItr = goldTreebank.iterator();
    final Iterator<Tree> guessItr = guessTreebank.iterator();
    int goldLineId = 0;
    int guessLineId = 0;
    int skippedGuessTrees = 0;
    // Walk both treebanks in lock step; stop as soon as either one is exhausted.
    while (guessItr.hasNext() && goldItr.hasNext()) {
        Tree guessTree = guessItr.next();
        List<Label> guessYield = guessTree.yield();
        guessLineId++;
        Tree goldTree = goldItr.next();
        List<Label> goldYield = goldTree.yield();
        goldLineId++;
        // Check that we should evaluate this tree
        if (goldYield.size() > maxGoldYield) {
            skippedGuessTrees++;
            continue;
        }
        // Only trees with equal yields can be evaluated
        if (goldYield.size() != guessYield.size()) {
            pwOut.printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.size(), guessYield.size(), goldLineId, guessLineId);
            skippedGuessTrees++;
            continue;
        }
        final Tree evalGuess = tc.transformTree(guessTree);
        evalGuess.indexLeaves(true);
        final Tree evalGold = tc.transformTree(goldTree);
        evalGold.indexLeaves(true);
        metric.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
    }
    // One iterator still having trees means the two files differ in length.
    if (guessItr.hasNext() || goldItr.hasNext()) {
        System.err.printf("Guess/gold files do not have equal lengths (guess: %d gold: %d).%n", guessLineId, goldLineId);
    }
    pwOut.println("================================================================================");
    if (skippedGuessTrees != 0)
        pwOut.printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
    metric.display(true, pwOut);
    pwOut.println();
    pwOut.close();
}

Also used : Treebank(edu.stanford.nlp.trees.Treebank) Label(edu.stanford.nlp.ling.Label) TreebankLangParserParams(edu.stanford.nlp.parser.lexparser.TreebankLangParserParams) EnglishTreebankParserParams(edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams) Language(edu.stanford.nlp.international.Language) Tree(edu.stanford.nlp.trees.Tree) Map(java.util.Map) TreeTransformer(edu.stanford.nlp.trees.TreeTransformer) PrintWriter(java.io.PrintWriter)

Example 8 with EnglishTreebankParserParams

use of edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams in project CoreNLP by stanfordnlp.

the class SemgrexDemo method main.

public static void main(String[] args) {
    // A hand-written bracketed parse stands in for real parser or treebank output.
    final String bracketedParse = "(ROOT  (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))";
    final Tree parseTree = Tree.valueOf(bracketedParse);

    // Build English uncollapsed dependencies directly as a SemanticGraph.
    // When many graphs are needed, the original author recommends going through
    // a GrammaticalStructureFactory and its intermediate GrammaticalStructure,
    // as illustrated just below.
    final SemanticGraph dependencies = SemanticGraphFactory.generateUncollapsedDependencies(parseTree);

    // Factory-based route, shown for illustration. Per the original note, the
    // params of another supported language (e.g. Chinese) could be used here.
    final TreebankLangParserParams langParams = new EnglishTreebankParserParams();
    final GrammaticalStructureFactory structureFactory =
        langParams.treebankLanguagePack().grammaticalStructureFactory(
            langParams.treebankLanguagePack().punctuationWordRejectFilter(),
            langParams.typedDependencyHeadFinder());
    // Constructed only to demonstrate the API; it is not used further down.
    final GrammaticalStructure structure = structureFactory.newGrammaticalStructure(parseTree);

    log.info(dependencies);

    // Match node pairs A/B related through the "<<nsubj" pattern and report each hit.
    // Original author's note: ancestor of both "dog" and "my" via the nsubj relation.
    final SemgrexPattern pattern = SemgrexPattern.compile("{}=A <<nsubj {}=B");
    final SemgrexMatcher hits = pattern.matcher(dependencies);
    while (hits.find()) {
        final String report = hits.getNode("A") + " <<nsubj " + hits.getNode("B");
        log.info(report);
    }
}

Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) GrammaticalStructureFactory(edu.stanford.nlp.trees.GrammaticalStructureFactory) GrammaticalStructure(edu.stanford.nlp.trees.GrammaticalStructure) Tree(edu.stanford.nlp.trees.Tree) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TreebankLangParserParams(edu.stanford.nlp.parser.lexparser.TreebankLangParserParams) EnglishTreebankParserParams(edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams)

Aggregations

EnglishTreebankParserParams (edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams)8 TreebankLangParserParams (edu.stanford.nlp.parser.lexparser.TreebankLangParserParams)8 Tree (edu.stanford.nlp.trees.Tree)8 Language (edu.stanford.nlp.international.Language)7 PrintWriter (java.io.PrintWriter)7 Label (edu.stanford.nlp.ling.Label)5 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)4 DiskTreebank (edu.stanford.nlp.trees.DiskTreebank)4 ArrayList (java.util.ArrayList)4 TreeTransformer (edu.stanford.nlp.trees.TreeTransformer)3 Treebank (edu.stanford.nlp.trees.Treebank)3 CoreLabel (edu.stanford.nlp.ling.CoreLabel)2 Map (java.util.Map)2 Lexicon (edu.stanford.nlp.parser.lexparser.Lexicon)1 Options (edu.stanford.nlp.parser.lexparser.Options)1 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)1 SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher)1 SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern)1 GrammaticalStructure (edu.stanford.nlp.trees.GrammaticalStructure)1 GrammaticalStructureFactory (edu.stanford.nlp.trees.GrammaticalStructureFactory)1