
Example 1 with Tree

Use of edu.stanford.nlp.trees.Tree in project lucida by claritylab.

Class StanfordParser, method mapOffsets.

/**
     * Maps Tree node offsets using provided mapping.
     * @param tree the Tree whose begin and end extents should be mapped.
     * @param mapping the list of RangeMap objects which defines the mapping.
     */
protected static void mapOffsets(Tree tree, List<RangeMap> mapping) {
    // if mapping is empty, then assume 1-to-1 mapping.
    if (mapping == null || mapping.size() == 0)
        return;
    int begin_map_index = 0;
    RangeMap begin_rmap = mapping.get(begin_map_index);
    TREE: for (Tree t : tree) {
        if (t.isLeaf())
            continue;
        MapLabel label = (MapLabel) t.label();
        int begin = (Integer) label.get(BEGIN_KEY);
        // "end" must be index of last char in range            
        int end = (Integer) label.get(END_KEY) - 1;
        // find the first RangeMap whose end is greater than the beginning of
        // the current annotation.
        // log.debug("Finding RangeMap whose extents include
        // annotation.begin");
        while (begin_rmap.end <= begin) {
            begin_map_index++;
            if (begin_map_index >= mapping.size())
                break TREE;
            begin_rmap = mapping.get(begin_map_index);
        }
        // if the current RangeMap begins after the end of the current
        // annotation, skip this annotation (assume its
        // mapping is 1-to-1).
        if (begin_rmap.begin > end) {
            // log.debug("Skipping annotation (assuming 1-to-1
            // mapping)");
            continue;
        }
        // if beginning of current annotation falls within current range
        // map, then map it back to source space.
        int new_begin = begin;
        if (begin_rmap.begin <= new_begin) {
            // log.debug("Applying RangeMap to begin offset");
            new_begin = begin_rmap.map(new_begin);
        }
        // find the first rangemap whose end is greater than the end of
        // current annotation.
        // log.debug("Finding RangeMap whose extents include
        // annotation.end");
        int end_map_index = begin_map_index;
        RangeMap end_rmap = begin_rmap;
        END_OFFSET: while (end_rmap.end <= end) {
            end_map_index++;
            if (end_map_index >= mapping.size())
                break END_OFFSET;
            end_rmap = mapping.get(end_map_index);
        }
        // if end of current annotation falls within "end" range map,
        // then map it back to source space.
        int new_end = end;
        if (end_rmap.begin <= end) {
            // log.debug("Applying RangeMap to end offset");
            new_end = end_rmap.map(end);
        }
        label.put(BEGIN_KEY, new_begin);
        label.put(END_KEY, new_end + 1);
    }
}
Also used : DeltaRangeMap(edu.cmu.lti.javelin.util.DeltaRangeMap) RangeMap(edu.cmu.lti.javelin.util.RangeMap) Tree(edu.stanford.nlp.trees.Tree) MapLabel(edu.stanford.nlp.ling.MapLabel)
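
For readability, here is a minimal, self-contained sketch of the same idea: scanning sorted ranges to map begin/end offsets back into source space, simplified to the case where both offsets fall in one range. The Range record and its map() method are hypothetical stand-ins, not the edu.cmu.lti.javelin.util.RangeMap API.

import java.util.Arrays;
import java.util.List;

public class OffsetRemapSketch {

    // Hypothetical stand-in for RangeMap: a half-open [begin, end) extent in the
    // parser's text plus a delta that maps positions back to the source text.
    record Range(int begin, int end, int delta) {
        int map(int offset) { return offset + delta; }
    }

    // Mirror of the TREE loop above: skip ranges that end before the annotation,
    // stop once a range starts after it, otherwise remap both offsets.
    static int[] remap(int begin, int end, List<Range> ranges) {
        for (Range r : ranges) {
            if (r.end() <= begin) continue;   // range ends before annotation begins
            if (r.begin() > end) break;       // range starts after annotation ends
            return new int[] { r.map(begin), r.map(end) };
        }
        return new int[] { begin, end };      // no applicable range: assume 1-to-1
    }

    public static void main(String[] args) {
        // Annotation [5, 9] inside a range that shifts offsets left by 2.
        System.out.println(Arrays.toString(remap(5, 9, List.of(new Range(0, 20, -2)))));  // [3, 7]
    }
}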

Example 2 with Tree

Use of edu.stanford.nlp.trees.Tree in project lucida by claritylab.

Class StanfordParser, method updateTreeLabels.

protected static void updateTreeLabels(Tree root, Tree tree, MutableInteger offset, MutableInteger leafIndex) {
    if (tree.isLeaf()) {
        leafIndex.value++;
        return;
    }
    String labelValue = tree.label().value().toUpperCase();
    int begin = root.leftCharEdge(tree);
    int end = root.rightCharEdge(tree);
    //System.out.println(labelValue+"("+begin+","+end+")");
    int length = end - begin;
    // apply offset to begin extent
    begin += offset.value;
    // calculate offset delta based on label
    if (double_quote_lable_pattern.matcher(labelValue).matches() && length > 1) {
        offset.value--;
        log.debug("Quotes label pattern fired: " + offset);
    } else if (bracket_label_pattern.matcher(labelValue).matches()) {
        offset.value -= 4;
        log.debug("Bracket label pattern fired: " + offset);
    } else if (tree.isPreTerminal()) {
        Tree leaf = tree.firstChild();
        String text = leaf.label().value();
        Matcher matcher = escaped_char_pattern.matcher(text);
        while (matcher.find()) {
            offset.value--;
        }
    }
    for (Tree child : tree.children()) updateTreeLabels(root, child, offset, leafIndex);
    // apply offset to end extent
    end += offset.value;
    // set begin and end offsets on node
    MapLabel label = new MapLabel(tree.label());
    label.put(BEGIN_KEY, begin);
    label.put(END_KEY, end);
    label.put(MapLabel.INDEX_KEY, leafIndex.value);
    tree.setLabel(label);
}
Also used : Matcher(java.util.regex.Matcher) Tree(edu.stanford.nlp.trees.Tree) MapLabel(edu.stanford.nlp.ling.MapLabel)
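
The call site for this recursion is not shown in this example; a hedged sketch of how it is presumably seeded, with the parse root passed as both the root and tree arguments and both counters starting at zero (the MutableInteger class in scope is assumed to accept an initial value):

// Hypothetical call site for updateTreeLabels.
MutableInteger offset = new MutableInteger(0);
MutableInteger leafIndex = new MutableInteger(0);
updateTreeLabels(parseTree, parseTree, offset, leafIndex);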

Example 3 with Tree

Use of edu.stanford.nlp.trees.Tree in project lucida by claritylab.

Class StanfordParser, method parse.

/**
     * Parses a sentence and returns a string representation of the parse tree.
     * 
     * @param sentence a sentence
     * @return a String representation of the best parse tree, with score
     * annotations stripped
     */
@SuppressWarnings("unchecked")
public static String parse(String sentence) {
    if (tlp == null || parser == null)
        throw new RuntimeException("Parser has not been initialized");
    // parse the sentence to produce stanford Tree
    log.debug("Parsing sentence");
    Tree tree = null;
    synchronized (parser) {
        Tokenizer tokenizer = tlp.getTokenizerFactory().getTokenizer(new StringReader(sentence));
        List<Word> words = tokenizer.tokenize();
        log.debug("Tokenization: " + words);
        parser.parse(new Sentence(words));
        tree = parser.getBestParse();
    }
    return tree.toString().replaceAll(" \\[[\\S]+\\]", "");
}
Also used : Word(edu.stanford.nlp.ling.Word) StringReader(java.io.StringReader) Tree(edu.stanford.nlp.trees.Tree) Tokenizer(edu.stanford.nlp.process.Tokenizer) Sentence(edu.stanford.nlp.ling.Sentence)
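
A minimal caller sketch, assuming the parser's static state (tlp and parser) has already been initialized elsewhere; the initialization entry point is not shown in this example, and the sentence is arbitrary:

// Hypothetical usage: parse() throws a RuntimeException if the parser is uninitialized.
String penn = StanfordParser.parse("The quick brown fox jumps over the lazy dog.");
System.out.println(penn);  // Penn Treebank-style bracketing with score annotations stripped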

Example 4 with Tree

Use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

Class TsarfatyEval, method main.

/**
   * Run the scoring metric on guess/gold input. This method performs "Collinization." 
   * The default language is English.
   * 
   * @param args Optional flags followed by the gold treebank path and the guess treebank path
   */
public static void main(String[] args) {
    if (args.length < minArgs) {
        System.out.println(usage.toString());
        System.exit(-1);
    }
    TreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    int maxGoldYield = Integer.MAX_VALUE;
    int maxGuessYield = Integer.MAX_VALUE;
    boolean VERBOSE = false;
    boolean skipGuess = false;
    boolean tagMode = false;
    String guessFile = null;
    String goldFile = null;
    for (int i = 0; i < args.length; i++) {
        if (args[i].startsWith("-")) {
            switch(args[i]) {
                case "-l":
                    Language lang = Language.valueOf(args[++i].trim());
                    tlpp = lang.params;
                    break;
                case "-y":
                    maxGoldYield = Integer.parseInt(args[++i].trim());
                    break;
                case "-t":
                    tagMode = true;
                    break;
                case "-v":
                    VERBOSE = true;
                    break;
                case "-g":
                    maxGuessYield = Integer.parseInt(args[++i].trim());
                    skipGuess = true;
                    break;
                default:
                    System.out.println(usage.toString());
                    System.exit(-1);
            }
        } else {
            //Required parameters
            goldFile = args[i++];
            guessFile = args[i];
            break;
        }
    }
    final PrintWriter pwOut = tlpp.pw();
    final Treebank guessTreebank = tlpp.diskTreebank();
    guessTreebank.loadPath(guessFile);
    pwOut.println("GUESS TREEBANK:");
    pwOut.println(guessTreebank.textualSummary());
    final Treebank goldTreebank = tlpp.diskTreebank();
    goldTreebank.loadPath(goldFile);
    pwOut.println("GOLD TREEBANK:");
    pwOut.println(goldTreebank.textualSummary());
    final String evalName = (tagMode) ? "TsarfatyTAG" : "TsarfatySEG";
    final TsarfatyEval eval = new TsarfatyEval(evalName, tagMode);
    final TreeTransformer tc = tlpp.collinizer();
    //PennTreeReader skips over null/malformed parses. So when the yields of the gold/guess trees
    //don't match, we need to keep looking for the next gold tree that matches.
    //The evalb ref implementation differs slightly as it expects one tree per line. It assigns
    //status as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    final Iterator<Tree> goldItr = goldTreebank.iterator();
    int goldLineId = 0;
    int skippedGuessTrees = 0;
    for (final Tree guess : guessTreebank) {
        final Tree evalGuess = tc.transformTree(guess);
        final ArrayList<Label> guessSent = guess.yield();
        final String guessChars = SentenceUtils.listToString(guessSent).replaceAll("\\s+", "");
        if (guessSent.size() > maxGuessYield) {
            skippedGuessTrees++;
            continue;
        }
        boolean doneEval = false;
        while (goldItr.hasNext() && !doneEval) {
            final Tree gold = goldItr.next();
            final Tree evalGold = tc.transformTree(gold);
            goldLineId++;
            final ArrayList<Label> goldSent = gold.yield();
            final String goldChars = SentenceUtils.listToString(goldSent).replaceAll("\\s+", "");
            if (goldSent.size() > maxGoldYield) {
                continue;
            } else if (goldChars.length() != guessChars.length()) {
                pwOut.printf("Char level yield mismatch at line %d (guess: %d gold: %d)\n", goldLineId, guessChars.length(), goldChars.length());
                skippedGuessTrees++;
                //Default evalb behavior -- skip this guess tree
                break;
            }
            eval.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
            //Move to the next guess parse
            doneEval = true;
        }
    }
    pwOut.println("================================================================================");
    if (skippedGuessTrees != 0)
        pwOut.printf("%s %d guess trees\n", ((skipGuess) ? "Skipped" : "Unable to evaluate"), skippedGuessTrees);
    eval.display(true, pwOut);
    pwOut.println();
    pwOut.close();
}
Also used : Treebank(edu.stanford.nlp.trees.Treebank) Label(edu.stanford.nlp.ling.Label) TreebankLangParserParams(edu.stanford.nlp.parser.lexparser.TreebankLangParserParams) EnglishTreebankParserParams(edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams) Language(edu.stanford.nlp.international.Language) Tree(edu.stanford.nlp.trees.Tree) TreeTransformer(edu.stanford.nlp.trees.TreeTransformer) PrintWriter(java.io.PrintWriter)
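
A hedged driver built from the argument parsing above: optional flags come first, then the gold treebank path followed by the guess treebank path. The file names are placeholders.

// Hypothetical invocation of the evaluator; -t selects tag mode, -l the language.
String[] evalArgs = { "-l", "English", "-t", "gold.mrg", "guess.mrg" };
TsarfatyEval.main(evalArgs);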

Example 5 with Tree

Use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

Class MLEDependencyGrammar, method tune.

/** Tune the smoothing and interpolation parameters of the dependency
   *  grammar based on a tuning treebank.
   *
   *  @param trees A Collection of Trees for setting parameters
   */
@Override
public void tune(Collection<Tree> trees) {
    List<IntDependency> deps = new ArrayList<>();
    for (Tree tree : trees) {
        deps.addAll(treeToDependencyList(tree, wordIndex, tagIndex));
    }
    double bestScore = Double.NEGATIVE_INFINITY;
    double bestSmooth_stop = 0.0;
    double bestSmooth_aTW_hTWd = 0.0;
    double bestSmooth_aT_hTWd = 0.0;
    double bestInterp = 0.0;
    log.info("Tuning smooth_stop...");
    for (smooth_stop = 1.0 / 100.0; smooth_stop < 100.0; smooth_stop *= 1.25) {
        double totalScore = 0.0;
        for (IntDependency dep : deps) {
            if (!rootTW(dep.head)) {
                double stopProb = getStopProb(dep);
                if (!dep.arg.equals(stopTW)) {
                    stopProb = 1.0 - stopProb;
                }
                if (stopProb > 0.0) {
                    totalScore += Math.log(stopProb);
                }
            }
        }
        if (totalScore > bestScore) {
            bestScore = totalScore;
            bestSmooth_stop = smooth_stop;
        }
    }
    smooth_stop = bestSmooth_stop;
    log.info("Tuning selected smooth_stop: " + smooth_stop);
    for (Iterator<IntDependency> iter = deps.iterator(); iter.hasNext(); ) {
        IntDependency dep = iter.next();
        if (dep.arg.equals(stopTW)) {
            iter.remove();
        }
    }
    log.info("Tuning other parameters...");
    if (!useSmoothTagProjection) {
        bestScore = Double.NEGATIVE_INFINITY;
        for (smooth_aTW_hTWd = 0.5; smooth_aTW_hTWd < 100.0; smooth_aTW_hTWd *= 1.25) {
            log.info(".");
            for (smooth_aT_hTWd = 0.5; smooth_aT_hTWd < 100.0; smooth_aT_hTWd *= 1.25) {
                for (interp = 0.02; interp < 1.0; interp += 0.02) {
                    double totalScore = 0.0;
                    for (IntDependency dep : deps) {
                        double score = score(dep);
                        if (score > Double.NEGATIVE_INFINITY) {
                            totalScore += score;
                        }
                    }
                    if (totalScore > bestScore) {
                        bestScore = totalScore;
                        bestInterp = interp;
                        bestSmooth_aTW_hTWd = smooth_aTW_hTWd;
                        bestSmooth_aT_hTWd = smooth_aT_hTWd;
                        log.info("Current best interp: " + interp + " with score " + totalScore);
                    }
                }
            }
        }
        smooth_aTW_hTWd = bestSmooth_aTW_hTWd;
        smooth_aT_hTWd = bestSmooth_aT_hTWd;
        interp = bestInterp;
    } else {
        // for useSmoothTagProjection
        double bestSmooth_aTW_aT = 0.0;
        double bestSmooth_aTW_hTd = 0.0;
        double bestSmooth_aT_hTd = 0.0;
        bestScore = Double.NEGATIVE_INFINITY;
        for (smooth_aTW_hTWd = 1.125; smooth_aTW_hTWd < 100.0; smooth_aTW_hTWd *= 1.5) {
            log.info("#");
            for (smooth_aT_hTWd = 1.125; smooth_aT_hTWd < 100.0; smooth_aT_hTWd *= 1.5) {
                log.info(":");
                for (smooth_aTW_aT = 1.125; smooth_aTW_aT < 200.0; smooth_aTW_aT *= 1.5) {
                    log.info(".");
                    for (smooth_aTW_hTd = 1.125; smooth_aTW_hTd < 100.0; smooth_aTW_hTd *= 1.5) {
                        for (smooth_aT_hTd = 1.125; smooth_aT_hTd < 100.0; smooth_aT_hTd *= 1.5) {
                            for (interp = 0.2; interp <= 0.8; interp += 0.02) {
                                double totalScore = 0.0;
                                for (IntDependency dep : deps) {
                                    double score = score(dep);
                                    if (score > Double.NEGATIVE_INFINITY) {
                                        totalScore += score;
                                    }
                                }
                                if (totalScore > bestScore) {
                                    bestScore = totalScore;
                                    bestInterp = interp;
                                    bestSmooth_aTW_hTWd = smooth_aTW_hTWd;
                                    bestSmooth_aT_hTWd = smooth_aT_hTWd;
                                    bestSmooth_aTW_aT = smooth_aTW_aT;
                                    bestSmooth_aTW_hTd = smooth_aTW_hTd;
                                    bestSmooth_aT_hTd = smooth_aT_hTd;
                                    log.info("Current best interp: " + interp + " with score " + totalScore);
                                }
                            }
                        }
                    }
                }
            }
            log.info();
        }
        smooth_aTW_hTWd = bestSmooth_aTW_hTWd;
        smooth_aT_hTWd = bestSmooth_aT_hTWd;
        smooth_aTW_aT = bestSmooth_aTW_aT;
        smooth_aTW_hTd = bestSmooth_aTW_hTd;
        smooth_aT_hTd = bestSmooth_aT_hTd;
        interp = bestInterp;
    }
    log.info("\nTuning selected smooth_aTW_hTWd: " + smooth_aTW_hTWd + " smooth_aT_hTWd: " + smooth_aT_hTWd + " interp: " + interp + " smooth_aTW_aT: " + smooth_aTW_aT + " smooth_aTW_hTd: " + smooth_aTW_hTd + " smooth_aT_hTd: " + smooth_aT_hTd);
}
Also used : ArrayList(java.util.ArrayList) Tree(edu.stanford.nlp.trees.Tree)
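
Since edu.stanford.nlp.trees.Treebank is itself a Collection<Tree>, a tuning treebank can be handed to tune() directly. A minimal sketch, assuming grammar is an already-constructed MLEDependencyGrammar (its construction is omitted) and the path is a placeholder:

// Load tuning trees into memory and pass them to tune() as a Collection<Tree>.
Treebank tuneTrees = new MemoryTreebank();
tuneTrees.loadPath("/path/to/tuning-treebank");
grammar.tune(tuneTrees);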

Aggregations

Tree (edu.stanford.nlp.trees.Tree): 329
CoreLabel (edu.stanford.nlp.ling.CoreLabel): 99
ArrayList (java.util.ArrayList): 59
CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 55
TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations): 43
SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 32
ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint): 30
CoreMap (edu.stanford.nlp.util.CoreMap): 27
List (java.util.List): 27
Label (edu.stanford.nlp.ling.Label): 24
SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 21
TreeReaderFactory (edu.stanford.nlp.trees.TreeReaderFactory): 20
TreeReader (edu.stanford.nlp.trees.TreeReader): 19
PrintWriter (java.io.PrintWriter): 19
Language (edu.stanford.nlp.international.Language): 17
TreeTransformer (edu.stanford.nlp.trees.TreeTransformer): 16
Treebank (edu.stanford.nlp.trees.Treebank): 16
IOException (java.io.IOException): 16
Mention (edu.stanford.nlp.coref.data.Mention): 15
TreebankLangParserParams (edu.stanford.nlp.parser.lexparser.TreebankLangParserParams): 15