Search in sources :

Example 96 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class GenericDataSetReader method originalFindSyntacticHead.

/**
 * Legacy variant of {@link #findSyntacticHead}, preserved as it was before Chris's
 * modifications. There is no good reason to call it other than to reproduce
 * historical results.
 *
 * <p>The method first asks {@code findTreeWithSpan} for a node of {@code root} matching
 * the mention's extent start/end and, when one is found, returns the head of that node.
 * Otherwise the extent tokens are parsed on their own, the head of that local parse is
 * computed, and {@code root} is searched again using the begin/end indices stored on
 * the local head's label.
 *
 * @param ent The entity mention
 * @param root The Tree for the entire sentence in which it occurs.
 * @param tokens The Sentence in which it occurs
 * @return The tree object corresponding to the head. This MUST be a child of root.
 *     It will be a leaf in the parse tree.
 */
public Tree originalFindSyntacticHead(EntityMention ent, Tree root, List<CoreLabel> tokens) {
    logger.fine("Searching for tree matching " + ent);

    Tree spanMatch = findTreeWithSpan(root, ent.getExtentTokenStart(), ent.getExtentTokenEnd());
    if (spanMatch != null) {
        logger.fine("Mention \"" + ent + "\" mapped to tree: " + printTree(spanMatch));
        return safeHead(spanMatch);
    }

    // No node matches the extent exactly: collect the mention's own tokens
    // and parse just that sub-sequence.
    List<CoreLabel> mentionTokens = new ArrayList<>();
    int pos = ent.getExtentTokenStart();
    while (pos < ent.getExtentTokenEnd()) {
        mentionTokens.add(tokens.get(pos));
        pos++;
    }

    Tree localParse = parse(mentionTokens);
    logger.fine("No exact match found. Local parse:\n" + localParse.pennString());
    convertToCoreLabels(localParse);
    // Index the local parse starting at the extent start so that its span indices
    // can be looked up in the sentence-level tree below.
    localParse.indexSpans(ent.getExtentTokenStart());

    Tree localHead = safeHead(localParse);
    assert (localHead != null);

    // localHead belongs to the local extent parse; map it back onto the node of the
    // main tree that carries the same begin/end indices.
    CoreLabel headLabel = (CoreLabel) localHead.label();
    Tree headInRoot = findTreeWithSpan(
            root,
            headLabel.get(CoreAnnotations.BeginIndexAnnotation.class),
            headLabel.get(CoreAnnotations.EndIndexAnnotation.class));
    assert (headInRoot != null);
    return headInRoot;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) ArrayList(java.util.ArrayList) Tree(edu.stanford.nlp.trees.Tree) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint)

Example 97 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class GenericDataSetReader method funkyFindLeafWithApproximateSpan.

/**
 * Scans the leaves of {@code root} for the first one whose value equals {@code token}
 * and whose begin index lies in {@code [index, index + approximateness]}.
 *
 * @param root tree whose leaves are searched
 * @param token leaf value to look for
 * @param index lowest acceptable begin index
 * @param approximateness how far past {@code index} the begin index may be
 * @return the first matching leaf, or {@code null} (after logging at SEVERE level)
 *     when no leaf matches
 */
private Tree funkyFindLeafWithApproximateSpan(Tree root, String token, int index, int approximateness) {
    logger.fine("Looking for " + token + " at pos " + index + " plus upto " + approximateness + " in tree: " + root.pennString());
    for (Tree candidate : root.getLeaves()) {
        CoreLabel leafLabel = (CoreLabel) candidate.label();
        int begin = leafLabel.get(CoreAnnotations.BeginIndexAnnotation.class);
        if (!token.equals(candidate.value())) {
            continue;
        }
        if (begin < index || begin > index + approximateness) {
            continue;
        }
        return candidate;
    }
    // this shouldn't happen
    // but it does happen (VERY RARELY) on some weird web text that includes SGML tags with spaces
    // TODO: does this mean that somehow tokenization is different for the parser? check this by throwing an Exception in KBP
    logger.severe("GenericDataSetReader: WARNING: Failed to find head token");
    logger.severe("  when looking for " + token + " at pos " + index + " plus upto " + approximateness + " in tree: " + root.pennString());
    return null;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint)

Example 98 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class GenericDataSetReader method assignSyntacticHead.

/**
 * Determines the token position of an entity mention's head and stores it on the mention.
 *
 * <p>If the mention already reports a syntactic head position other than -1, that value
 * is returned unchanged. Otherwise {@link #findSyntacticHead} is consulted; when it
 * fails (by throwing or by returning {@code null}) the last token of the mention's
 * extent is used instead.
 *
 * @param ent The entity mention
 * @param tree The Tree for the entire sentence in which it occurs.
 * @param tokens The Sentence in which it occurs
 * @param setHeadSpan Whether to set the head span in the entity mention.
 * @return The index of the entity head
 */
public int assignSyntacticHead(EntityMention ent, Tree tree, List<CoreLabel> tokens, boolean setHeadSpan) {
    int knownPosition = ent.getSyntacticHeadTokenPosition();
    if (knownPosition != -1) {
        return knownPosition;
    }

    logger.finest("Finding syntactic head for entity: " + ent + " in tree: " + tree.toString());
    logger.finest("Flat sentence is: " + tokens);

    Tree headNode = null;
    try {
        headNode = findSyntacticHead(ent, tree, tokens);
    } catch (Exception | AssertionError failure) {
        // Both failure kinds are handled the same way: report and fall through to the heuristic.
        logger.severe("WARNING: failed to parse sentence. Will continue with the right-most head heuristic: " + sentenceToString(tokens));
        failure.printStackTrace();
    }

    // Right-most token of the extent; used only when no syntactic head was found.
    int lastExtentToken = ent.getExtentTokenEnd() - 1;
    final int position;
    if (headNode == null) {
        logger.fine("WARNING: failed to find syntactic head for entity: " + ent + " in tree: " + tree);
        logger.fine("Fallback strategy: will set head to last token in mention: " + tokens.get(lastExtentToken));
        position = lastExtentToken;
    } else {
        CoreLabel headLabel = (CoreLabel) headNode.label();
        position = headLabel.get(CoreAnnotations.BeginIndexAnnotation.class);
    }

    ent.setHeadTokenPosition(position);
    if (setHeadSpan) {
        // set the head span to match exactly the syntactic head
        // this is needed for some corpora where the head span is not given
        ent.setHeadTokenSpan(new Span(position, position + 1));
    }
    return position;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Tree(edu.stanford.nlp.trees.Tree) Span(edu.stanford.nlp.ie.machinereading.structure.Span) IOException(java.io.IOException) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint)

Example 99 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class GenericDataSetReader method safeHead.

/**
 * Picks a head node for {@code top} with two fallbacks.
 *
 * @param top the tree to find a head for
 * @return the head terminal reported by {@code headFinder}; if that is {@code null},
 *     the right-most leaf of {@code top}; if there are no leaves, {@code top} itself
 */
private Tree safeHead(Tree top) {
    Tree result = top.headTerminal(headFinder);
    if (result == null) {
        List<Tree> terminals = top.getLeaves();
        result = terminals.isEmpty() ? top : terminals.get(terminals.size() - 1);
    }
    return result;
}
Also used : Tree(edu.stanford.nlp.trees.Tree)

Example 100 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class MachineReading method assignSyntacticHeadToEntities.

/**
 * Assigns a syntactic head to every entity mention of every sentence in the corpus.
 *
 * <p>For each sentence, the token list and parse tree are read from the sentence
 * annotations; when {@code MachineReadingProperties.forceGenerationOfIndexSpans} is set,
 * the tree's spans are re-indexed from 0 first. Each mention is then passed to
 * {@code reader.assignSyntacticHead} with head-span assignment enabled. Sentences
 * without an entity-mentions annotation are skipped.
 *
 * @param corpus the annotated corpus; it and its sentences annotation are asserted non-null
 */
private void assignSyntacticHeadToEntities(Annotation corpus) {
    assert (corpus != null);
    assert (corpus.get(SentencesAnnotation.class) != null);
    for (CoreMap sent : corpus.get(SentencesAnnotation.class)) {
        List<CoreLabel> tokens = sent.get(TokensAnnotation.class);
        assert (tokens != null);
        Tree tree = sent.get(TreeAnnotation.class);
        // The null check must come before the first dereference of tree; previously
        // indexSpans(0) ran first, so a missing parse tree surfaced as a bare
        // NullPointerException and the assertion could never fire.
        assert (tree != null);
        if (MachineReadingProperties.forceGenerationOfIndexSpans) {
            tree.indexSpans(0);
        }
        if (sent.get(EntityMentionsAnnotation.class) != null) {
            for (EntityMention e : sent.get(EntityMentionsAnnotation.class)) {
                reader.assignSyntacticHead(e, tree, tokens, true);
            }
        }
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Tree(edu.stanford.nlp.trees.Tree) EntityMentionsAnnotation(edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations.EntityMentionsAnnotation) CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

Tree (edu.stanford.nlp.trees.Tree): 329; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 99; ArrayList (java.util.ArrayList): 59; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 55; TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations): 43; SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 32; ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint): 30; CoreMap (edu.stanford.nlp.util.CoreMap): 27; List (java.util.List): 27; Label (edu.stanford.nlp.ling.Label): 24; SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 21; TreeReaderFactory (edu.stanford.nlp.trees.TreeReaderFactory): 20; TreeReader (edu.stanford.nlp.trees.TreeReader): 19; PrintWriter (java.io.PrintWriter): 19; Language (edu.stanford.nlp.international.Language): 17; TreeTransformer (edu.stanford.nlp.trees.TreeTransformer): 16; Treebank (edu.stanford.nlp.trees.Treebank): 16; IOException (java.io.IOException): 16; Mention (edu.stanford.nlp.coref.data.Mention): 15; TreebankLangParserParams (edu.stanford.nlp.parser.lexparser.TreebankLangParserParams): 15