Search in sources :

Example 1 with TregexMatcher

use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.

In class RuleBasedCorefMentionFinder, method extractEnumerations.

/**
 * Adds a mention for each enumeration conjunct found in the sentence's parse tree.
 *
 * <p>Runs {@code enumerationsMentionPattern} over the tree stored under
 * {@code TreeCoreAnnotations.TreeAnnotation} and, for every match, takes the nodes bound to
 * {@code m1} and {@code m2}. Each node is turned into a span whose start is the
 * {@code IndexAnnotation} of its first leaf minus one and whose end is the
 * {@code IndexAnnotation} of its last leaf; the span is later used as the half-open range
 * for {@code sent.subList}. A span yields a new {@link Mention} only when it is not already
 * in {@code mentionSpanSet} and {@code insideNE(span, namedEntitySpanSet)} is false.
 *
 * @param s sentence annotation supplying tokens, parse tree and enhanced dependencies
 * @param mentions list that receives the newly created mentions
 * @param mentionSpanSet spans that already have a mention; updated with every span added here
 * @param namedEntitySpanSet spans handed to {@code insideNE} to filter candidates
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    TregexMatcher matcher = enumerationsMentionPattern.matcher(tree);
    Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
    while (matcher.find()) {
        // NOTE(review): the return value is unused; the call is kept as-is until it is
        // confirmed that dropping it cannot change behavior for this pattern.
        matcher.getMatch();
        // Both conjuncts are processed the same way, m1 first.
        Tree[] conjuncts = { matcher.getNode("m1"), matcher.getNode("m2") };
        for (Tree conjunct : conjuncts) {
            List<Tree> mLeaves = conjunct.getLeaves();
            int beginIdx = ((CoreLabel) mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
            int endIdx = ((CoreLabel) mLeaves.get(mLeaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
            // A later node with the same span replaces the earlier one.
            spanToMentionSubTree.put(new IntPair(beginIdx, endIdx), conjunct);
        }
    }
    // Iterate entries directly instead of looking every key up again.
    for (Map.Entry<IntPair, Tree> spanMention : spanToMentionSubTree.entrySet()) {
        IntPair mSpan = spanMention.getKey();
        if (!mentionSpanSet.contains(mSpan) && !insideNE(mSpan, namedEntitySpanSet)) {
            int dummyMentionId = -1;
            Mention m = new Mention(dummyMentionId, mSpan.get(0), mSpan.get(1), dependency, new ArrayList<>(sent.subList(mSpan.get(0), mSpan.get(1))), spanMention.getValue());
            mentions.add(m);
            mentionSpanSet.add(mSpan);
        }
    }
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Example 2 with TregexMatcher

use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.

In class MentionExtractor, method findTreePattern.

/**
 * Runs {@code tgrepPattern} over {@code tree} and reports the nodes bound in each match.
 *
 * <p>For every match, the nodes named {@code m1} and {@code m2} are passed to
 * {@code addFoundPair} together with the matched tree. When the pattern text contains
 * {@code "m3"} and that node is bound, {@code m2} and {@code m3} are reported as a second pair.
 *
 * @param tree tree to search
 * @param tgrepPattern pattern expected to bind {@code m1} and {@code m2}, optionally {@code m3}
 * @param foundPairs collection handed through to {@code addFoundPair}
 * @throws RuntimeException wrapping any exception raised while matching or recording pairs
 */
private void findTreePattern(Tree tree, TregexPattern tgrepPattern, Set<Pair<Integer, Integer>> foundPairs) {
    try {
        TregexMatcher m = tgrepPattern.matcher(tree);
        // The pattern text is the same for every match, so test for the third node name once.
        final boolean hasThirdNode = tgrepPattern.pattern().contains("m3");
        while (m.find()) {
            Tree t = m.getMatch();
            Tree np1 = m.getNode("m1");
            Tree np2 = m.getNode("m2");
            Tree np3 = hasThirdNode ? m.getNode("m3") : null;
            addFoundPair(np1, np2, t, foundPairs);
            if (np3 != null) {
                addFoundPair(np2, np3, t, foundPairs);
            }
        }
    } catch (Exception e) {
        // shouldn't happen....
        throw new RuntimeException(e);
    }
}
Also used : Tree(edu.stanford.nlp.trees.Tree) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Example 3 with TregexMatcher

use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.

In class Mention, method setNumber.

/**
 * Pattern used by {@code setNumber}: an NP with a child NP (bound as {@code tmp}) that is
 * followed by a sister matching {@code /,|CC/}, itself followed by another NP sister.
 * Compiled once instead of on every call.
 */
private static final TregexPattern ENUMERATION_NUMBER_PATTERN = TregexPattern.compile("NP < (NP=tmp $.. (/,|CC/ $.. NP))");

/**
 * Sets this mention's {@code number} field.
 *
 * <ul>
 *   <li>Pronominal mentions: plural or singular according to the pronoun sets in {@code dict},
 *       otherwise unknown.</li>
 *   <li>List mentions: plural.</li>
 *   <li>Mentions whose {@code nerString} is not {@code "O"} and that are not nominal: singular,
 *       except when {@code nerString} starts with {@code "ORG"}, which stays unknown.</li>
 *   <li>Everything else: decided from the head word's part-of-speech tag (starts with
 *       {@code N} and ends with {@code S} is plural, other {@code N} tags are singular).</li>
 * </ul>
 *
 * <p>For non-pronominal mentions that are still unknown, the singular/plural word sets in
 * {@code dict} are consulted, and finally the enumeration pattern may force plural.
 *
 * @param dict dictionaries supplying the pronoun and word number sets
 */
protected void setNumber(Dictionaries dict) {
    if (mentionType == MentionType.PRONOMINAL) {
        if (dict.pluralPronouns.contains(headString)) {
            number = Number.PLURAL;
        } else if (dict.singularPronouns.contains(headString)) {
            number = Number.SINGULAR;
        } else {
            number = Number.UNKNOWN;
        }
    } else if (mentionType == MentionType.LIST) {
        number = Number.PLURAL;
    } else if (!nerString.equals("O") && mentionType != MentionType.NOMINAL) {
        // "ORGANIZATION" also starts with "ORG", so the prefix test covers both spellings.
        if (nerString.startsWith("ORG")) {
            // ORGs can be both plural and singular
            number = Number.UNKNOWN;
        } else {
            number = Number.SINGULAR;
        }
    } else {
        String tag = headWord.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        if (tag.startsWith("N") && tag.endsWith("S")) {
            number = Number.PLURAL;
        } else if (tag.startsWith("N")) {
            number = Number.SINGULAR;
        } else {
            number = Number.UNKNOWN;
        }
    }
    if (mentionType != MentionType.PRONOMINAL) {
        if (number == Number.UNKNOWN) {
            // Fall back to the word lists when nothing above decided the number.
            if (dict.singularWords.contains(headString)) {
                number = Number.SINGULAR;
                SieveCoreferenceSystem.logger.finest("[Bergsma] Number set to:\tSINGULAR:\t" + headString);
            } else if (dict.pluralWords.contains(headString)) {
                number = Number.PLURAL;
                SieveCoreferenceSystem.logger.finest("[Bergsma] Number set to:\tPLURAL:\t" + headString);
            }
        }
        TregexMatcher m = ENUMERATION_NUMBER_PATTERN.matcher(this.mentionSubTree);
        while (m.find()) {
            // NOTE(review): "tmp" is bound to a child of the matched NP, so it looks like it can
            // never be the very tree the matcher was started on - confirm this branch is reachable.
            if (this.mentionSubTree == m.getNode("tmp") && this.spanToString().toLowerCase().contains(" and ")) {
                number = Number.PLURAL;
                // Further matches could only assign PLURAL again.
                break;
            }
        }
    }
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Example 4 with TregexMatcher

use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.

In class CorefMentionFinder, method extractEnumerations.

/**
 * Collects the conjuncts matched by {@code enumerationsMentionPattern} and registers each
 * not-yet-seen one as a {@link Mention}.
 *
 * <p>The nodes bound to {@code m1} and {@code m2} in every match are keyed by their span:
 * the {@code IndexAnnotation} of the first leaf minus one, and the {@code IndexAnnotation}
 * of the last leaf. A span is skipped when {@code mentionSpanSet} already holds it or when
 * {@code insideNE} returns true for it. The basic dependencies stand in for the enhanced
 * ones when the latter are absent.
 *
 * @param s sentence annotation supplying tokens, parse tree and dependency graphs
 * @param mentions list that receives the newly created mentions
 * @param mentionSpanSet spans that already have a mention; updated with every span added here
 * @param namedEntitySpanSet spans handed to {@code insideNE} to filter candidates
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedDependency == null) {
        enhancedDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    }

    // First pass: gather one subtree per distinct span.
    Map<IntPair, Tree> subTreeBySpan = Generics.newHashMap();
    TregexMatcher matcher = enumerationsMentionPattern.matcher(parse);
    while (matcher.find()) {
        matcher.getMatch();
        Tree[] conjuncts = { matcher.getNode("m1"), matcher.getNode("m2") };
        for (Tree conjunct : conjuncts) {
            List<Tree> leaves = conjunct.getLeaves();
            CoreLabel firstLeaf = (CoreLabel) leaves.get(0).label();
            int start = firstLeaf.get(CoreAnnotations.IndexAnnotation.class) - 1;
            CoreLabel lastLeaf = (CoreLabel) leaves.get(leaves.size() - 1).label();
            int end = lastLeaf.get(CoreAnnotations.IndexAnnotation.class);
            subTreeBySpan.put(new IntPair(start, end), conjunct);
        }
    }

    // Second pass: turn every admissible span into a mention.
    for (Map.Entry<IntPair, Tree> candidate : subTreeBySpan.entrySet()) {
        IntPair span = candidate.getKey();
        if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
            continue;
        }
        int dummyMentionId = -1;
        List<CoreLabel> spanTokens = new ArrayList<>(tokens.subList(span.get(0), span.get(1)));
        mentions.add(new Mention(dummyMentionId, span.get(0), span.get(1), tokens, basicDependency, enhancedDependency, spanTokens, candidate.getValue()));
        mentionSpanSet.add(span);
    }
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) IntPair(edu.stanford.nlp.util.IntPair) ParserConstraint(edu.stanford.nlp.parser.common.ParserConstraint) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher) Map(java.util.Map) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 5 with TregexMatcher

use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.

In class HybridCorefMentionFinder, method extractNPorPRP.

/**
 * Creates a {@link Mention} for every node matched by {@code npOrPrpMentionPattern}.
 *
 * <p>The span of a matched node runs from the {@code IndexAnnotation} of its first leaf minus
 * one to the {@code IndexAnnotation} of its last leaf, shortened by one token when the token
 * before the end is a comma. A span is accepted when it is not yet in {@code mentionSpanSet}
 * and either {@code insideNE} is false for it or the node's value starts with {@code "PRP"}.
 * An accepted mention with more than one token in {@code originalSpan}, all tagged with a
 * tag starting with {@code "NNP"}, is additionally recorded in {@code namedEntitySpanSet}.
 *
 * @param s sentence annotation supplying tokens, parse tree and dependency graphs
 * @param mentions list that receives the newly created mentions
 * @param mentionSpanSet spans that already have a mention; updated with every span added here
 * @param namedEntitySpanSet named-entity spans; may gain all-NNP spans found here
 */
private static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    // Called before any leaf index is read; presumably (re)assigns the leaf indices - confirm.
    parse.indexLeaves();
    SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedDependency == null) {
        enhancedDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    }

    TregexMatcher matcher = npOrPrpMentionPattern.matcher(parse);
    while (matcher.find()) {
        Tree node = matcher.getMatch();
        List<Tree> leaves = node.getLeaves();
        CoreLabel firstLeaf = (CoreLabel) leaves.get(0).label();
        int start = firstLeaf.get(CoreAnnotations.IndexAnnotation.class) - 1;
        CoreLabel lastLeaf = (CoreLabel) leaves.get(leaves.size() - 1).label();
        int end = lastLeaf.get(CoreAnnotations.IndexAnnotation.class);

        // try not to have span that ends with ,
        boolean endsWithComma = ",".equals(tokens.get(end - 1).word());
        if (endsWithComma) {
            end -= 1;
        }

        IntPair span = new IntPair(start, end);
        if (mentionSpanSet.contains(span)) {
            continue;
        }
        // Spans inside a named entity are only admitted for nodes whose value starts with "PRP".
        if (insideNE(span, namedEntitySpanSet) && !node.value().startsWith("PRP")) {
            continue;
        }

        int dummyMentionId = -1;
        List<CoreLabel> spanTokens = new ArrayList<>(tokens.subList(start, end));
        Mention mention = new Mention(dummyMentionId, start, end, tokens, basicDependency, enhancedDependency, spanTokens, node);
        mentions.add(mention);
        mentionSpanSet.add(span);

        if (mention.originalSpan.size() <= 1) {
            continue;
        }
        // Every tag is inspected, without stopping at the first non-NNP one.
        boolean everyTagIsNnp = true;
        for (CoreLabel token : mention.originalSpan) {
            everyTagIsNnp &= token.tag().startsWith("NNP");
        }
        if (everyTagIsNnp) {
            namedEntitySpanSet.add(span);
        }
    }
}
Also used : TregexPattern(edu.stanford.nlp.trees.tregex.TregexPattern) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) IntPair(edu.stanford.nlp.util.IntPair) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher)

Aggregations

TregexMatcher (edu.stanford.nlp.trees.tregex.TregexMatcher): 25; TregexPattern (edu.stanford.nlp.trees.tregex.TregexPattern): 17; Tree (edu.stanford.nlp.trees.Tree): 10; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 9; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 6; SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 5; SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 5; Mention (edu.stanford.nlp.coref.data.Mention): 3; ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint): 3; TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations): 3; TregexParseException (edu.stanford.nlp.trees.tregex.TregexParseException): 3; IntPair (edu.stanford.nlp.util.IntPair): 3; Label (edu.stanford.nlp.ling.Label): 2; SerializableFunction (edu.stanford.nlp.process.SerializableFunction): 2; TreeReader (edu.stanford.nlp.trees.TreeReader): 2; TreeReaderFactory (edu.stanford.nlp.trees.TreeReaderFactory): 2; TreeTransformer (edu.stanford.nlp.trees.TreeTransformer): 2; FrenchTreeReaderFactory (edu.stanford.nlp.trees.international.french.FrenchTreeReaderFactory): 2; TsurgeonPattern (edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern): 2; Pair (edu.stanford.nlp.util.Pair): 2