use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.
the class GrammaticalRelation method getRelatedNodes.
/** Given a {@code Tree} node {@code t}, attempts to
* return a list of nodes to which node {@code t} has this
* grammatical relation, with {@code t} as the governor.
*
* @param t Target for finding dependents of t related by this GR
* @param root The root of the Tree
* @return A Collection of dependent nodes to which t bears this GR
*/
public Collection<TreeGraphNode> getRelatedNodes(TreeGraphNode t, TreeGraphNode root, HeadFinder headFinder) {
Set<TreeGraphNode> nodeList = new ArraySet<>();
for (TregexPattern p : targetPatterns) {
// cdm: I deleted: && nodeList.isEmpty()
// Initialize the TregexMatcher with the HeadFinder so that we
// can use the same HeadFinder through the entire process of
// building the dependencies
TregexMatcher m = p.matcher(root, headFinder);
while (m.findAt(t)) {
TreeGraphNode target = (TreeGraphNode) m.getNode("target");
if (target == null) {
throw new AssertionError("Expression has no target: " + p);
}
nodeList.add(target);
if (DEBUG) {
log.info("found " + this + "(" + t + "-" + t.headWordNode() + ", " + m.getNode("target") + "-" + ((TreeGraphNode) m.getNode("target")).headWordNode() + ") using pattern " + p);
for (String nodeName : m.getNodeNames()) {
if (nodeName.equals("target"))
continue;
log.info(" node " + nodeName + ": " + m.getNode(nodeName));
}
}
}
}
return nodeList;
}
use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.
the class CorefMentionFinder method extractEnumerations.
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
if (enhancedDependency == null) {
enhancedDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
}
TregexPattern tgrepPattern = enumerationsMentionPattern;
TregexMatcher matcher = tgrepPattern.matcher(tree);
Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
while (matcher.find()) {
matcher.getMatch();
Tree m1 = matcher.getNode("m1");
Tree m2 = matcher.getNode("m2");
List<Tree> mLeaves = m1.getLeaves();
int beginIdx = ((CoreLabel) mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
int endIdx = ((CoreLabel) mLeaves.get(mLeaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
spanToMentionSubTree.put(new IntPair(beginIdx, endIdx), m1);
mLeaves = m2.getLeaves();
beginIdx = ((CoreLabel) mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
endIdx = ((CoreLabel) mLeaves.get(mLeaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
spanToMentionSubTree.put(new IntPair(beginIdx, endIdx), m2);
}
for (Map.Entry<IntPair, Tree> spanMention : spanToMentionSubTree.entrySet()) {
IntPair span = spanMention.getKey();
if (!mentionSpanSet.contains(span) && !insideNE(span, namedEntitySpanSet)) {
int dummyMentionId = -1;
Mention m = new Mention(dummyMentionId, span.get(0), span.get(1), sent, basicDependency, enhancedDependency, new ArrayList<>(sent.subList(span.get(0), span.get(1))), spanMention.getValue());
mentions.add(m);
mentionSpanSet.add(span);
}
}
}
use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.
the class HybridCorefMentionFinder method extractNPorPRP.
private static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
tree.indexLeaves();
SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
if (enhancedDependency == null) {
enhancedDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
}
TregexPattern tgrepPattern = npOrPrpMentionPattern;
TregexMatcher matcher = tgrepPattern.matcher(tree);
while (matcher.find()) {
Tree t = matcher.getMatch();
List<Tree> mLeaves = t.getLeaves();
int beginIdx = ((CoreLabel) mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
int endIdx = ((CoreLabel) mLeaves.get(mLeaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
// try not to have span that ends with ,
if (",".equals(sent.get(endIdx - 1).word())) {
endIdx--;
}
IntPair mSpan = new IntPair(beginIdx, endIdx);
// if(!mentionSpanSet.contains(mSpan) && (!insideNE(mSpan, namedEntitySpanSet)) ) {
if (!mentionSpanSet.contains(mSpan) && (!insideNE(mSpan, namedEntitySpanSet) || t.value().startsWith("PRP"))) {
int dummyMentionId = -1;
Mention m = new Mention(dummyMentionId, beginIdx, endIdx, sent, basicDependency, enhancedDependency, new ArrayList<>(sent.subList(beginIdx, endIdx)), t);
mentions.add(m);
mentionSpanSet.add(mSpan);
if (m.originalSpan.size() > 1) {
boolean isNE = true;
for (CoreLabel cl : m.originalSpan) {
if (!cl.tag().startsWith("NNP"))
isNE = false;
}
if (isNE) {
namedEntitySpanSet.add(mSpan);
}
}
}
}
}
use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.
the class RuleBasedCorefMentionFinder method extractNPorPRP.
public void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
tree.indexLeaves();
SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
if (enhancedDependency == null) {
enhancedDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
}
TregexPattern tgrepPattern = npOrPrpMentionPattern;
TregexMatcher matcher = tgrepPattern.matcher(tree);
while (matcher.find()) {
Tree t = matcher.getMatch();
List<Tree> mLeaves = t.getLeaves();
int beginIdx = ((CoreLabel) mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
int endIdx = ((CoreLabel) mLeaves.get(mLeaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
//if (",".equals(sent.get(endIdx-1).word())) { endIdx--; } // try not to have span that ends with ,
IntPair mSpan = new IntPair(beginIdx, endIdx);
if (!mentionSpanSet.contains(mSpan) && (lang == Locale.CHINESE || !insideNE(mSpan, namedEntitySpanSet))) {
// if(!mentionSpanSet.contains(mSpan) && (!insideNE(mSpan, namedEntitySpanSet) || t.value().startsWith("PRP")) ) {
int dummyMentionId = -1;
Mention m = new Mention(dummyMentionId, beginIdx, endIdx, sent, basicDependency, enhancedDependency, new ArrayList<>(sent.subList(beginIdx, endIdx)), t);
mentions.add(m);
mentionSpanSet.add(mSpan);
// if(m.originalSpan.size() > 1) {
// boolean isNE = true;
// for(CoreLabel cl : m.originalSpan) {
// if(!cl.tag().startsWith("NNP")) isNE = false;
// }
// if(isNE) {
// namedEntitySpanSet.add(mSpan);
// }
// }
}
}
}
use of edu.stanford.nlp.trees.tregex.TregexMatcher in project CoreNLP by stanfordnlp.
the class MentionExtractor method findTreePattern.
private void findTreePattern(Tree tree, TregexPattern tgrepPattern, Set<Pair<Integer, Integer>> foundPairs) {
try {
TregexMatcher m = tgrepPattern.matcher(tree);
while (m.find()) {
Tree t = m.getMatch();
Tree np1 = m.getNode("m1");
Tree np2 = m.getNode("m2");
Tree np3 = null;
if (tgrepPattern.pattern().contains("m3"))
np3 = m.getNode("m3");
addFoundPair(np1, np2, t, foundPairs);
if (np3 != null)
addFoundPair(np2, np3, t, foundPairs);
}
} catch (Exception e) {
// shouldn't happen....
throw new RuntimeException(e);
}
}
Aggregations