Example use of edu.stanford.nlp.trees.tregex.TregexMatcher in the CoreNLP project by stanfordnlp.
From the class RuleBasedCorefMentionFinder, the method extractEnumerations:
/**
 * Adds mentions for the parts of enumerations found in a sentence's parse tree.
 *
 * <p>Every match of {@code enumerationsMentionPattern} contributes the two nodes it names
 * {@code m1} and {@code m2}. Each node is mapped to the token span covered by its leaves, and a
 * {@link Mention} is created for every span that is not already in {@code mentionSpanSet} and for
 * which {@code insideNE} returns false.
 *
 * @param s sentence annotation supplying the tokens, the parse tree and the enhanced dependencies
 * @param mentions list that receives the newly created mentions
 * @param mentionSpanSet spans that already have a mention; every accepted span is added to it
 * @param namedEntitySpanSet spans handed to {@code insideNE} when filtering candidates
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
  Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);

  TregexMatcher matcher = enumerationsMentionPattern.matcher(tree);
  Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
  while (matcher.find()) {
    // m1 is registered before m2, so m2 wins when both nodes cover the same span.
    for (Tree part : new Tree[] {matcher.getNode("m1"), matcher.getNode("m2")}) {
      List<Tree> leaves = part.getLeaves();
      // Subtracting 1 from the first leaf's index only gives a [begin, end) span that is
      // used directly with sent.subList below (leaf indices appear to be 1-based).
      int beginIdx = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
      int endIdx = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
      spanToMentionSubTree.put(new IntPair(beginIdx, endIdx), part);
    }
  }

  // Walk the entries so the subtree comes with its span instead of a second map lookup.
  for (Map.Entry<IntPair, Tree> spanMention : spanToMentionSubTree.entrySet()) {
    IntPair mSpan = spanMention.getKey();
    if (!mentionSpanSet.contains(mSpan) && !insideNE(mSpan, namedEntitySpanSet)) {
      int dummyMentionId = -1;
      Mention m = new Mention(dummyMentionId, mSpan.get(0), mSpan.get(1), dependency, new ArrayList<>(sent.subList(mSpan.get(0), mSpan.get(1))), spanMention.getValue());
      mentions.add(m);
      mentionSpanSet.add(mSpan);
    }
  }
}
Example use of edu.stanford.nlp.trees.tregex.TregexMatcher in the CoreNLP project by stanfordnlp.
From the class MentionExtractor, the method findTreePattern:
/**
 * Runs a Tregex pattern over a tree and reports the node pairs of every match.
 *
 * <p>For each match the nodes named {@code m1} and {@code m2} are passed to
 * {@code addFoundPair} together with the matched tree. When the pattern text contains
 * {@code "m3"} and that node is bound, the pair ({@code m2}, {@code m3}) is reported as well.
 *
 * @param tree tree to search
 * @param tgrepPattern pattern naming the nodes {@code m1}, {@code m2} and optionally {@code m3}
 * @param foundPairs collection handed through to {@code addFoundPair}
 * @throws RuntimeException wrapping any exception raised while matching or reporting
 */
private void findTreePattern(Tree tree, TregexPattern tgrepPattern, Set<Pair<Integer, Integer>> foundPairs) {
  try {
    TregexMatcher m = tgrepPattern.matcher(tree);
    // The pattern text cannot change while we iterate, so look for "m3" once, not per match.
    boolean hasThirdNode = tgrepPattern.pattern().contains("m3");
    while (m.find()) {
      Tree t = m.getMatch();
      Tree np1 = m.getNode("m1");
      Tree np2 = m.getNode("m2");
      Tree np3 = hasThirdNode ? m.getNode("m3") : null;
      addFoundPair(np1, np2, t, foundPairs);
      if (np3 != null) {
        addFoundPair(np2, np3, t, foundPairs);
      }
    }
  } catch (Exception e) {
    // shouldn't happen....
    throw new RuntimeException(e);
  }
}
Example use of edu.stanford.nlp.trees.tregex.TregexMatcher in the CoreNLP project by stanfordnlp.
From the class Mention, the method setNumber:
/**
 * Pattern for an NP child (named {@code tmp}) that is followed by a sister matching
 * {@code /,|CC/} which in turn is followed by another NP sister. Compiled once and shared,
 * since the pattern text is a constant.
 */
private static final TregexPattern ENUMERATION_PATTERN = TregexPattern.compile("NP < (NP=tmp $.. (/,|CC/ $.. NP))");

/**
 * Sets {@code number} for this mention.
 *
 * <ul>
 *   <li>Pronominal mentions are looked up in the pronoun lists of {@code dict}.</li>
 *   <li>List mentions are plural.</li>
 *   <li>Mentions with an NER label other than {@code "O"} that are not nominal are singular,
 *       except labels starting with {@code "ORG"}, which stay unknown.</li>
 *   <li>All remaining mentions are decided by the head word's part-of-speech tag.</li>
 * </ul>
 *
 * <p>For non-pronominal mentions a still-unknown number is then looked up in the word lists of
 * {@code dict}, and finally the enumeration check may force the number to plural.
 *
 * @param dict dictionaries providing the pronoun and word lists consulted above
 */
protected void setNumber(Dictionaries dict) {
  if (mentionType == MentionType.PRONOMINAL) {
    if (dict.pluralPronouns.contains(headString)) {
      number = Number.PLURAL;
    } else if (dict.singularPronouns.contains(headString)) {
      number = Number.SINGULAR;
    } else {
      number = Number.UNKNOWN;
    }
  } else if (mentionType == MentionType.LIST) {
    number = Number.PLURAL;
  } else if (!nerString.equals("O") && mentionType != MentionType.NOMINAL) {
    // Check to see if this is a list of things
    if (!(nerString.equals("ORGANIZATION") || nerString.startsWith("ORG"))) {
      number = Number.SINGULAR;
    } else {
      // ORGs can be both plural and singular
      number = Number.UNKNOWN;
    }
  } else {
    String tag = headWord.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    if (tag.startsWith("N") && tag.endsWith("S")) {
      number = Number.PLURAL;
    } else if (tag.startsWith("N")) {
      number = Number.SINGULAR;
    } else {
      number = Number.UNKNOWN;
    }
  }

  if (mentionType != MentionType.PRONOMINAL) {
    if (number == Number.UNKNOWN) {
      if (dict.singularWords.contains(headString)) {
        number = Number.SINGULAR;
        SieveCoreferenceSystem.logger.finest("[Bergsma] Number set to:\tSINGULAR:\t" + headString);
      } else if (dict.pluralWords.contains(headString)) {
        number = Number.PLURAL;
        SieveCoreferenceSystem.logger.finest("[Bergsma] Number set to:\tPLURAL:\t" + headString);
      }
    }

    TregexMatcher m = ENUMERATION_PATTERN.matcher(this.mentionSubTree);
    while (m.find()) {
      // NOTE(review): tmp is bound to a child of the matched NP while the search is rooted at
      // mentionSubTree itself - confirm that this identity test can ever succeed.
      if (this.mentionSubTree == m.getNode("tmp") && this.spanToString().toLowerCase().contains(" and ")) {
        number = Number.PLURAL;
        // Further matches could only assign PLURAL again.
        break;
      }
    }
  }
}
Example use of edu.stanford.nlp.trees.tregex.TregexMatcher in the CoreNLP project by stanfordnlp.
From the class CorefMentionFinder, the method extractEnumerations:
/**
 * Creates mentions for the parts of enumerations matched in a sentence's parse tree.
 *
 * <p>The nodes named {@code m1} and {@code m2} in every match of
 * {@code enumerationsMentionPattern} are keyed by the token span their leaves cover. A span is
 * turned into a {@link Mention} unless it is already present in {@code mentionSpanSet} or
 * {@code insideNE} returns true for it.
 *
 * @param s sentence annotation supplying tokens, parse tree and dependency graphs
 * @param mentions list that receives the newly created mentions
 * @param mentionSpanSet spans that already have a mention; every accepted span is added to it
 * @param namedEntitySpanSet spans handed to {@code insideNE} when filtering candidates
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  final List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  final Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  final SemanticGraph basicDeps = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedDeps = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  if (enhancedDeps == null) {
    // No enhanced graph on this sentence: use the basic one in its place.
    enhancedDeps = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  }

  final TregexMatcher enumMatcher = enumerationsMentionPattern.matcher(parse);
  final Map<IntPair, Tree> subTreeBySpan = Generics.newHashMap();
  while (enumMatcher.find()) {
    enumMatcher.getMatch();
    // Both named nodes are fetched first; m1 is stored before m2, so m2 replaces m1 on equal spans.
    final Tree[] parts = {enumMatcher.getNode("m1"), enumMatcher.getNode("m2")};
    for (Tree part : parts) {
      final List<Tree> leaves = part.getLeaves();
      final int from = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
      final int to = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
      subTreeBySpan.put(new IntPair(from, to), part);
    }
  }

  for (Map.Entry<IntPair, Tree> candidate : subTreeBySpan.entrySet()) {
    final IntPair span = candidate.getKey();
    if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
      continue;
    }
    final int dummyMentionId = -1;
    final int from = span.get(0);
    final int to = span.get(1);
    final List<CoreLabel> spanTokens = new ArrayList<>(tokens.subList(from, to));
    mentions.add(new Mention(dummyMentionId, from, to, tokens, basicDeps, enhancedDeps, spanTokens, candidate.getValue()));
    mentionSpanSet.add(span);
  }
}
Example use of edu.stanford.nlp.trees.tregex.TregexMatcher in the CoreNLP project by stanfordnlp.
From the class HybridCorefMentionFinder, the method extractNPorPRP:
/**
 * Creates a mention for every node matched by {@code npOrPrpMentionPattern} in the sentence's
 * parse tree.
 *
 * <p>A match is skipped when its span already has a mention, or when {@code insideNE} returns
 * true for the span and the node's label does not start with {@code "PRP"}. A multi-token
 * mention whose tokens all carry a tag starting with {@code "NNP"} is also added to
 * {@code namedEntitySpanSet}.
 *
 * @param s sentence annotation supplying tokens, parse tree and dependency graphs
 * @param mentions list that receives the newly created mentions
 * @param mentionSpanSet spans that already have a mention; every accepted span is added to it
 * @param namedEntitySpanSet spans handed to {@code insideNE}; may gain spans as described above
 */
private static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  final List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  final Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  // Called before matching; presumably this is what populates the leaf indices read below.
  parse.indexLeaves();
  final SemanticGraph basicDeps = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedDeps = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  if (enhancedDeps == null) {
    // No enhanced graph on this sentence: use the basic one in its place.
    enhancedDeps = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  }

  final TregexMatcher npMatcher = npOrPrpMentionPattern.matcher(parse);
  while (npMatcher.find()) {
    final Tree node = npMatcher.getMatch();
    final List<Tree> leaves = node.getLeaves();
    final int beginIdx = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
    int endIdx = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
    // Avoid a span whose final token is a comma.
    if (",".equals(tokens.get(endIdx - 1).word())) {
      endIdx -= 1;
    }

    final IntPair span = new IntPair(beginIdx, endIdx);
    if (mentionSpanSet.contains(span)) {
      continue;
    }
    // An earlier variant rejected every span inside a named entity; PRP nodes are exempt now.
    if (insideNE(span, namedEntitySpanSet) && !node.value().startsWith("PRP")) {
      continue;
    }

    final int dummyMentionId = -1;
    final Mention mention = new Mention(dummyMentionId, beginIdx, endIdx, tokens, basicDeps, enhancedDeps, new ArrayList<>(tokens.subList(beginIdx, endIdx)), node);
    mentions.add(mention);
    mentionSpanSet.add(span);

    if (mention.originalSpan.size() > 1) {
      // Non-short-circuit on purpose: every token's tag is inspected, as in a plain full scan.
      boolean allProperNouns = true;
      for (CoreLabel token : mention.originalSpan) {
        allProperNouns &= token.tag().startsWith("NNP");
      }
      if (allProperNouns) {
        namedEntitySpanSet.add(span);
      }
    }
  }
}
Aggregations