use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.
the class CorefMentionFinder method extractEnumerations.
/**
 * Adds mentions for the two named nodes of every enumeration match in the sentence's parse tree.
 *
 * <p>Each match of {@code enumerationsMentionPattern} exposes the nodes {@code m1} and
 * {@code m2}. For each node the span is (index of first leaf - 1, index of last leaf), read from
 * the leaves' {@code IndexAnnotation}; the pair is later used as a half-open range for
 * {@code sent.subList}. A span becomes a new {@link Mention} only when it is not yet in
 * {@code mentionSpanSet} and {@code insideNE} returns false for it.
 *
 * @param s sentence annotation supplying tokens, parse tree and dependency graphs
 * @param mentions receives every newly created mention
 * @param mentionSpanSet spans that already have a mention; spans added here are recorded in it
 * @param namedEntitySpanSet named-entity spans handed to {@code insideNE}
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
  Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  if (enhancedDependency == null) {
    // Fall back to the basic graph that was already fetched above.
    enhancedDependency = basicDependency;
  }

  TregexMatcher matcher = enumerationsMentionPattern.matcher(tree);
  Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
  while (matcher.find()) {
    // Both named nodes are handled identically; m1 is stored before m2 so that, on an
    // identical span, m2's subtree is the one kept in the map.
    Tree[] matchedNodes = {matcher.getNode("m1"), matcher.getNode("m2")};
    for (Tree node : matchedNodes) {
      List<Tree> leaves = node.getLeaves();
      int beginIdx = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
      int endIdx = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
      spanToMentionSubTree.put(new IntPair(beginIdx, endIdx), node);
    }
  }

  for (Map.Entry<IntPair, Tree> spanMention : spanToMentionSubTree.entrySet()) {
    IntPair span = spanMention.getKey();
    if (!mentionSpanSet.contains(span) && !insideNE(span, namedEntitySpanSet)) {
      // -1 is a placeholder id; this method never assigns a real one.
      int dummyMentionId = -1;
      Mention m = new Mention(dummyMentionId, span.get(0), span.get(1), sent, basicDependency, enhancedDependency, new ArrayList<>(sent.subList(span.get(0), span.get(1))), spanMention.getValue());
      mentions.add(m);
      mentionSpanSet.add(span);
    }
  }
}
use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.
the class DependencyCorefMentionFinder method getNPSpanOld.
/**
 * Computes a (begin, end) token span around {@code headword} from the dependency graph.
 *
 * <p>Both values are {@code index() - 1} of graph vertices. Without a copula child the span runs
 * from the smaller of the headword and its left-most child vertex to the larger of the headword
 * and its right-most child vertex. With a copula child, the begin is recomputed from the child
 * that follows the copula in {@code dep.getChildList(headword)}, or set to the headword itself
 * when the copula is the last child.
 *
 * @param headword head of the candidate noun phrase
 * @param dep dependency graph containing {@code headword}
 * @param sent sentence tokens; not read by this method
 * @return the span as an {@link IntPair} of (begin, end)
 */
private IntPair getNPSpanOld(IndexedWord headword, SemanticGraph dep, List<CoreLabel> sent) {
  IndexedWord copula = dep.getChildWithReln(headword, UniversalEnglishGrammaticalRelations.COPULA);
  Pair<IndexedWord, IndexedWord> extremes = SemanticGraphUtils.leftRightMostChildVertices(headword, dep);

  // The headword itself may be the first or the last word of the span.
  final int headIdx = headword.index() - 1;
  int spanBegin = Math.min(headIdx, extremes.first.index() - 1);
  final int spanEnd = Math.max(headIdx, extremes.second.index() - 1);

  if (copula != null) {
    // Copula present: restart the span at whatever child comes right after it.
    List<IndexedWord> kids = dep.getChildList(headword);
    int afterCopula = kids.indexOf(copula) + 1;
    if (afterCopula < kids.size()) {
      IndexedWord leftmost = SemanticGraphUtils.leftMostChildVertice(kids.get(afterCopula), dep);
      spanBegin = Math.min(headIdx, leftmost.index() - 1);
    } else {
      spanBegin = headIdx;
    }
  }
  return new IntPair(spanBegin, spanEnd);
}
use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.
the class DependencyCorefMentionFinder method extractMentionForHeadword.
/**
 * Registers the mention(s) headed by {@code headword}.
 *
 * <p>A headword whose tag starts with {@code PRP} is delegated to
 * {@code extractPronounForHeadword}. Otherwise the span returned by {@code getNPSpan} is turned
 * into a half-open range (its second value plus one), a trailing {@code ","} token and a leading
 * {@code IN}-tagged token are trimmed, and the result is passed to {@code addMention}. When the
 * headword has {@code CONJUNCT} children, a second, shorter span that stops before the
 * lowest-indexed conjunct is also passed to {@code addMention}.
 *
 * @param headword head word of the candidate mention
 * @param dep dependency graph used for span and conjunct lookup
 * @param s sentence annotation supplying tokens and dependency graphs
 * @param mentions forwarded to {@code addMention} / {@code extractPronounForHeadword}
 * @param mentionSpanSet forwarded to {@code addMention} / {@code extractPronounForHeadword}
 * @param namedEntitySpanSet forwarded to {@code addMention} / {@code extractPronounForHeadword}
 */
private void extractMentionForHeadword(IndexedWord headword, SemanticGraph dep, CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  SemanticGraph basicGraph = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedOrNull = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  SemanticGraph enhancedGraph = (enhancedOrNull != null)
      ? enhancedOrNull
      : s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);

  // Pronouns take a dedicated path.
  if (headword.tag().startsWith("PRP")) {
    extractPronounForHeadword(headword, dep, s, mentions, mentionSpanSet, namedEntitySpanSet);
    return;
  }

  // Noun-phrase mention: [spanBegin, spanEnd) over the token list.
  IntPair npSpan = getNPSpan(headword, dep, tokens);
  int spanBegin = npSpan.get(0);
  int spanEnd = npSpan.get(1) + 1;

  // Avoid a span that ends with a comma.
  if (",".equals(tokens.get(spanEnd - 1).word())) {
    spanEnd--;
  }
  // Avoid a span that starts with an IN-tagged token.
  if ("IN".equals(tokens.get(spanBegin).tag())) {
    spanBegin++;
  }
  addMention(spanBegin, spanEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, tokens, basicGraph, enhancedGraph);

  // Conjunction handling ("A and B"): the full span was added above; here only the first
  // element "A" is added.
  Set<IndexedWord> conjuncts = dep.getChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.CONJUNCT);
  if (conjuncts.isEmpty()) {
    return;
  }

  // Pick the conjunct with the smallest index, seeded with the graph's own answer.
  IndexedWord earliest = dep.getChildWithReln(headword, UniversalEnglishGrammaticalRelations.CONJUNCT);
  for (IndexedWord candidate : conjuncts) {
    earliest = (candidate.index() < earliest.index()) ? candidate : earliest;
  }

  // Walk left from the start of that conjunct, skipping tokens tagged CC or ",".
  IndexedWord conjunctLeftEdge = SemanticGraphUtils.leftMostChildVertice(earliest, dep);
  int firstElementEnd = conjunctLeftEdge.index() - 1;
  while (firstElementEnd > spanBegin && tokens.get(firstElementEnd - 1).tag().matches("CC|,")) {
    firstElementEnd--;
  }

  // Only add the shortened span when it is non-empty and still contains the headword.
  if (firstElementEnd > spanBegin && headword.index() - 1 < firstElementEnd) {
    addMention(spanBegin, firstElementEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, tokens, basicGraph, enhancedGraph);
  }
}
use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.
the class HybridCorefMentionFinder method extractNPorPRP.
/**
 * Creates a mention for every parse-tree node matched by {@code npOrPrpMentionPattern}.
 *
 * <p>The tree's leaves are indexed first via {@code indexLeaves()}. For each match the span is
 * (index of first leaf - 1, index of last leaf), shortened by one when its last token is
 * {@code ","}. A span is skipped when it is already in {@code mentionSpanSet}, or when
 * {@code insideNE} holds for it and the node's value does not start with {@code PRP}. A newly
 * added span with more than one token whose tags all start with {@code NNP} is additionally
 * recorded in {@code namedEntitySpanSet}.
 *
 * @param s sentence annotation supplying tokens, parse tree and dependency graphs
 * @param mentions receives every newly created mention
 * @param mentionSpanSet spans that already have a mention; updated with each added span
 * @param namedEntitySpanSet named-entity spans; consulted via {@code insideNE} and possibly extended
 */
private static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  parse.indexLeaves();
  SemanticGraph basicGraph = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedOrNull = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  SemanticGraph enhancedGraph = (enhancedOrNull != null)
      ? enhancedOrNull
      : s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);

  TregexMatcher matcher = npOrPrpMentionPattern.matcher(parse);
  while (matcher.find()) {
    Tree node = matcher.getMatch();
    List<Tree> leaves = node.getLeaves();

    CoreLabel firstLeaf = (CoreLabel) leaves.get(0).label();
    int spanBegin = firstLeaf.get(CoreAnnotations.IndexAnnotation.class) - 1;
    CoreLabel lastLeaf = (CoreLabel) leaves.get(leaves.size() - 1).label();
    int spanEnd = lastLeaf.get(CoreAnnotations.IndexAnnotation.class);

    // Avoid a span that ends with a comma.
    if (",".equals(tokens.get(spanEnd - 1).word())) {
      spanEnd--;
    }

    IntPair span = new IntPair(spanBegin, spanEnd);
    // Skip known spans, and spans inside a named entity unless the node is a PRP.
    if (mentionSpanSet.contains(span) || (insideNE(span, namedEntitySpanSet) && !node.value().startsWith("PRP"))) {
      continue;
    }

    // -1 is a placeholder mention id.
    Mention mention = new Mention(-1, spanBegin, spanEnd, tokens, basicGraph, enhancedGraph, new ArrayList<>(tokens.subList(spanBegin, spanEnd)), node);
    mentions.add(mention);
    mentionSpanSet.add(span);

    if (mention.originalSpan.size() <= 1) {
      continue;
    }
    // Multi-token span made only of NNP* tokens is treated as a named entity.
    // Non-short-circuit &= so every token's tag is inspected.
    boolean allProperNouns = true;
    for (CoreLabel token : mention.originalSpan) {
      allProperNouns &= token.tag().startsWith("NNP");
    }
    if (allProperNouns) {
      namedEntitySpanSet.add(span);
    }
  }
}
use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.
the class HybridCorefMentionFinder method extractNamedEntityMentions.
/**
 * Creates one mention per maximal run of consecutive tokens sharing the same non-{@code "O"} NER
 * label.
 *
 * <p>A run is closed when a token with a different label is seen; if that token is a
 * possessive {@code 's} tagged {@code POS}, it is absorbed into the run being closed. A run
 * still open at the end of the sentence is closed at {@code sent.size()}. Every new span is
 * added to {@code mentions}, {@code mentionSpanSet} and {@code namedEntitySpanSet}; spans already
 * present in {@code mentionSpanSet} are skipped.
 *
 * @param s sentence annotation supplying tokens and dependency graphs
 * @param mentions receives every newly created mention
 * @param mentionSpanSet spans that already have a mention; updated with each added span
 * @param namedEntitySpanSet updated with each added span
 */
protected static void extractNamedEntityMentions(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
  SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  if (enhancedDependency == null) {
    // Fall back to the basic graph that was already fetched above.
    enhancedDependency = basicDependency;
  }

  // NER label of the run currently being tracked; "O" means no entity is open.
  String preNE = "O";
  int beginIndex = -1;
  for (CoreLabel w : sent) {
    // NOTE(review): a null NER label would throw on the next line - confirm every token is NER-annotated.
    String nerString = w.ner();
    if (!nerString.equals(preNE)) {
      // Label changed: the previous run ends just before this token.
      int endIndex = w.get(CoreAnnotations.IndexAnnotation.class) - 1;
      // Plain equality; no need to compile a regex for a one-character literal.
      if (!"O".equals(preNE)) {
        // Absorb a possessive marker into the entity that precedes it.
        if (w.get(CoreAnnotations.TextAnnotation.class).equals("'s") && w.tag().equals("POS")) {
          endIndex++;
        }
        IntPair mSpan = new IntPair(beginIndex, endIndex);
        // The span is empty when its only token was already attached to the previous
        // entity by the possessive heuristic above; skip it in that case.
        if (beginIndex < endIndex && !mentionSpanSet.contains(mSpan)) {
          int dummyMentionId = -1;
          Mention m = new Mention(dummyMentionId, beginIndex, endIndex, sent, basicDependency, enhancedDependency, new ArrayList<>(sent.subList(beginIndex, endIndex)));
          mentions.add(m);
          mentionSpanSet.add(mSpan);
          namedEntitySpanSet.add(mSpan);
        }
      }
      // A new run (possibly an "O" run) starts where the previous one ended.
      beginIndex = endIndex;
      preNE = nerString;
    }
  }

  // Entity still open at the end of the sentence.
  if (!"O".equals(preNE)) {
    IntPair mSpan = new IntPair(beginIndex, sent.size());
    if (!mentionSpanSet.contains(mSpan)) {
      int dummyMentionId = -1;
      Mention m = new Mention(dummyMentionId, beginIndex, sent.size(), sent, basicDependency, enhancedDependency, new ArrayList<>(sent.subList(beginIndex, sent.size())));
      mentions.add(m);
      mentionSpanSet.add(mSpan);
      namedEntitySpanSet.add(mSpan);
    }
  }
}
Aggregations