use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.
the class CorefMentionFinder method extractNamedEntityModifiers.
/**
 * Extracts mentions which have the same string as another stand-alone mention.
 *
 * <p>For every sentence, every token position and every string in {@code neStrings}, the
 * space-joined words starting at that position are compared (case-insensitively) with the
 * named-entity string. On a match, the span is optionally widened to take in a following
 * possessive "'s" and/or a preceding determiner, and a new {@link Mention} with a dummy id of
 * {@code -1} is appended to the sentence's mention list unless that span is already recorded.
 *
 * @param sentences the sentences to scan; tokens are read from {@code TokensAnnotation}
 * @param mentionSpanSetList per-sentence sets of already-used spans (same indexing as
 *     {@code sentences}); updated in place
 * @param predictedMentions per-sentence mention lists (same indexing as {@code sentences});
 *     updated in place
 * @param neStrings named-entity strings to look for
 */
protected static void extractNamedEntityModifiers(List<CoreMap> sentences, List<Set<IntPair>> mentionSpanSetList, List<List<Mention>> predictedMentions, Set<String> neStrings) {
  for (int i = 0, sz = sentences.size(); i < sz; i++) {
    List<Mention> mentions = predictedMentions.get(i);
    CoreMap sent = sentences.get(i);
    List<CoreLabel> tokens = sent.get(TokensAnnotation.class);
    Set<IntPair> mentionSpanSet = mentionSpanSetList.get(i);
    for (int j = 0, tSize = tokens.size(); j < tSize; j++) {
      for (String ne : neStrings) {
        int len = ne.split(" ").length;
        if (j + len > tSize) {
          continue;
        }
        StringBuilder sb = new StringBuilder();
        for (int k = 0; k < len; k++) {
          sb.append(tokens.get(k + j).word()).append(" ");
        }
        String phrase = sb.toString().trim();
        // Check the cheap string match first: the parse-tree work below is only needed for
        // spans that can actually become mentions.
        if (!phrase.equalsIgnoreCase(ne)) {
          continue;
        }
        int beginIndex = j;
        int endIndex = j + len;
        // include "'s" if it belongs to this named entity
        if (endIndex < tSize && "'s".equals(tokens.get(endIndex).word()) && "POS".equals(tokens.get(endIndex).tag())) {
          Tree tree = sent.get(TreeAnnotation.class);
          if (spanIsExactlyJoined(tree, beginIndex, endIndex)) {
            endIndex++;
          }
        }
        // include DT if it belongs to this named entity
        if (beginIndex > 0 && "DT".equals(tokens.get(beginIndex - 1).tag())) {
          Tree tree = sent.get(TreeAnnotation.class);
          if (spanIsExactlyJoined(tree, beginIndex - 1, endIndex - 1)) {
            beginIndex--;
          }
        }
        IntPair span = new IntPair(beginIndex, endIndex);
        if (mentionSpanSet.contains(span)) {
          continue;
        }
        SemanticGraph basic = sent.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        SemanticGraph enhanced = sent.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
        if (enhanced == null) {
          // fall back to the basic dependencies when no enhanced graph is attached
          enhanced = basic;
        }
        int dummyMentionId = -1;
        Mention m = new Mention(dummyMentionId, beginIndex, endIndex, tokens, basic, enhanced, new ArrayList<>(tokens.subList(beginIndex, endIndex)));
        mentions.add(m);
        mentionSpanSet.add(span);
      }
    }
  }
}

/**
 * Tells whether the node returned by {@code tree.joinNode} for the two leaves starts and ends
 * with exactly those leaves, i.e. the joined node covers nothing outside the span.
 * (joinNode is presumably the lowest common ancestor - confirm against the Tree API.)
 *
 * @param tree the sentence parse tree
 * @param firstLeaf index of the first leaf of the span (inclusive)
 * @param lastLeaf index of the last leaf of the span (inclusive)
 */
private static boolean spanIsExactlyJoined(Tree tree, int firstLeaf, int lastLeaf) {
  List<Tree> leaves = tree.getLeaves();
  Tree sToken = leaves.get(firstLeaf);
  Tree eToken = leaves.get(lastLeaf);
  Tree join = tree.joinNode(sToken, eToken);
  List<Tree> joinLeaves = join.getLeaves();
  // identity comparison on purpose: we want the very same leaf nodes, not equal labels
  return sToken == joinLeaves.get(0) && eToken == joinLeaves.get(joinLeaves.size() - 1);
}
use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.
the class CorefMentionFinder method extractPremarkedEntityMentions.
/**
 * Turns mention boundaries that are already marked on the tokens (via
 * {@code MentionTokenAnnotation}) into {@link Mention} objects.
 *
 * <p>A token flagged as a start records the begin index (its {@code IndexAnnotation} minus
 * one); a token flagged as an end closes the span at its {@code IndexAnnotation}. Each closed
 * span is added to {@code mentions} and {@code mentionSpanSet}. An end marker without a
 * preceding start is only logged.
 *
 * @param s the sentence to read tokens and dependency graphs from
 * @param mentions receives the created mentions
 * @param mentionSpanSet receives the spans of the created mentions
 * @param namedEntitySpanSet not used by this method
 */
protected static void extractPremarkedEntityMentions(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  SemanticGraph basicGraph = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedGraph = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  if (enhancedGraph == null) {
    // no enhanced graph attached: use the basic dependencies for both
    enhancedGraph = basicGraph;
  }

  // begin index of the currently open marked mention, or -1 when none is open
  int openStart = -1;
  for (CoreLabel token : tokens) {
    MultiTokenTag marker = token.get(CoreAnnotations.MentionTokenAnnotation.class);
    if (marker == null) {
      // token is not part of a pre-marked mention
      continue;
    }
    if (marker.isStart()) {
      openStart = token.get(CoreAnnotations.IndexAnnotation.class) - 1;
    }
    if (!marker.isEnd()) {
      continue;
    }

    int spanEnd = token.get(CoreAnnotations.IndexAnnotation.class);
    if (openStart < 0) {
      Redwood.log("Start of marked mention not found in sentence: " + marker + " at tokenIndex=" + (spanEnd - 1) + " for " + s.get(CoreAnnotations.TextAnnotation.class));
      continue;
    }

    IntPair span = new IntPair(openStart, spanEnd);
    Mention marked = new Mention(-1, openStart, spanEnd, tokens, basicGraph, enhancedGraph, new ArrayList<>(tokens.subList(openStart, spanEnd)));
    mentions.add(marked);
    mentionSpanSet.add(span);
    openStart = -1;
  }
}
use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.
the class DependencyCorefMentionFinder method extractPronounForHeadword.
/**
 * Adds a mention for a pronoun headword and, when the pronoun heads a conjunction, a second
 * mention for the enclosing noun-phrase span.
 *
 * <p>The pronoun mention covers the headword token, extended by one token when the next word
 * is "all" or "both" and {@code dep} has an edge from the headword to it. Mentions are only
 * added when their span is new and not inside a named entity (see {@code addMention}).
 *
 * @param headword the pronoun token; its {@code index()} is treated as 1-based
 * @param dep dependency graph used for the edge and conjunction lookups
 * @param s the sentence providing tokens and dependency graphs
 * @param mentions receives the created mentions
 * @param mentionSpanSet spans already used in this sentence; updated in place
 * @param namedEntitySpanSet named-entity spans used to reject nested mentions
 */
private void extractPronounForHeadword(IndexedWord headword, SemanticGraph dep, CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
  SemanticGraph basic = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhanced = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  if (enhanced == null) {
    // fall back to the basic dependencies when no enhanced graph is attached
    enhanced = basic;
  }
  int beginIdx = headword.index() - 1;
  int endIdx = headword.index();
  // handle "you all", "they both" etc
  if (sent.size() > headword.index()) {
    // headword.index() is 1-based, so used as a 0-based index it addresses the next token
    String nextWord = sent.get(headword.index()).word();
    if ("all".equals(nextWord) || "both".equals(nextWord)) {
      IndexedWord c = dep.getNodeByIndex(headword.index() + 1);
      SemanticGraphEdge edge = dep.getEdge(headword, c);
      if (edge != null) {
        endIdx++;
      }
    }
  }
  // same span/NE checks and head assignment as every other mention created by this finder
  addMention(beginIdx, endIdx, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
  // when pronoun is a part of conjunction (e.g., you and I)
  Set<IndexedWord> conjChildren = dep.getChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.CONJUNCT);
  if (!conjChildren.isEmpty()) {
    IntPair npSpan = getNPSpan(headword, dep, sent);
    beginIdx = npSpan.get(0);
    endIdx = npSpan.get(1) + 1;
    // try not to have span that ends with ,
    if (",".equals(sent.get(endIdx - 1).word())) {
      endIdx--;
    }
    addMention(beginIdx, endIdx, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
  }
}
use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.
the class DependencyCorefMentionFinder method findMentions.
/**
 * Main method of mention detection.
 * Extract all NP, PRP or NE, and filter out by manually written patterns.
 *
 * @param doc the annotated document; sentences are read from {@code SentencesAnnotation}
 * @param dict dictionaries handed to the filtering and classification steps
 * @param props properties controlling nested-mention removal and whether the
 *     classification step is skipped (mention-detection training)
 * @return one list of predicted mentions per sentence, in sentence order
 */
@Override
public List<List<Mention>> findMentions(Annotation doc, Dictionaries dict, Properties props) {
  List<List<Mention>> mentionsBySentence = new ArrayList<>();
  Set<String> neStrings = Generics.newHashSet();
  List<Set<IntPair>> spanSetsBySentence = Generics.newArrayList();
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);

  // pass 1: gather candidate mentions sentence by sentence
  for (CoreMap sentence : sentences) {
    List<Mention> sentenceMentions = new ArrayList<>();
    mentionsBySentence.add(sentenceMentions);
    Set<IntPair> usedSpans = Generics.newHashSet();
    Set<IntPair> neSpans = Generics.newHashSet();

    extractPremarkedEntityMentions(sentence, sentenceMentions, usedSpans, neSpans);
    HybridCorefMentionFinder.extractNamedEntityMentions(sentence, sentenceMentions, usedSpans, neSpans);
    extractNPorPRPFromDependency(sentence, sentenceMentions, usedSpans, neSpans);
    addNamedEntityStrings(sentence, neStrings, neSpans);

    spanSetsBySentence.add(usedSpans);
  }

  // pass 2: head finding, once all sentences have their candidates
  int sentenceIndex = 0;
  for (CoreMap sentence : sentences) {
    findHead(sentence, mentionsBySentence.get(sentenceIndex));
    sentenceIndex++;
  }

  // mention selection based on document-wise info
  boolean dropNested = CorefProperties.removeNestedMentions(props);
  removeSpuriousMentions(doc, mentionsBySentence, dict, dropNested, lang);

  // if this is for MD training, skip classification
  boolean trainingMentionDetection = CorefProperties.isMentionDetectionTraining(props);
  if (!trainingMentionDetection) {
    mdClassifier.classifyMentions(mentionsBySentence, dict, props);
  }
  return mentionsBySentence;
}
use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.
the class DependencyCorefMentionFinder method addMention.
/**
 * Creates a mention for the token span {@code [beginIdx, endIdx)} and registers it, unless
 * the span is already in {@code mentionSpanSet} or {@code insideNE} reports it as lying
 * inside a named entity.
 *
 * <p>The new mention gets a dummy id of {@code -1}; its head index, head word and lower-cased
 * head string are taken from {@code headword} (whose {@code index()} is treated as 1-based).
 *
 * @param beginIdx first token of the span (inclusive)
 * @param endIdx end of the span (exclusive)
 * @param headword the head token of the mention
 * @param mentions receives the created mention
 * @param mentionSpanSet spans already used; the new span is added on success
 * @param namedEntitySpanSet named-entity spans passed to {@code insideNE}
 * @param sent the sentence tokens
 * @param basic basic dependency graph handed to the mention
 * @param enhanced enhanced dependency graph handed to the mention
 */
private void addMention(int beginIdx, int endIdx, IndexedWord headword, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet, List<CoreLabel> sent, SemanticGraph basic, SemanticGraph enhanced) {
  IntPair span = new IntPair(beginIdx, endIdx);
  if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
    // duplicate span, or nested inside a named entity: nothing to add
    return;
  }

  Mention created = new Mention(-1, beginIdx, endIdx, sent, basic, enhanced, new ArrayList<>(sent.subList(beginIdx, endIdx)));
  int headPosition = headword.index() - 1;
  created.headIndex = headPosition;
  created.headWord = sent.get(headPosition);
  created.headString = created.headWord.word().toLowerCase(Locale.ENGLISH);

  mentions.add(created);
  mentionSpanSet.add(span);
}
Aggregations