Use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.
The class HybridCorefMentionFinder, method findMentions:
@Override
public List<List<Mention>> findMentions(Annotation doc, Dictionaries dict, Properties props) {
  List<List<Mention>> predictedMentions = new ArrayList<>();
  Set<String> neStrings = Generics.newHashSet();
  List<Set<IntPair>> mentionSpanSetList = Generics.newArrayList();
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  // extract premarked mentions, NP/PRP, named entity, enumerations
  for (CoreMap s : sentences) {
    List<Mention> mentions = new ArrayList<>();
    predictedMentions.add(mentions);
    Set<IntPair> mentionSpanSet = Generics.newHashSet();
    Set<IntPair> namedEntitySpanSet = Generics.newHashSet();
    extractPremarkedEntityMentions(s, mentions, mentionSpanSet, namedEntitySpanSet);
    extractNamedEntityMentions(s, mentions, mentionSpanSet, namedEntitySpanSet);
    extractNPorPRP(s, mentions, mentionSpanSet, namedEntitySpanSet);
    extractEnumerations(s, mentions, mentionSpanSet, namedEntitySpanSet);
    addNamedEntityStrings(s, neStrings, namedEntitySpanSet);
    mentionSpanSetList.add(mentionSpanSet);
  }
  extractNamedEntityModifiers(sentences, mentionSpanSetList, predictedMentions, neStrings);
  // find head
  for (int i = 0; i < sentences.size(); i++) {
    findHead(sentences.get(i), predictedMentions.get(i));
  }
  // mention selection based on document-wise info
  removeSpuriousMentions(doc, predictedMentions, dict, CorefProperties.removeNestedMentions(props), lang);
  // if this is for MD training, skip classification
  if (!CorefProperties.isMentionDetectionTraining(props)) {
    mdClassifier.classifyMentions(predictedMentions, dict, props);
  }
  return predictedMentions;
}
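All of the extraction passes above share a per-sentence Set<IntPair> so that the same token span is never turned into a second Mention. This works because IntPair compares by value, so independently constructed spans with the same endpoints count as the same set element. A minimal standalone sketch of that behavior (not CoreNLP code; the class name SpanDedupSketch is invented for illustration):

import java.util.HashSet;
import java.util.Set;

import edu.stanford.nlp.util.IntPair;

public class SpanDedupSketch {
  public static void main(String[] args) {
    Set<IntPair> mentionSpanSet = new HashSet<>();
    // Span recorded by an earlier extraction pass (half-open token indices [3, 7)).
    mentionSpanSet.add(new IntPair(3, 7));

    // The same token span found again by a later pass: a distinct object,
    // but equal by value, so the set already "contains" it.
    IntPair candidate = new IntPair(3, 7);
    if (!mentionSpanSet.contains(candidate)) {
      mentionSpanSet.add(candidate);
    }
    System.out.println(mentionSpanSet.size());  // prints 1
  }
}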
Use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.
The class CoNLLDocumentReader, method writeTabSep:
public static void writeTabSep(PrintWriter pw, CoreMap sentence, CollectionValuedMap<String, CoreMap> chainmap) {
  HeadFinder headFinder = new ModCollinsHeadFinder();
  List<CoreLabel> sentenceAnno = sentence.get(CoreAnnotations.TokensAnnotation.class);
  Tree sentenceTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
  Map<Pair<Integer, Integer>, String> sentenceInfo = Generics.newHashMap();
  Set<Tree> sentenceSubTrees = sentenceTree.subTrees();
  sentenceTree.setSpans();
  Map<Pair<Integer, Integer>, Tree> treeSpanMap = Generics.newHashMap();
  Map<Pair<Integer, Integer>, List<Tree>> wordSpanMap = Generics.newHashMap();
  for (Tree ctree : sentenceSubTrees) {
    IntPair span = ctree.getSpan();
    if (span != null) {
      treeSpanMap.put(Pair.makePair(span.getSource(), span.getTarget()), ctree);
      wordSpanMap.put(Pair.makePair(span.getSource(), span.getTarget()), ctree.getLeaves());
    }
  }
  String[][] finalSentence = new String[sentenceAnno.size()][];
  Map<Pair<Integer, Integer>, String> allHeads = Generics.newHashMap();
  int index = -1;
  for (CoreLabel newAnno : sentenceAnno) {
    index += 1;
    String word = newAnno.word();
    String tag = newAnno.tag();
    String cat = newAnno.ner();
    String coref = newAnno.get(CorefCoreAnnotations.CorefAnnotation.class);
    finalSentence[index] = new String[4];
    finalSentence[index][0] = word;
    finalSentence[index][1] = tag;
    finalSentence[index][2] = cat;
    finalSentence[index][3] = coref;
    if (coref == null) {
      sentenceInfo.put(Pair.makePair(index, index), coref);
      finalSentence[index][3] = "O";
    } else {
      String[] allC = coref.split("\\|");
      for (String corefG : allC) {
        Pair<Integer, Integer> mention = getMention(index, corefG, sentenceAnno);
        if (!include(sentenceInfo, mention, corefG)) {
          // find largest NP in mention
          sentenceInfo.put(mention, corefG);
          Tree mentionTree = treeSpanMap.get(mention);
          String head = null;
          if (mentionTree != null) {
            head = mentionTree.headTerminal(headFinder).nodeString();
          } else if (mention.first.equals(mention.second)) {
            head = word;
          }
          allHeads.put(mention, head);
        }
      }
      if (allHeads.values().contains(word)) {
        finalSentence[index][3] = "MENTION";
      } else {
        finalSentence[index][3] = "O";
      }
    }
  }
  for (int i = 0; i < finalSentence.length; i++) {
    String[] wordInfo = finalSentence[i];
    if (i < finalSentence.length - 1) {
      String[] nextWordInfo = finalSentence[i + 1];
      if (nextWordInfo[3].equals("MENTION") && nextWordInfo[0].equals("'s")) {
        wordInfo[3] = "MENTION";
        finalSentence[i + 1][3] = "O";
      }
    }
    pw.println(wordInfo[0] + "\t" + wordInfo[1] + "\t" + wordInfo[2] + "\t" + wordInfo[3]);
  }
  pw.println("");
}
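writeTabSep relies on Tree.setSpans() to annotate every tree node with an IntPair of 0-based, inclusive leaf indices, which it then re-keys as Pair objects for the span-to-subtree maps. The sketch below is an illustrative example rather than CoreNLP code (the class name TreeSpanSketch and the toy parse are invented); note that setSpans() needs CoreLabel node labels, hence the explicit label factory:

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.IntPair;

public class TreeSpanSketch {
  public static void main(String[] args) {
    // Parse a tiny constituency tree with CoreLabel labels so setSpans() can store spans.
    Tree tree = Tree.valueOf("(S (NP (DT the) (NN cat)) (VP (VBD slept)))",
        new LabeledScoredTreeReaderFactory(CoreLabel.factory()));
    tree.setSpans();  // annotate every node with its span of leaf indices

    for (Tree subtree : tree.subTrees()) {
      IntPair span = subtree.getSpan();  // 0-based, inclusive leaf indices
      if (span != null) {
        System.out.println(subtree.value() + " -> [" + span.getSource() + ", " + span.getTarget() + "]");
      }
    }
    // e.g. NP -> [0, 1], VP -> [2, 2], S -> [0, 2]
  }
}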
Use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.
The class CorefChain, method deleteMention:
/**
* Delete a mention from this coreference chain.
* @param m The mention to delete.
*/
public void deleteMention(CorefMention m) {
  this.mentions.remove(m);
  IntPair position = new IntPair(m.sentNum, m.headIndex);
  this.mentionMap.remove(position);
}
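deleteMention can locate the map entry with a freshly constructed IntPair because the key encodes only the mention's position, (sentence number, head token index), and IntPair's equals()/hashCode() compare those stored values. A minimal sketch of that keying pattern (illustrative only; the map values are simplified to strings here and the class name MentionMapSketch is invented):

import java.util.HashMap;
import java.util.Map;

import edu.stanford.nlp.util.IntPair;

public class MentionMapSketch {
  public static void main(String[] args) {
    // Index mentions by position: (sentence number, head token index).
    Map<IntPair, String> mentionMap = new HashMap<>();
    mentionMap.put(new IntPair(2, 5), "the president");

    // A new IntPair with the same coordinates removes the existing entry,
    // since lookup is by value, not by object identity.
    mentionMap.remove(new IntPair(2, 5));
    System.out.println(mentionMap.isEmpty());  // true
  }
}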
Use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.
The class RuleBasedCorefMentionFinder, method extractNamedEntityMentions:
protected static void extractNamedEntityMentions(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
  SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  if (enhancedDependency == null) {
    enhancedDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  }
  String preNE = "O";
  int beginIndex = -1;
  for (CoreLabel w : sent) {
    String nerString = w.ner();
    if (!nerString.equals(preNE)) {
      int endIndex = w.get(CoreAnnotations.IndexAnnotation.class) - 1;
      if (!preNE.matches("O|QUANTITY|CARDINAL|PERCENT|DATE|DURATION|TIME|SET")) {
        if (w.get(CoreAnnotations.TextAnnotation.class).equals("'s") && w.tag().equals("POS")) {
          endIndex++;
        }
        IntPair mSpan = new IntPair(beginIndex, endIndex);
        // attached to the previous NER by the earlier heuristic
        if (beginIndex < endIndex && !mentionSpanSet.contains(mSpan)) {
          int dummyMentionId = -1;
          Mention m = new Mention(dummyMentionId, beginIndex, endIndex, sent, basicDependency, enhancedDependency, new ArrayList<>(sent.subList(beginIndex, endIndex)));
          mentions.add(m);
          mentionSpanSet.add(mSpan);
          namedEntitySpanSet.add(mSpan);
        }
      }
      beginIndex = endIndex;
      preNE = nerString;
    }
  }
  // NE at the end of sentence
  if (!preNE.matches("O|QUANTITY|CARDINAL|PERCENT|DATE|DURATION|TIME|SET")) {
    IntPair mSpan = new IntPair(beginIndex, sent.size());
    if (!mentionSpanSet.contains(mSpan)) {
      int dummyMentionId = -1;
      Mention m = new Mention(dummyMentionId, beginIndex, sent.size(), sent, basicDependency, enhancedDependency, new ArrayList<>(sent.subList(beginIndex, sent.size())));
      mentions.add(m);
      mentionSpanSet.add(mSpan);
      namedEntitySpanSet.add(mSpan);
    }
  }
}
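The loop above collects maximal runs of identical, non-excluded NER labels into half-open [beginIndex, endIndex) spans over 0-based token positions, the same convention sent.subList expects. A simplified standalone sketch of that run-collection idea, using only IntPair (not CoreNLP's exact logic; the class name NerRunSketch is invented):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import edu.stanford.nlp.util.IntPair;

public class NerRunSketch {
  // Collect maximal runs of identical, non-"O" NER tags as half-open [begin, end) spans.
  static List<IntPair> nerSpans(List<String> nerTags) {
    List<IntPair> spans = new ArrayList<>();
    String prev = "O";
    int begin = -1;
    for (int i = 0; i <= nerTags.size(); i++) {
      String tag = (i < nerTags.size()) ? nerTags.get(i) : "O";  // sentinel flushes the last run
      if (!tag.equals(prev)) {
        if (!prev.equals("O")) {
          spans.add(new IntPair(begin, i));
        }
        begin = i;
        prev = tag;
      }
    }
    return spans;
  }

  public static void main(String[] args) {
    // "Barack Obama visited Paris ." -> PERSON run over [0, 2), LOCATION run over [3, 4)
    System.out.println(nerSpans(Arrays.asList("PERSON", "PERSON", "O", "LOCATION", "O")));
  }
}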
Use of edu.stanford.nlp.util.IntPair in project CoreNLP by stanfordnlp.
The class RuleBasedCorefMentionFinder, method extractNPorPRP:
public void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
  Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  tree.indexLeaves();
  SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  if (enhancedDependency == null) {
    enhancedDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  }
  TregexPattern tgrepPattern = npOrPrpMentionPattern;
  TregexMatcher matcher = tgrepPattern.matcher(tree);
  while (matcher.find()) {
    Tree t = matcher.getMatch();
    List<Tree> mLeaves = t.getLeaves();
    int beginIdx = ((CoreLabel) mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
    int endIdx = ((CoreLabel) mLeaves.get(mLeaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
    //if (",".equals(sent.get(endIdx-1).word())) { endIdx--; } // try not to have span that ends with ,
    IntPair mSpan = new IntPair(beginIdx, endIdx);
    if (!mentionSpanSet.contains(mSpan) && (lang == Locale.CHINESE || !insideNE(mSpan, namedEntitySpanSet))) {
      // if(!mentionSpanSet.contains(mSpan) && (!insideNE(mSpan, namedEntitySpanSet) || t.value().startsWith("PRP")) ) {
      int dummyMentionId = -1;
      Mention m = new Mention(dummyMentionId, beginIdx, endIdx, sent, basicDependency, enhancedDependency, new ArrayList<>(sent.subList(beginIdx, endIdx)), t);
      mentions.add(m);
      mentionSpanSet.add(mSpan);
      // if(m.originalSpan.size() > 1) {
      //   boolean isNE = true;
      //   for(CoreLabel cl : m.originalSpan) {
      //     if(!cl.tag().startsWith("NNP")) isNE = false;
      //   }
      //   if(isNE) {
      //     namedEntitySpanSet.add(mSpan);
      //   }
      // }
    }
  }
}
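extractNPorPRP converts each Tregex match into a token span by reading the 1-based IndexAnnotation of the first and last leaves, which yields a half-open, 0-based IntPair span. The sketch below shows the same conversion with a simplified pattern (the real npOrPrpMentionPattern carries additional constraints); the class name NpSpanSketch and the toy tree are invented for illustration:

import java.util.List;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.util.IntPair;

public class NpSpanSketch {
  public static void main(String[] args) {
    // Parse a toy tree with CoreLabel labels so indexLeaves() can attach token indices.
    Tree tree = Tree.valueOf("(S (NP (PRP She)) (VP (VBD saw) (NP (DT the) (NN dog))))",
        new LabeledScoredTreeReaderFactory(CoreLabel.factory()));
    tree.indexLeaves();  // leaves get 1-based IndexAnnotation values

    // Simplified pattern matching bare NP or PRP nodes.
    TregexPattern pattern = TregexPattern.compile("NP|PRP");
    TregexMatcher matcher = pattern.matcher(tree);
    while (matcher.find()) {
      Tree match = matcher.getMatch();
      List<Tree> leaves = match.getLeaves();
      // Turn the 1-based indices of the first and last leaves into a half-open, 0-based span.
      int begin = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
      int end = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
      System.out.println(match.value() + " -> " + new IntPair(begin, end));
    }
  }
}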