Usage example of edu.stanford.nlp.ling.CoreLabel from the CoreNLP project (stanfordnlp):
class CoNLL2011DocumentReader, method getMention.
/**
 * Finds the span of a coreference mention that starts at {@code index} by scanning forward
 * through the sentence while each following token's coref annotation still lists the
 * coref group {@code corefG} (the annotation value is a "|"-separated list of group ids).
 *
 * @param index        start token index of the mention within the sentence
 * @param corefG       the coref group id to follow
 * @param sentenceAnno the tokens of the sentence
 * @return a pair (start, end) of token indices, inclusive; end == start for a
 *         single-token mention
 */
public static Pair<Integer, Integer> getMention(Integer index, String corefG, List<CoreLabel> sentenceAnno) {
  int end = index;
  // Start just past the mention start (the original scanned the whole prefix only to skip
  // it with an index check) and extend the span while the coref group continues.
  for (int i = index + 1; i < sentenceAnno.size(); i++) {
    String corefS = sentenceAnno.get(i).get(CorefCoreAnnotations.CorefAnnotation.class);
    if (corefS == null) {
      break; // token carries no coref annotation -> mention has ended
    }
    String[] allC = corefS.split("\\|");
    if (!Arrays.asList(allC).contains(corefG)) {
      break; // token belongs to other coref group(s) -> mention has ended
    }
    end = i;
  }
  return Pair.makePair(index, end);
}
Usage example of edu.stanford.nlp.ling.CoreLabel from the CoreNLP project (stanfordnlp):
class CoNLLMentionExtractor, method recallErrors.
/**
 * Logs recall errors of the mention detector: gold mentions whose token spans were not
 * proposed as predicted mentions. For each missed mention, the sentence, the mention
 * tokens, and the parse tree are written to the logger at level FINER.
 *
 * @param goldMentions      gold mentions, one list per sentence
 * @param predictedMentions predicted mentions, one list per sentence (parallel to gold)
 * @param doc               the annotated document the mentions came from
 * @throws IOException declared for callers; no I/O is performed directly here
 */
private static void recallErrors(List<List<Mention>> goldMentions, List<List<Mention>> predictedMentions, Annotation doc) throws IOException {
  List<CoreMap> coreMaps = doc.get(CoreAnnotations.SentencesAnnotation.class);
  int numSentences = goldMentions.size();
  for (int i = 0; i < numSentences; i++) {
    CoreMap coreMap = coreMaps.get(i);
    List<CoreLabel> words = coreMap.get(CoreAnnotations.TokensAnnotation.class);
    Tree tree = coreMap.get(TreeCoreAnnotations.TreeAnnotation.class);
    List<Mention> goldMentionsSent = goldMentions.get(i);
    List<Pair<Integer, Integer>> goldMentionsSpans = extractSpans(goldMentionsSent);
    // BUG FIX: predictedMentions was previously unused, so EVERY gold mention was logged
    // as a recall error. Only report gold spans absent from the predicted spans.
    List<Pair<Integer, Integer>> predictedSpans = extractSpans(predictedMentions.get(i));
    for (Pair<Integer, Integer> mentionSpan : goldMentionsSpans) {
      if (predictedSpans.contains(mentionSpan)) {
        continue; // mention was recalled correctly — not an error
      }
      logger.finer("RECALL ERROR\n");
      logger.finer(coreMap + "\n");
      // Span end index is exclusive here, matching extractSpans' convention.
      for (int x = mentionSpan.first; x < mentionSpan.second; x++) {
        logger.finer(words.get(x).value() + " ");
      }
      logger.finer("\n" + tree + "\n");
    }
  }
}
Usage example of edu.stanford.nlp.ling.CoreLabel from the CoreNLP project (stanfordnlp):
class CoNLLMentionExtractor, method nextDoc.
/**
 * Reads the next CoNLL-2011 document, prepares its annotation (parses/dependencies,
 * speaker and utterance ids), runs the Stanford pipeline over it, extracts gold and
 * predicted mentions, and assembles everything into a coref {@code Document}.
 *
 * @return the next document, or {@code null} when the reader is exhausted
 * @throws Exception propagated from the underlying reader or pipeline
 */
@Override
public Document nextDoc() throws Exception {
  List<List<CoreLabel>> allWords = new ArrayList<>();
  List<Tree> allTrees = new ArrayList<>();
  CoNLL2011DocumentReader.Document conllDoc = reader.getNextDocument();
  if (conllDoc == null) {
    return null; // no more documents in the corpus
  }
  Annotation anno = conllDoc.getAnnotation();
  List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    if (!Constants.USE_GOLD_PARSES && !replicateCoNLL) {
      // Remove tree from annotation and replace with parse using stanford parser
      sentence.remove(TreeCoreAnnotations.TreeAnnotation.class);
    } else {
      // Keep the gold tree; optionally lemmatize it in place before extracting dependencies.
      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      if (LEMMATIZE) {
        treeLemmatizer.transformTree(tree);
      }
      // generate the dependency graph
      try {
        SemanticGraph deps = SemanticGraphFactory.makeFromTree(tree, SemanticGraphFactory.Mode.ENHANCED, GrammaticalStructure.Extras.NONE);
        SemanticGraph basicDeps = SemanticGraphFactory.makeFromTree(tree, SemanticGraphFactory.Mode.BASIC, GrammaticalStructure.Extras.NONE);
        sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basicDeps);
        sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, deps);
      } catch (Exception e) {
        // Best effort: a sentence without dependencies is still usable downstream.
        logger.log(Level.WARNING, "Exception caught during extraction of Stanford dependencies. Will ignore and continue...", e);
      }
    }
  }
  // Assign utterance ids: increment whenever the speaker changes between consecutive tokens.
  String preSpeaker = null;
  int utterance = -1;
  for (CoreLabel token : anno.get(CoreAnnotations.TokensAnnotation.class)) {
    if (!token.containsKey(CoreAnnotations.SpeakerAnnotation.class)) {
      token.set(CoreAnnotations.SpeakerAnnotation.class, "");
    }
    String curSpeaker = token.get(CoreAnnotations.SpeakerAnnotation.class);
    if (!curSpeaker.equals(preSpeaker)) {
      utterance++;
      preSpeaker = curSpeaker;
    }
    token.set(CoreAnnotations.UtteranceAnnotation.class, utterance);
  }
  // Run pipeline
  stanfordProcessor.annotate(anno);
  // Collect the (possibly re-parsed) tokens and trees per sentence for arrange() below.
  for (CoreMap sentence : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
    allWords.add(sentence.get(CoreAnnotations.TokensAnnotation.class));
    allTrees.add(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
  }
  // Initialize gold mentions
  List<List<Mention>> allGoldMentions = extractGoldMentions(conllDoc);
  List<List<Mention>> allPredictedMentions;
  if (Constants.USE_GOLD_MENTIONS) {
    //allPredictedMentions = allGoldMentions;
    // Make copy of gold mentions since mentions may be later merged, mentionID's changed and stuff
    allPredictedMentions = makeCopy(allGoldMentions);
  } else if (Constants.USE_GOLD_MENTION_BOUNDARIES) {
    // Gold boundaries, but filtered/re-typed by the rule-based finder.
    allPredictedMentions = ((RuleBasedCorefMentionFinder) mentionFinder).filterPredictedMentions(allGoldMentions, anno, dictionaries);
  } else {
    allPredictedMentions = mentionFinder.extractPredictedMentions(anno, maxID, dictionaries);
  }
  try {
    recallErrors(allGoldMentions, allPredictedMentions, anno);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  Document doc = arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);
  doc.conllDoc = conllDoc;
  return doc;
}
Usage example of edu.stanford.nlp.ling.CoreLabel from the CoreNLP project (stanfordnlp):
class MentionDetectionClassifier, method extractFeatures.
/**
 * Extracts classification features for a candidate mention: NE type of the head word,
 * whether the span matches a known NE string, surrounding words/POS tags, first/last
 * word and tag of the span, and containment relations with other mentions sharing the
 * same head.
 *
 * @param p         the candidate mention
 * @param shares    mentions sharing the same head position as {@code p} (may include p)
 * @param neStrings lowercase normalized span strings of named entities in the document
 * @param dict      dictionaries (unused here but part of the feature-extractor contract)
 * @param props     properties (unused here but part of the feature-extractor contract)
 * @return a counter of binary/count features
 */
public static Counter<String> extractFeatures(Mention p, Set<Mention> shares, Set<String> neStrings, Dictionaries dict, Properties props) {
  Counter<String> features = new ClassicCounter<>();
  String span = p.lowercaseNormalizedSpanString();
  String ner = p.headWord.ner();
  int sIdx = p.startIndex;
  int eIdx = p.endIndex;
  List<CoreLabel> sent = p.sentenceWords;
  // Neighbor tokens; null at sentence boundaries.
  CoreLabel preWord = (sIdx == 0) ? null : sent.get(sIdx - 1);
  CoreLabel nextWord = (eIdx == sent.size()) ? null : sent.get(eIdx);
  CoreLabel firstWord = p.originalSpan.get(0);
  CoreLabel lastWord = p.originalSpan.get(p.originalSpan.size() - 1);
  features.incrementCount("B-NETYPE-" + ner);
  if (neStrings.contains(span)) {
    features.incrementCount("B-NE-STRING-EXIST");
    // FULLSPAN: the span covers the whole named entity (neighbors have a different NE tag).
    if ((preWord == null || !preWord.ner().equals(ner)) && (nextWord == null || !nextWord.ner().equals(ner))) {
      features.incrementCount("B-NE-FULLSPAN");
    }
  }
  if (preWord != null) {
    features.incrementCount("B-PRECEDINGWORD-" + preWord.word());
    features.incrementCount("B-PRECEDINGPOS-" + preWord.tag());
  }
  if (nextWord != null) {
    features.incrementCount("B-FOLLOWINGWORD-" + nextWord.word());
    features.incrementCount("B-FOLLOWINGPOS-" + nextWord.tag());
  }
  features.incrementCount("B-FIRSTWORD-" + firstWord.word());
  features.incrementCount("B-FIRSTPOS-" + firstWord.tag());
  features.incrementCount("B-LASTWORD-" + lastWord.word());
  // BUG FIX: was "B-LASTWORD-" + lastWord.tag(), colliding with the feature above whenever
  // word == tag and mislabeling the POS feature; "B-LASTPOS-" mirrors "B-FIRSTPOS-".
  features.incrementCount("B-LASTPOS-" + lastWord.tag());
  // Containment relations among mentions sharing this head.
  for (Mention s : shares) {
    if (s == p) {
      continue;
    }
    if (s.insideIn(p)) {
      features.incrementCount("B-BIGGER-THAN-ANOTHER");
      break;
    }
  }
  for (Mention s : shares) {
    if (s == p) {
      continue;
    }
    if (p.insideIn(s)) {
      features.incrementCount("B-SMALLER-THAN-ANOTHER");
      break;
    }
  }
  return features;
}
Usage example of edu.stanford.nlp.ling.CoreLabel from the CoreNLP project (stanfordnlp):
class MentionDetectionClassifier, method classifyMentions.
/**
 * Prunes predicted mentions so that at most one mention survives per head position.
 * First collects the span strings of mentions that lie entirely inside a named entity;
 * then, for each set of mentions sharing a head index, keeps the one the classifier
 * scores highest and removes the rest.
 *
 * @param predictedMentions predicted mentions, one list per sentence (modified in place)
 * @param dict              dictionaries passed through to the classifier
 * @param props             properties passed through to the classifier
 */
public void classifyMentions(List<List<Mention>> predictedMentions, Dictionaries dict, Properties props) {
  Set<String> neStrings = Generics.newHashSet();
  for (List<Mention> predictedMention : predictedMentions) {
    for (Mention m : predictedMention) {
      String ne = m.headWord.ner();
      if (ne.equals("O")) {
        continue; // head is not a named entity
      }
      // BUG FIX: the original inner loop only did "continue" on itself — a no-op — so
      // every NE-headed mention was added. Evident intent: add the span only when ALL of
      // its tokens carry the head's NE tag (the span lies inside one named entity).
      boolean spanInsideNE = true;
      for (CoreLabel cl : m.originalSpan) {
        if (!cl.ner().equals(ne)) {
          spanInsideNE = false;
          break;
        }
      }
      if (spanInsideNE) {
        neStrings.add(m.lowercaseNormalizedSpanString());
      }
    }
  }
  for (List<Mention> predicts : predictedMentions) {
    // Group mentions by head index so competing mentions can be compared.
    Map<Integer, Set<Mention>> headPositions = Generics.newHashMap();
    for (Mention p : predicts) {
      if (!headPositions.containsKey(p.headIndex)) {
        headPositions.put(p.headIndex, Generics.newHashSet());
      }
      headPositions.get(p.headIndex).add(p);
    }
    Set<Mention> remove = Generics.newHashSet();
    for (int hPos : headPositions.keySet()) {
      Set<Mention> shares = headPositions.get(hPos);
      if (shares.size() > 1) {
        Counter<Mention> probs = new ClassicCounter<>();
        for (Mention p : shares) {
          double trueProb = probabilityOf(p, shares, neStrings, dict, props);
          probs.incrementCount(p, trueProb);
        }
        // add to remove: keep the highest-probability mention (span string breaks ties
        // deterministically), drop the others.
        Mention keep = Counters.argmax(probs, (m1, m2) -> m1.spanToString().compareTo(m2.spanToString()));
        probs.remove(keep);
        remove.addAll(probs.keySet());
      }
    }
    for (Mention r : remove) {
      predicts.remove(r);
    }
  }
}
Aggregations — end of collected usage examples.