Use of edu.stanford.nlp.ling.CoreLabel in the CoreNLP project by stanfordnlp.
From the class CorefMentionFinder, method addGoldMentions.
/**
 * Merges gold mentions into the predicted mentions, sentence by sentence. Temporary
 * helper intended for debugging only.
 *
 * <p>For each sentence, every gold mention whose (startIndex, endIndex) span is not yet in
 * that sentence's span set is re-created as a new {@code Mention} with the dummy id -1,
 * appended to the predicted mentions, and its span is recorded in the span set.
 *
 * @param sentences          the sentences; all four lists are read at the same index
 * @param mentionSpanSetList per-sentence sets of spans already present (modified in place)
 * @param predictedMentions  per-sentence predicted mentions (modified in place)
 * @param allGoldMentions    per-sentence gold mentions to merge in
 */
protected static void addGoldMentions(List<CoreMap> sentences, List<Set<IntPair>> mentionSpanSetList, List<List<Mention>> predictedMentions, List<List<Mention>> allGoldMentions) {
  final int sentenceCount = sentences.size();
  for (int sentIdx = 0; sentIdx < sentenceCount; sentIdx++) {
    List<Mention> predicted = predictedMentions.get(sentIdx);
    CoreMap sentence = sentences.get(sentIdx);
    List<CoreLabel> words = sentence.get(TokensAnnotation.class);
    Set<IntPair> knownSpans = mentionSpanSetList.get(sentIdx);

    for (Mention gold : allGoldMentions.get(sentIdx)) {
      IntPair span = new IntPair(gold.startIndex, gold.endIndex);
      if (knownSpans.contains(span)) {
        // a mention with this exact span already exists
        continue;
      }

      SemanticGraph basic = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
      SemanticGraph enhanced = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
      if (enhanced == null) {
        // no enhanced graph on this sentence: use the basic one in its place
        enhanced = basic;
      }
      List<CoreLabel> mentionWords = new ArrayList<>(words.subList(gold.startIndex, gold.endIndex));

      final int dummyMentionId = -1;
      predicted.add(new Mention(dummyMentionId, gold.startIndex, gold.endIndex, words, basic, enhanced, mentionWords));
      knownSpans.add(span);
    }
  }
}
Use of edu.stanford.nlp.ling.CoreLabel in the CoreNLP project by stanfordnlp.
From the class CorefMentionFinder, method extractEnumerations.
/**
 * Adds mentions for the subtrees captured by {@code enumerationsMentionPattern} in the
 * parse tree of a sentence.
 *
 * <p>Each pattern match contributes the nodes bound to {@code m1} and {@code m2}. A
 * captured subtree becomes a new {@code Mention} (dummy id -1) unless its span is already
 * in {@code mentionSpanSet} or {@code insideNE} reports it as lying inside a named entity.
 *
 * @param s                  the sentence; its tokens, parse tree and dependency graphs are read
 * @param mentions           receives the newly created mentions (modified in place)
 * @param mentionSpanSet     spans already covered by a mention (modified in place)
 * @param namedEntitySpanSet named-entity spans, passed to {@code insideNE}
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
  Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  if (enhancedDependency == null) {
    // no enhanced graph available: reuse the basic graph fetched above
    enhancedDependency = basicDependency;
  }

  // Keyed by span, so a subtree captured by several matches is only kept once.
  Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
  TregexMatcher matcher = enumerationsMentionPattern.matcher(tree);
  while (matcher.find()) {
    Tree m1 = matcher.getNode("m1");
    Tree m2 = matcher.getNode("m2");
    spanToMentionSubTree.put(enumerationMemberSpan(m1), m1);
    spanToMentionSubTree.put(enumerationMemberSpan(m2), m2);
  }

  for (Map.Entry<IntPair, Tree> spanMention : spanToMentionSubTree.entrySet()) {
    IntPair span = spanMention.getKey();
    if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
      continue;
    }
    int dummyMentionId = -1;
    int begin = span.get(0);
    int end = span.get(1);
    Mention m = new Mention(dummyMentionId, begin, end, sent, basicDependency, enhancedDependency, new ArrayList<>(sent.subList(begin, end)), spanMention.getValue());
    mentions.add(m);
    mentionSpanSet.add(span);
  }
}

/**
 * Computes the span of a subtree from the IndexAnnotation of its first and last leaves:
 * begin is the first leaf's index minus one, end is the last leaf's index unchanged.
 */
private static IntPair enumerationMemberSpan(Tree subtree) {
  List<Tree> leaves = subtree.getLeaves();
  int beginIdx = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
  int endIdx = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
  return new IntPair(beginIdx, endIdx);
}
Use of edu.stanford.nlp.ling.CoreLabel in the CoreNLP project by stanfordnlp.
From the class HybridCorefPrinter, method sentenceStringWithMention.
/**
 * Renders one sentence of a document as a single line in which every mention is wrapped
 * in square brackets and followed by {@code _<id>}.
 *
 * <p>The line starts with {@code "\tspeaker: <speaker> (<utterance>) "}, both values read
 * from the first token, so the sentence must contain at least one token. When the speaker
 * is a decimal integer and {@code document.predictedMentionsByID} holds a mention under
 * that id, the mention's text is appended to the speaker; an id with no such mention is
 * printed on its own.
 *
 * <p>A mention's opening bracket is written before the token at its {@code startIndex};
 * its closing bracket is written before the token at its {@code endIndex}, or after the
 * last token when {@code endIndex} equals the number of tokens.
 *
 * @param i              index of the sentence within the document
 * @param document       document providing the sentences and the mentions
 * @param gold           {@code true} to print gold mentions (and gold cluster ids),
 *                       {@code false} to print predicted mentions
 * @param printClusterID {@code true} to print the coreference cluster id after each
 *                       mention, {@code false} to print the mention id instead
 * @return the annotated sentence
 */
public static String sentenceStringWithMention(int i, Document document, boolean gold, boolean printClusterID) {
  List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class);
  List<List<Mention>> allMentions = gold ? document.goldMentions : document.predictedMentions;
  CoreMap sentence = sentences.get(i);
  List<Mention> mentions = allMentions.get(i);
  List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class);

  String speaker = t.get(0).get(SpeakerAnnotation.class);
  if (NumberMatchingRegex.isDecimalInteger(speaker)) {
    Mention speakerMention = document.predictedMentionsByID.get(Integer.parseInt(speaker));
    // The id may not belong to any predicted mention; keep the bare id rather than fail.
    if (speakerMention != null) {
      speaker = speaker + ": " + speakerMention.spanToString();
    }
  }

  StringBuilder sentStr = new StringBuilder();
  sentStr.append("\tspeaker: " + speaker + " (" + t.get(0).get(UtteranceAnnotation.class) + ") ");

  // Token indices are 1-based; place each word at its 0-based position.
  String[] tokens = new String[t.size()];
  for (CoreLabel c : t) {
    tokens[c.index() - 1] = c.word();
  }

  Counter<Integer> startCounts = new ClassicCounter<>();
  Map<Integer, Deque<Mention>> endMentions = Generics.newHashMap();
  for (Mention m : mentions) {
    startCounts.incrementCount(m.startIndex);
    Deque<Mention> endingHere = endMentions.get(m.endIndex);
    if (endingHere == null) {
      endingHere = new ArrayDeque<>();
      endMentions.put(m.endIndex, endingHere);
    }
    // push() puts the mention at the head, so mentions listed later are closed first
    endingHere.push(m);
  }

  for (int j = 0; j < tokens.length; j++) {
    appendClosingBrackets(sentStr, endMentions.get(j), gold, printClusterID);
    int opening = (int) startCounts.getCount(j);
    for (int k = 0; k < opening; k++) {
      appendSpaceUnlessAfterOpenBracket(sentStr);
      sentStr.append("[");
    }
    appendSpaceUnlessAfterOpenBracket(sentStr);
    sentStr.append(tokens[j]);
  }
  // mentions that extend to the end of the sentence
  appendClosingBrackets(sentStr, endMentions.get(tokens.length), gold, printClusterID);
  return sentStr.toString();
}

/** Appends {@code "]_<id>"} for every mention in {@code ending}; does nothing when it is null. */
private static void appendClosingBrackets(StringBuilder out, Deque<Mention> ending, boolean gold, boolean printClusterID) {
  if (ending == null) {
    return;
  }
  for (Mention m : ending) {
    int id;
    if (!printClusterID) {
      id = m.mentionID;
    } else {
      id = gold ? m.goldCorefClusterID : m.corefClusterID;
    }
    out.append("]_").append(id);
  }
}

/** Appends a single space unless {@code out} is empty or already ends with an opening bracket. */
private static void appendSpaceUnlessAfterOpenBracket(StringBuilder out) {
  if (out.length() > 0 && out.charAt(out.length() - 1) != '[') {
    out.append(" ");
  }
}
Use of edu.stanford.nlp.ling.CoreLabel in the CoreNLP project by stanfordnlp.
From the class CorefMentionFinder, method findTreeWithSpan.
/**
 * Searches {@code tree} depth-first, in child order, for the first node whose label has
 * BeginIndexAnnotation equal to {@code start} and EndIndexAnnotation equal to {@code end}.
 *
 * <p>A subtree is abandoned without visiting its children when its root carries both
 * annotations and {@code end} is before the root's begin index or {@code start} is at or
 * after the root's end index. Nodes lacking either annotation are never pruned.
 *
 * @param tree  root of the subtree to search; its label is cast to {@code CoreLabel}
 * @param start begin index to look for
 * @param end   end index to look for
 * @return the first matching node, or {@code null} if there is none
 */
private static Tree findTreeWithSpan(Tree tree, int start, int end) {
  CoreLabel label = (CoreLabel) tree.label();
  boolean hasSpan = label != null
      && label.containsKey(CoreAnnotations.BeginIndexAnnotation.class)
      && label.containsKey(CoreAnnotations.EndIndexAnnotation.class);

  if (hasSpan) {
    int nodeStart = label.get(CoreAnnotations.BeginIndexAnnotation.class);
    int nodeEnd = label.get(CoreAnnotations.EndIndexAnnotation.class);
    if (nodeStart == start && nodeEnd == end) {
      // exact match on this node
      return tree;
    }
    if (end < nodeStart || start >= nodeEnd) {
      // requested span lies outside this node: give up on the whole subtree
      return null;
    }
  }

  // a match may still be found further down
  for (Tree child : tree.children()) {
    if (child != null) {
      Tree found = findTreeWithSpan(child, start, end);
      if (found != null) {
        return found;
      }
    }
  }
  return null;
}
Use of edu.stanford.nlp.ling.CoreLabel in the CoreNLP project by stanfordnlp.
From the class CorefMentionFinder, method convertToCoreLabels.
/**
 * Recursively replaces every label in {@code tree} that is not a {@code CoreLabel} with a
 * new {@code CoreLabel} carrying the old label's value.
 *
 * <p>This probably isn't needed now, since everything is expected to be a core label
 * already, in which case the method is a no-op. A node without a label receives an empty
 * {@code CoreLabel}, and null children are skipped.
 *
 * @param tree root of the tree to convert in place
 */
private static void convertToCoreLabels(Tree tree) {
  Label l = tree.label();
  if (!(l instanceof CoreLabel)) {
    CoreLabel cl = new CoreLabel();
    // instanceof is also false for a missing label, so guard before reading its value
    if (l != null) {
      cl.setValue(l.value());
    }
    tree.setLabel(cl);
  }
  for (Tree kid : tree.children()) {
    if (kid == null) {
      continue;
    }
    convertToCoreLabels(kid);
  }
}
Aggregations