Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class Mention, method lowestNPIncludesHead.
/**
 * Builds the space-separated text of the closest node labeled "NP" found by
 * climbing from the head leaf of the context parse tree.
 *
 * <p>If the climb reaches a node labeled "ROOT" before any "NP", only the head
 * leaf itself is used. If the climb runs out of ancestors, the empty string is
 * returned. If the resulting text is not contained in {@code spanToString()},
 * the text of the head token from {@code sentenceWords} is returned instead.
 *
 * @return the text described above
 */
public String lowestNPIncludesHead() {
  final Tree headLeaf = this.contextParseTree.getLeaves().get(this.headIndex);

  // Walk upwards one ancestor at a time until an NP or ROOT label is seen.
  Tree node = headLeaf;
  String category = null;
  while (node != null) {
    category = ((CoreLabel) node.label()).get(CoreAnnotations.ValueAnnotation.class);
    if (category.equals("NP") || category.equals("ROOT")) {
      break;
    }
    node = node.ancestor(1, this.contextParseTree);
  }
  if (node == null) {
    return "";
  }

  // Hitting ROOT means no NP was found on the way up; use the head leaf alone.
  final Tree textSource = category.equals("ROOT") ? headLeaf : node;

  StringBuilder joined = new StringBuilder();
  for (Tree leaf : textSource.getLeaves()) {
    if (joined.length() > 0) {
      joined.append(" ");
    }
    joined.append(((CoreLabel) leaf.label()).get(CoreAnnotations.TextAnnotation.class));
  }
  final String npText = joined.toString();

  if (this.spanToString().contains(npText)) {
    return npText;
  }
  return this.sentenceWords.get(this.headIndex).get(CoreAnnotations.TextAnnotation.class);
}
Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class Mention, method nerTokens.
/**
 * Retrieves the part of the span that corresponds to the NER, growing outwards
 * from the head token while neighbouring tokens carry the same NER tag as
 * {@code nerString}.
 *
 * @return {@code originalSpan.subList(start, end)} for the matched range, or
 *     {@code null} when {@code nerString} is {@code null} or {@code "O"}
 */
public List<CoreLabel> nerTokens() {
  if (nerString == null || "O".equals(nerString)) {
    return null;
  }

  // Position of the head token relative to the start of the span.
  final int headOffset = headIndex - startIndex;

  // Extend to the left while the preceding token has the same NER tag.
  int from = headOffset;
  while (from > 0 && nerString.equals(originalSpan.get(from - 1).ner())) {
    from--;
  }

  // Extend to the right (exclusive bound) while the next token has the same NER tag.
  int to = headOffset + 1;
  while (to < originalSpan.size() && nerString.equals(originalSpan.get(to).ner())) {
    to++;
  }

  return originalSpan.subList(from, to);
}
Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class CoNLLDocumentReader, method writeTabSep.
/**
 * Writes one sentence to {@code pw} as tab-separated lines, one per token, with
 * four columns: word, POS tag, NER tag, and a marker that is either
 * {@code "MENTION"} or {@code "O"}. A blank line is written after the sentence.
 *
 * <p>A token is marked {@code "MENTION"} when it carries a coref annotation and
 * its word equals one of the mention heads collected so far for this sentence.
 * When a token marked {@code "MENTION"} is {@code "'s"}, the marker is moved to
 * the preceding token.
 *
 * @param pw destination writer
 * @param sentence sentence providing the token list and the parse tree
 * @param chainmap not read by this method; kept for interface compatibility
 */
public static void writeTabSep(PrintWriter pw, CoreMap sentence, CollectionValuedMap<String, CoreMap> chainmap) {
  HeadFinder headFinder = new ModCollinsHeadFinder();
  List<CoreLabel> sentenceAnno = sentence.get(CoreAnnotations.TokensAnnotation.class);
  Tree sentenceTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
  Map<Pair<Integer, Integer>, String> sentenceInfo = Generics.newHashMap();
  Set<Tree> sentenceSubTrees = sentenceTree.subTrees();
  sentenceTree.setSpans();

  // Index every subtree that has a span by its (source, target) pair so that a
  // mention's span can be looked up directly.
  Map<Pair<Integer, Integer>, Tree> treeSpanMap = Generics.newHashMap();
  for (Tree ctree : sentenceSubTrees) {
    IntPair span = ctree.getSpan();
    if (span != null) {
      treeSpanMap.put(Pair.makePair(span.getSource(), span.getTarget()), ctree);
    }
  }

  String[][] finalSentence = new String[sentenceAnno.size()][];
  Map<Pair<Integer, Integer>, String> allHeads = Generics.newHashMap();
  int index = -1;
  for (CoreLabel newAnno : sentenceAnno) {
    index += 1;
    String word = newAnno.word();
    String tag = newAnno.tag();
    String cat = newAnno.ner();
    String coref = newAnno.get(CorefCoreAnnotations.CorefAnnotation.class);
    finalSentence[index] = new String[4];
    finalSentence[index][0] = word;
    finalSentence[index][1] = tag;
    finalSentence[index][2] = cat;
    if (coref == null) {
      // Record the single-token span with a null value, as the original code did.
      sentenceInfo.put(Pair.makePair(index, index), coref);
      finalSentence[index][3] = "O";
    } else {
      String[] allC = coref.split("\\|");
      for (String corefG : allC) {
        Pair<Integer, Integer> mention = getMention(index, corefG, sentenceAnno);
        if (!include(sentenceInfo, mention, corefG)) {
          sentenceInfo.put(mention, corefG);
          // Take the head from the subtree whose span matches the mention, if any;
          // otherwise a single-token mention is its own head.
          Tree mentionTree = treeSpanMap.get(mention);
          String head = null;
          if (mentionTree != null) {
            // headTerminal may yield no leaf; leave the head unset rather than fail.
            Tree headLeaf = mentionTree.headTerminal(headFinder);
            head = (headLeaf == null) ? null : headLeaf.nodeString();
          } else if (mention.first.equals(mention.second)) {
            head = word;
          }
          allHeads.put(mention, head);
        }
      }
      if (allHeads.values().contains(word)) {
        finalSentence[index][3] = "MENTION";
      } else {
        finalSentence[index][3] = "O";
      }
    }
  }

  for (int i = 0; i < finalSentence.length; i++) {
    String[] wordInfo = finalSentence[i];
    if (i < finalSentence.length - 1) {
      String[] nextWordInfo = finalSentence[i + 1];
      // Shift the marker from a following "'s" token onto the current token.
      if (nextWordInfo[3].equals("MENTION") && nextWordInfo[0].equals("'s")) {
        wordInfo[3] = "MENTION";
        finalSentence[i + 1][3] = "O";
      }
    }
    pw.println(wordInfo[0] + "\t" + wordInfo[1] + "\t" + wordInfo[2] + "\t" + wordInfo[3]);
  }
  pw.println("");
}
Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class DocumentMaker, method makeDocument.
/**
 * Creates a {@code Document} for the given input and runs preprocessing on it.
 *
 * <p>When {@code CorefProperties.useGoldMentions(props)} is true, a fresh
 * {@code Mention} is created for every gold mention of each sentence and
 * {@code md.findHead} is invoked on each sentence's list. Otherwise the
 * mentions stored under {@code CorefMentionsAnnotation} on each sentence are
 * used. If the input has gold mentions, {@code findGoldMentionHeads} is called
 * on the new document before preprocessing.
 *
 * @param input the input document with its annotation and optional gold mentions
 * @return the preprocessed document
 * @throws Exception declared by the original signature
 */
public Document makeDocument(InputDoc input) throws Exception {
  List<List<Mention>> mentionsBySentence = new ArrayList<>();

  if (!CorefProperties.useGoldMentions(props)) {
    // Use the mentions already attached to each sentence.
    for (CoreMap sentence : input.annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      mentionsBySentence.add(sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class));
    }
  } else {
    List<CoreMap> allSentences = input.annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (int sentIdx = 0; sentIdx < allSentences.size(); sentIdx++) {
      CoreMap current = allSentences.get(sentIdx);
      List<CoreLabel> tokens = current.get(CoreAnnotations.TokensAnnotation.class);

      // Copy each gold mention's boundaries into a new Mention over this sentence's tokens.
      List<Mention> rebuilt = new ArrayList<>();
      for (Mention gold : input.goldMentions.get(sentIdx)) {
        rebuilt.add(new Mention(-1, gold.startIndex, gold.endIndex, tokens, null, null, new ArrayList<>(tokens.subList(gold.startIndex, gold.endIndex))));
      }
      mentionsBySentence.add(rebuilt);
      md.findHead(current, rebuilt);
    }
  }

  Document result = new Document(input, mentionsBySentence);
  if (input.goldMentions != null) {
    findGoldMentionHeads(result);
  }
  DocumentPreprocessor.preprocess(result, dict, null, headFinder);
  return result;
}
Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class DocumentPreprocessor, method setParagraphAnnotation.
/**
 * Sets the paragraph index on every token and every predicted mention.
 *
 * <p>Tokens are visited in document order. A token whose begin offset is more
 * than two characters past the previous token's end offset starts a new
 * paragraph. Tokens without a begin offset get paragraph index -1. Each
 * predicted mention takes the paragraph index of its head word, and the final
 * paragraph counter is stored in {@code doc.numParagraph}.
 *
 * @param doc the document whose tokens and mentions are updated in place
 */
private static void setParagraphAnnotation(Document doc) {
  int paragraphCounter = 0;
  int lastEndOffset = -10;

  for (CoreMap sentence : doc.annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      if (!token.containsKey(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) {
        token.set(CoreAnnotations.ParagraphAnnotation.class, -1);
        continue;
      }
      int beginOffset = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
      if (beginOffset > lastEndOffset + 2) {
        paragraphCounter++;
      }
      token.set(CoreAnnotations.ParagraphAnnotation.class, paragraphCounter);
      lastEndOffset = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    }
  }

  // Propagate the head word's paragraph index to each predicted mention.
  for (List<Mention> sentenceMentions : doc.predictedMentions) {
    for (Mention mention : sentenceMentions) {
      mention.paragraph = mention.headWord.get(CoreAnnotations.ParagraphAnnotation.class);
    }
  }

  doc.numParagraph = paragraphCounter;
}
Aggregations