use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class DocumentPreprocessor method setUtteranceAndSpeakerAnnotation.
/**
 * Walks over all tokens of the document, writes an utterance index onto each
 * token, and fills in a placeholder speaker ("PER" + utterance index) for
 * tokens that have no usable speaker or that lie inside a quotation.
 *
 * <p>A new utterance index ({@code doc.maxUtter + 1}) is started whenever the
 * speaker annotation changes or a quotation opens. When a quotation closes,
 * the index falls back to the one that applies outside the quotation.
 * {@code doc.speakerInfoGiven} is set to true as soon as any token carries a
 * speaker other than "-", and {@code doc.maxUtter} is kept at the largest
 * index handed out.
 *
 * @param doc the document whose tokens are annotated in place
 */
private static void setUtteranceAndSpeakerAnnotation(Document doc) {
  doc.speakerInfoGiven = false;
  int currentUtterance = 0;
  // Utterance index that applies to text outside of any quotation.
  int utteranceOutsideQuote = 0;
  boolean inQuote = false;
  List<CoreLabel> tokens = doc.annotation.get(CoreAnnotations.TokensAnnotation.class);

  // Seed the "previous speaker" with the first token's speaker, if there is one.
  String previousSpeaker = null;
  if (tokens.size() > 0) {
    previousSpeaker = tokens.get(0).get(CoreAnnotations.SpeakerAnnotation.class);
  }

  for (CoreLabel token : tokens) {
    String speaker = token.get(CoreAnnotations.SpeakerAnnotation.class);
    String text = token.get(CoreAnnotations.TextAnnotation.class);
    if (speaker != null && !speaker.equals("-")) {
      doc.speakerInfoGiven = true;
    }

    boolean speakerChanged = doc.speakerInfoGiven && speaker != null && !speaker.equals(previousSpeaker);
    // A plain double quote opens a quotation when none is open and closes it otherwise.
    boolean opensQuote = text.equals("``") || (!inQuote && text.equals("\""));
    boolean closesQuote = text.equals("''") || (inQuote && text.equals("\""));

    // Both a speaker change and an opening quote start a fresh utterance.
    if (speakerChanged || opensQuote) {
      currentUtterance = doc.maxUtter + 1;
    }
    if (speakerChanged) {
      // If the new speaker's turn begins with a quotation, the surrounding
      // (non-quoted) text gets the index after the quotation's index.
      utteranceOutsideQuote = opensQuote ? currentUtterance + 1 : currentUtterance;
      previousSpeaker = speaker;
    }

    if (closesQuote) {
      currentUtterance = utteranceOutsideQuote;
      inQuote = false;
    }

    if (currentUtterance > doc.maxUtter) {
      doc.maxUtter = currentUtterance;
    }

    // The opening quote token itself is labelled with the outside-quote index.
    token.set(CoreAnnotations.UtteranceAnnotation.class, opensQuote ? utteranceOutsideQuote : currentUtterance);

    // A speaker is treated as missing when absent, empty, or already a "PER..." placeholder.
    boolean lacksSpeaker = !token.containsKey(CoreAnnotations.SpeakerAnnotation.class);
    if (!lacksSpeaker) {
      String annotatedSpeaker = token.get(CoreAnnotations.SpeakerAnnotation.class);
      lacksSpeaker = annotatedSpeaker.equals("") || annotatedSpeaker.startsWith("PER");
    }
    if (lacksSpeaker || inQuote) {
      token.set(CoreAnnotations.SpeakerAnnotation.class, "PER" + currentUtterance);
    }

    // Flip the flag only after this token is processed, so the opening quote
    // token is not itself handled as "inside" the quotation.
    if (opensQuote) {
      inQuote = true;
    }
  }
}
use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class DocumentPreprocessor method findSpeakers.
/**
 * Speaker extraction.
 *
 * <p>Unless the document's UseMarkedDiscourseAnnotation is set to true, this
 * delegates to {@code findSpeakersInConversation} or
 * {@code findSpeakersInArticle} depending on {@code doc.docType}. Afterwards,
 * every utterance index that has no entry in {@code doc.speakers} yet is
 * mapped to the speaker annotation of the first token seen with that index.
 *
 * @param doc  the document whose {@code speakers} map is filled in
 * @param dict dictionaries handed through to the speaker finders
 */
private static void findSpeakers(Document doc, Dictionaries dict) {
  Boolean markedDiscourseFlag = doc.annotation.get(CoreAnnotations.UseMarkedDiscourseAnnotation.class);
  // A missing annotation counts as "not marked".
  boolean speakersAlreadyMarked = markedDiscourseFlag != null && markedDiscourseFlag;

  if (!speakersAlreadyMarked) {
    if (doc.docType == DocType.CONVERSATION) {
      findSpeakersInConversation(doc, dict);
    } else if (doc.docType == DocType.ARTICLE) {
      findSpeakersInArticle(doc, dict);
    }
  }

  for (CoreMap sentence : doc.annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      int utterIndex = token.get(CoreAnnotations.UtteranceAnnotation.class);
      // Entries that already exist (e.g. from the finders above) are left untouched.
      if (doc.speakers.containsKey(utterIndex)) {
        continue;
      }
      doc.speakers.put(utterIndex, token.get(CoreAnnotations.SpeakerAnnotation.class));
    }
  }
}
use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class DocumentPreprocessor method findParagraphSpeaker.
/**
 * Tries to record a speaker for the utterance of the given paragraph and then
 * returns whatever {@code findNextParagraphSpeaker} reports for it.
 *
 * <p>If {@code doc.speakers} has no entry for {@code paragraphUtterIndex}:
 * a non-empty {@code nextParagraphSpeaker} is stored directly; otherwise the
 * last sentence of the paragraph is scanned token by token. The scan stops at
 * the first token whose POS tag starts with "V". Tokens before that whose NER
 * tag starts with "PER" and whose position is a known mention head supply the
 * mention ID as speaker candidate (the last such token wins). The candidate is
 * stored only if no verb was encountered.
 *
 * @param doc                  document holding the speaker and mention-head maps
 * @param paragraph            sentences of the paragraph
 * @param paragraphUtterIndex  utterance index the paragraph belongs to
 * @param nextParagraphSpeaker speaker carried over for this paragraph, or ""
 * @param paragraphOffset      offset added to a sentence's position within the
 *                             paragraph when building the mention-head lookup key
 * @param dict                 dictionaries passed on to findNextParagraphSpeaker
 * @return "" for an empty paragraph that needed scanning, otherwise the result
 *         of {@code findNextParagraphSpeaker}
 */
private static String findParagraphSpeaker(Document doc, List<CoreMap> paragraph, int paragraphUtterIndex, String nextParagraphSpeaker, int paragraphOffset, Dictionaries dict) {
  if (!doc.speakers.containsKey(paragraphUtterIndex)) {
    if (!nextParagraphSpeaker.isEmpty()) {
      doc.speakers.put(paragraphUtterIndex, nextParagraphSpeaker);
    } else if (paragraph.isEmpty()) {
      // cdm [Sept 2015] added this check to try to avoid crash
      Redwood.log("debug-preprocessor", "Empty paragraph; skipping findParagraphSpeaker");
      return "";
    } else {
      int lastSentIndex = paragraph.size() - 1;
      List<CoreLabel> lastSentTokens = paragraph.get(lastSentIndex).get(CoreAnnotations.TokensAnnotation.class);
      String candidate = "";
      boolean verbSeen = false;

      for (int tokenIdx = 0; tokenIdx < lastSentTokens.size(); tokenIdx++) {
        CoreLabel token = lastSentTokens.get(tokenIdx);
        String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        String ner = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);

        // Any verb disqualifies the sentence; no need to look further.
        if (pos.startsWith("V")) {
          verbSeen = true;
          break;
        }
        if (!ner.startsWith("PER")) {
          continue;
        }

        // Lookup key: (sentence position including offset, token position).
        IntTuple headPosition = new IntTuple(2);
        headPosition.set(0, lastSentIndex + paragraphOffset);
        headPosition.set(1, tokenIdx);
        if (doc.mentionheadPositions.containsKey(headPosition)) {
          candidate = Integer.toString(doc.mentionheadPositions.get(headPosition).mentionID);
        }
      }

      if (!verbSeen && !candidate.isEmpty()) {
        doc.speakers.put(paragraphUtterIndex, candidate);
      }
    }
  }
  return findNextParagraphSpeaker(doc, paragraph, paragraphOffset, dict);
}
use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class CorefRules method entitySameProperHeadLastWord.
/**
 * Check whether two mentions have the same proper head words.
 *
 * <p>Returns false unless the head strings match ignoring case, both head
 * tokens carry a POS tag starting with "NNP", and each mention's text up to
 * its head (lower-cased) ends with its own head string. Beyond that, the
 * proper-noun tokens between each mention's start and its head are compared:
 * the result is false only when each side has a proper noun that the other
 * side lacks.
 *
 * @param m the mention
 * @param a the other mention to compare against
 * @return true if the two mentions pass all of the checks above
 */
public static boolean entitySameProperHeadLastWord(Mention m, Mention a) {
  // Guards are checked one at a time, in the same order as a single || chain would.
  if (!m.headString.equalsIgnoreCase(a.headString)) {
    return false;
  }
  if (!m.sentenceWords.get(m.headIndex).get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("NNP")) {
    return false;
  }
  if (!a.sentenceWords.get(a.headIndex).get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("NNP")) {
    return false;
  }
  if (!m.removePhraseAfterHead().toLowerCase().endsWith(m.headString)) {
    return false;
  }
  if (!a.removePhraseAfterHead().toLowerCase().endsWith(a.headString)) {
    return false;
  }

  // Proper nouns preceding the head (head itself excluded) of the first mention.
  Set<String> mentionModifiers = Generics.newHashSet();
  for (CoreLabel token : m.sentenceWords.subList(m.startIndex, m.headIndex)) {
    if (token.get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("NNP")) {
      mentionModifiers.add(token.get(CoreAnnotations.TextAnnotation.class));
    }
  }

  // Same for the second mention.
  Set<String> otherModifiers = Generics.newHashSet();
  for (CoreLabel token : a.sentenceWords.subList(a.startIndex, a.headIndex)) {
    if (token.get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("NNP")) {
      otherModifiers.add(token.get(CoreAnnotations.TextAnnotation.class));
    }
  }

  // A side "has extra" when at least one of its proper nouns is missing on the other side.
  boolean mentionHasExtra = !otherModifiers.containsAll(mentionModifiers);
  boolean otherHasExtra = !mentionModifiers.containsAll(otherModifiers);

  // One side may be a superset of the other; only mutual extras are rejected.
  return !(mentionHasExtra && otherHasExtra);
}
use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class ACEMentionExtractor method extractGoldMentions.
/**
 * Builds the gold mentions of one sentence and appends them, as a new list,
 * to {@code allGoldMentions}.
 *
 * <p>The sentence's entity mentions (if any) are ordered with
 * {@code comparator}. For each one a {@code Mention} is created with its
 * dependency graph, token extent, mention ID (last "-"-separated part of the
 * object ID) and gold cluster ID (last "-E"-separated part of the coref ID).
 * {@code originalRef} is set to the ID of the earliest previously collected
 * mention in the same gold cluster, or -1 if there is none. The {@code maxID}
 * field is raised to the largest mention ID seen. When
 * {@code Constants.USE_GOLD_NE} is on, the tokens in the extent are also
 * tagged with the mention type and, for "NAM" mentions, the gold NER label.
 *
 * @param s               the sentence
 * @param allGoldMentions per-sentence lists of gold mentions collected so far;
 *                        a list for this sentence is added
 * @param comparator      ordering applied to the sentence's entity mentions
 */
private void extractGoldMentions(CoreMap s, List<List<Mention>> allGoldMentions, EntityComparator comparator) {
  List<Mention> goldMentions = new ArrayList<>();
  allGoldMentions.add(goldMentions);

  List<EntityMention> goldMentionList = s.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
  List<CoreLabel> words = s.get(CoreAnnotations.TokensAnnotation.class);

  TreeSet<EntityMention> sortedEntities = new TreeSet<>(comparator);
  if (goldMentionList != null) {
    sortedEntities.addAll(goldMentionList);
  }

  // Iterating an empty set is a no-op, so no separate emptiness check is needed.
  for (EntityMention e : sortedEntities) {
    Mention men = new Mention();

    // Prefer the collapsed dependencies; fall back to the enhanced ones.
    men.dependency = s.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
    if (men.dependency == null) {
      men.dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    }

    int extentStart = e.getExtentTokenStart();
    int extentEnd = e.getExtentTokenEnd();
    men.startIndex = extentStart;
    men.endIndex = extentEnd;

    String[] objectIdParts = e.getObjectId().split("-");
    men.mentionID = Integer.parseInt(objectIdParts[objectIdParts.length - 1]);
    String[] corefIdParts = e.getCorefID().split("-E");
    men.goldCorefClusterID = Integer.parseInt(corefIdParts[corefIdParts.length - 1]);

    // Scan everything collected so far from newest to oldest; because later
    // matches overwrite earlier ones, the oldest same-cluster mention wins.
    men.originalRef = -1;
    for (int sentIdx = allGoldMentions.size() - 1; sentIdx >= 0; sentIdx--) {
      List<Mention> sentenceMentions = allGoldMentions.get(sentIdx);
      for (int mentionIdx = sentenceMentions.size() - 1; mentionIdx >= 0; mentionIdx--) {
        Mention earlier = sentenceMentions.get(mentionIdx);
        if (earlier.goldCorefClusterID == men.goldCorefClusterID) {
          men.originalRef = earlier.mentionID;
        }
      }
    }

    goldMentions.add(men);
    if (maxID < men.mentionID) {
      maxID = men.mentionID;
    }

    // set ner type
    for (int tokenIdx = extentStart; tokenIdx < extentEnd; tokenIdx++) {
      CoreLabel word = words.get(tokenIdx);
      if (!Constants.USE_GOLD_NE) {
        continue;
      }
      word.set(CoreAnnotations.EntityTypeAnnotation.class, e.getMentionType());
      if (e.getMentionType().equals("NAM")) {
        // Gold NER label has the form "<type>-<subtype>".
        word.set(CoreAnnotations.NamedEntityTagAnnotation.class, e.getType() + "-" + e.getSubType());
      }
    }
  }
}
Aggregations