use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class SieveCoreferenceSystem method printDiscourseStructure.
/**
 * Logs the discourse structure of a document at FINER level: for every utterance, a header
 * line with the utterance index and its speaker, followed by the words of that utterance.
 * Sentence boundaries inside an utterance show up as line breaks in the logged text.
 *
 * @param document the document whose sentences, utterance indices and speakers are logged
 */
private static void printDiscourseStructure(Document document) {
  logger.finer("DISCOURSE STRUCTURE==============================");
  logger.finer("doc type: " + document.docType);
  // Nothing is flushed until a first token has opened an utterance, so no header is
  // logged for the placeholder index -1 with an empty speaker.
  boolean utteranceOpen = false;
  int previousUtterIndex = -1;
  String previousSpeaker = "";
  StringBuilder sb = new StringBuilder();
  for (CoreMap s : document.annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (CoreLabel l : s.get(CoreAnnotations.TokensAnnotation.class)) {
      int utterIndex = l.get(CoreAnnotations.UtteranceAnnotation.class);
      String speaker = l.get(CoreAnnotations.SpeakerAnnotation.class);
      String word = l.get(CoreAnnotations.TextAnnotation.class);
      if (!utteranceOpen || previousUtterIndex != utterIndex) {
        if (utteranceOpen) {
          // The utterance index changed: emit what was buffered for the previous one.
          logUtterance(document, previousUtterIndex, previousSpeaker, sb.toString());
        }
        sb.setLength(0);
        previousUtterIndex = utterIndex;
        previousSpeaker = speaker;
        utteranceOpen = true;
      }
      sb.append(" ").append(word);
    }
    sb.append("\n");
  }
  if (utteranceOpen) {
    // Flush the last utterance, which no index change will ever trigger.
    logUtterance(document, previousUtterIndex, previousSpeaker, sb.toString());
  }
  logger.finer("END OF DISCOURSE STRUCTURE==============================");
}

/** Logs one utterance: a header with its index and speaker, then its buffered words. */
private static void logUtterance(Document document, int utterIndex, String speaker, String words) {
  String speakerText = speaker;
  try {
    // When the speaker string parses as an integer it is used as a key into
    // allPredictedMentions, and that mention's span is shown instead of the raw number.
    int speakerID = Integer.parseInt(speaker);
    speakerText = document.allPredictedMentions.get(speakerID).spanToString();
  } catch (Exception ignored) {
    // Deliberate best effort for debug output: a non-numeric speaker or an ID without a
    // mention simply falls back to the raw speaker string.
  }
  logger.finer("\n<utter>: " + utterIndex + " <speaker>: " + speakerText);
  logger.finer(words);
}
use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class Document method findDocType.
/**
 * Find document type: conversation or article.
 *
 * <p>Walks every token of every sentence and inspects its utterance index. The document is an
 * ARTICLE when no token has a non-zero utterance index, or when the index falls back to 0
 * after a non-zero index has been seen; otherwise it is a CONVERSATION. As a side effect the
 * {@code maxUtter} field is raised to the largest utterance index encountered.
 *
 * @param dict not consulted by this method; kept so the signature stays unchanged for callers
 * @return {@code DocType.ARTICLE} or {@code DocType.CONVERSATION}
 */
private DocType findDocType(Dictionaries dict) {
  boolean speakerChange = false;
  for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (CoreLabel w : sent.get(CoreAnnotations.TokensAnnotation.class)) {
      int utterIndex = w.get(CoreAnnotations.UtteranceAnnotation.class);
      if (utterIndex != 0) {
        speakerChange = true;
      } else if (speakerChange) {
        // In a conversation the utterance index keeps increasing; going back to 0 after
        // a non-zero index is treated as an article.
        return DocType.ARTICLE;
      }
      if (maxUtter < utterIndex) {
        maxUtter = utterIndex;
      }
    }
  }
  return speakerChange ? DocType.CONVERSATION : DocType.ARTICLE;
}
use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class Document method setParagraphAnnotation.
/**
 * Set paragraph index.
 *
 * <p>Every token carrying a character begin offset gets a ParagraphAnnotation; a new
 * paragraph starts whenever a token begins more than two characters after the end offset of
 * the previous offset-bearing token. Tokens without a begin offset are marked with -1.
 * Afterwards each predicted mention copies the paragraph of its head word, and
 * {@code numParagraph} is set to the last paragraph index assigned.
 */
private void setParagraphAnnotation() {
  int currentParagraph = 0;
  int lastEndOffset = -10;
  for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      if (!token.containsKey(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) {
        token.set(CoreAnnotations.ParagraphAnnotation.class, -1);
        continue;
      }
      int beginOffset = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
      if (beginOffset > lastEndOffset + 2) {
        currentParagraph++;
      }
      token.set(CoreAnnotations.ParagraphAnnotation.class, currentParagraph);
      lastEndOffset = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    }
  }

  for (List<Mention> sentenceMentions : predictedOrderedMentionsBySentence) {
    for (Mention mention : sentenceMentions) {
      mention.paragraph = mention.headWord.get(CoreAnnotations.ParagraphAnnotation.class);
    }
  }

  numParagraph = currentParagraph;
}
use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class Document method findSpeaker.
/**
 * Looks for a reporting verb among the tokens {@code [startIndex, endIndex)} of one sentence
 * and, if that verb has an {@code nsubj} dependent, records the subject as the speaker of the
 * given utterance in {@code speakers}.
 *
 * <p>Only tokens whose utterance index is 0 are considered. The recorded speaker is the
 * mention ID (as a string) when a mention head is registered at the subject's position,
 * otherwise the subject's word.
 *
 * @param utterNum   utterance whose speaker is being searched for
 * @param sentNum    index into {@code sentences} of the sentence to scan
 * @param sentences  all sentences of the document
 * @param startIndex first token position to scan (inclusive)
 * @param endIndex   last token position to scan (exclusive)
 * @param dict       dictionaries providing the set of reporting-verb lemmas
 * @return {@code true} if a speaker was found and stored, {@code false} otherwise
 */
private boolean findSpeaker(int utterNum, int sentNum, List<CoreMap> sentences, int startIndex, int endIndex, Dictionaries dict) {
  CoreMap sentence = sentences.get(sentNum);
  List<CoreLabel> sent = sentence.get(CoreAnnotations.TokensAnnotation.class);
  for (int i = startIndex; i < endIndex; i++) {
    CoreLabel token = sent.get(i);
    if (token.get(CoreAnnotations.UtteranceAnnotation.class) != 0) {
      continue;
    }
    String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
    String word = token.get(CoreAnnotations.TextAnnotation.class);
    if (!dict.reportVerb.contains(lemma)) {
      continue;
    }

    // find subject
    SemanticGraph dependency = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    // Prefer the graph node at this token's own index: matching on the word text alone
    // returns the first node with that text, which may be a different occurrence of the
    // same verb elsewhere in the sentence.
    IndexedWord w = dependency.getNodeByIndexSafe(token.index());
    if (w == null || !word.equals(w.word())) {
      // Fall back to a text match; the word is quoted so that regex metacharacters in
      // the token text are matched literally.
      w = dependency.getNodeByWordPattern(java.util.regex.Pattern.quote(word));
    }
    if (w == null) {
      SieveCoreferenceSystem.logger.warning("Cannot find node in dependency for word " + word);
      continue;
    }

    for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(w)) {
      if (!child.first().getShortName().equals("nsubj")) {
        continue;
      }
      String subjectString = child.second().word();
      // start from 1
      int subjectIndex = child.second().index();
      IntTuple headPosition = new IntTuple(2);
      headPosition.set(0, sentNum);
      headPosition.set(1, subjectIndex - 1);
      String speaker;
      if (mentionheadPositions.containsKey(headPosition)) {
        speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID);
      } else {
        speaker = subjectString;
      }
      speakers.put(utterNum, speaker);
      return true;
    }
  }
  return false;
}
use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class Rules method entitySameProperHeadLastWord.
/**
 * Check whether two mentions have the same proper head words.
 *
 * <p>Returns {@code false} unless both head strings are equal ignoring case, both head tokens
 * carry a POS tag starting with "NNP", and each mention's lower-cased
 * {@code removePhraseAfterHead()} text ends with its own head string. It then gathers the
 * NNP-tagged words between each mention's start and its head, and rejects the pair only when
 * each mention has such a word that the other one lacks.
 *
 * @param m the first mention
 * @param a the second mention
 * @return {@code true} if the two mentions are compatible under the checks above
 */
public static boolean entitySameProperHeadLastWord(Mention m, Mention a) {
  if (!m.headString.equalsIgnoreCase(a.headString)) {
    return false;
  }
  if (!m.sentenceWords.get(m.headIndex).get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("NNP")) {
    return false;
  }
  if (!a.sentenceWords.get(a.headIndex).get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("NNP")) {
    return false;
  }

  if (!m.removePhraseAfterHead().toLowerCase().endsWith(m.headString)) {
    return false;
  }
  if (!a.removePhraseAfterHead().toLowerCase().endsWith(a.headString)) {
    return false;
  }

  // Proper nouns that precede the head inside each mention.
  Set<String> mProperNouns = Generics.newHashSet();
  for (CoreLabel token : m.sentenceWords.subList(m.startIndex, m.headIndex)) {
    String tag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    if (tag.startsWith("NNP")) {
      mProperNouns.add(token.get(CoreAnnotations.TextAnnotation.class));
    }
  }
  Set<String> aProperNouns = Generics.newHashSet();
  for (CoreLabel token : a.sentenceWords.subList(a.startIndex, a.headIndex)) {
    String tag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    if (tag.startsWith("NNP")) {
      aProperNouns.add(token.get(CoreAnnotations.TextAnnotation.class));
    }
  }

  // Compatible when at least one side's proper nouns are fully covered by the other's,
  // i.e. it is not the case that both sides contribute a proper noun the other lacks.
  boolean mCoveredByA = aProperNouns.containsAll(mProperNouns);
  boolean aCoveredByM = mProperNouns.containsAll(aProperNouns);
  return mCoveredByA || aCoveredByM;
}
Aggregations