Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
From class DocumentPreprocessor, method findParagraphSpeaker:
private static String findParagraphSpeaker(Document doc, List<CoreMap> paragraph, int paragraphUtterIndex, String nextParagraphSpeaker, int paragraphOffset, Dictionaries dict) {
  if (!doc.speakers.containsKey(paragraphUtterIndex)) {
    if (!nextParagraphSpeaker.isEmpty()) {
      doc.speakers.put(paragraphUtterIndex, nextParagraphSpeaker);
    } else {
      // cdm [Sept 2015] added this check to try to avoid crash
      if (paragraph.isEmpty()) {
        Redwood.log("debug-preprocessor", "Empty paragraph; skipping findParagraphSpeaker");
        return "";
      }
      CoreMap lastSent = paragraph.get(paragraph.size() - 1);
      String speaker = "";
      boolean hasVerb = false;
      for (int i = 0; i < lastSent.get(CoreAnnotations.TokensAnnotation.class).size(); i++) {
        CoreLabel w = lastSent.get(CoreAnnotations.TokensAnnotation.class).get(i);
        String pos = w.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        String ner = w.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        if (pos.startsWith("V")) {
          hasVerb = true;
          break;
        }
        if (ner.startsWith("PER")) {
          IntTuple headPosition = new IntTuple(2);
          headPosition.set(0, paragraph.size() - 1 + paragraphOffset);
          headPosition.set(1, i);
          if (doc.mentionheadPositions.containsKey(headPosition)) {
            speaker = Integer.toString(doc.mentionheadPositions.get(headPosition).mentionID);
          }
        }
      }
      if (!hasVerb && !speaker.equals("")) {
        doc.speakers.put(paragraphUtterIndex, speaker);
      }
    }
  }
  return findNextParagraphSpeaker(doc, paragraph, paragraphOffset, dict);
}
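The headPosition lookup above relies on edu.stanford.nlp.util.IntTuple being usable as a map key: the tuple holds (sentence index within the document, token index within the sentence), and a freshly built tuple with equal contents retrieves the stored mention. A minimal standalone sketch of that keying, with hypothetical indices and a hypothetical mention ID:

import edu.stanford.nlp.util.IntTuple;
import java.util.HashMap;
import java.util.Map;

public class HeadPositionLookupSketch {
  public static void main(String[] args) {
    // Hypothetical stand-in for doc.mentionheadPositions, mapping a head
    // position to a mention ID.
    Map<IntTuple, Integer> mentionIdByHead = new HashMap<>();
    IntTuple stored = new IntTuple(2);
    stored.set(0, 3); // sentence index within the document
    stored.set(1, 7); // token index within the sentence
    mentionIdByHead.put(stored, 42);

    // A new tuple with the same contents hashes and compares equal,
    // which is what makes the containsKey check in findParagraphSpeaker work.
    IntTuple query = new IntTuple(2);
    query.set(0, 3);
    query.set(1, 7);
    System.out.println(mentionIdByHead.get(query)); // prints 42
  }
}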
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
From class DocumentPreprocessor, method findSpeakersInConversation:
private static void findSpeakersInConversation(Document doc, Dictionaries dict) {
  for (List<Mention> l : doc.predictedMentions) {
    for (Mention m : l) {
      if (m.predicateNominatives == null)
        continue;
      for (Mention a : m.predicateNominatives) {
        if (a.spanToString().toLowerCase().equals("i")) {
          doc.speakers.put(m.headWord.get(CoreAnnotations.UtteranceAnnotation.class), Integer.toString(m.mentionID));
        }
      }
    }
  }
  List<CoreMap> paragraph = new ArrayList<>();
  int paragraphUtterIndex = 0;
  String nextParagraphSpeaker = "";
  int paragraphOffset = 0;
  for (CoreMap sent : doc.annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    paragraph.add(sent);
    int currentUtter = sent.get(CoreAnnotations.TokensAnnotation.class).get(0).get(CoreAnnotations.UtteranceAnnotation.class);
    if (paragraphUtterIndex != currentUtter) {
      nextParagraphSpeaker = findParagraphSpeaker(doc, paragraph, paragraphUtterIndex, nextParagraphSpeaker, paragraphOffset, dict);
      paragraphUtterIndex = currentUtter;
      paragraphOffset += paragraph.size();
      paragraph = new ArrayList<>();
    }
  }
  findParagraphSpeaker(doc, paragraph, paragraphUtterIndex, nextParagraphSpeaker, paragraphOffset, dict);
}
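The second half of the method segments sentences into paragraphs by watching the utterance index of each sentence's first token; note that the sentence which starts a new utterance is flushed together with the paragraph it closes, exactly as written. A self-contained sketch of that grouping loop, with hypothetical utterance indices standing in for the real annotations:

import java.util.ArrayList;
import java.util.List;

public class UtteranceGroupingSketch {
  public static void main(String[] args) {
    // Hypothetical utterance index of each sentence's first token.
    int[] utteranceOfSentence = {0, 0, 1, 1, 2};
    List<Integer> paragraph = new ArrayList<>();
    int paragraphUtterIndex = 0;
    int paragraphOffset = 0; // document-level index of the paragraph's first sentence
    for (int sent = 0; sent < utteranceOfSentence.length; sent++) {
      paragraph.add(sent);
      if (paragraphUtterIndex != utteranceOfSentence[sent]) {
        System.out.println("paragraph at offset " + paragraphOffset + ": " + paragraph);
        paragraphUtterIndex = utteranceOfSentence[sent];
        paragraphOffset += paragraph.size();
        paragraph = new ArrayList<>();
      }
    }
    // The last group never sees an utterance change, which is why the method
    // above makes one more findParagraphSpeaker call after its loop.
    System.out.println("trailing paragraph at offset " + paragraphOffset + ": " + paragraph);
  }
}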
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
From class ChineseHcorefDemo, method main:
public static void main(String[] args) throws Exception {
  long startTime = System.currentTimeMillis();
  String text = "俄罗斯 航空 公司 一 名 官员 在 9号 说 , " + "米洛舍维奇 的 儿子 马可·米洛舍维奇 9号 早上 持 外交 护照 从 俄国 首都 莫斯科 搭机 飞往 中国 大陆 北京 , " + "可是 就 在 稍后 就 返回 莫斯科 。 " + "这 名 俄国 航空 公司 官员 说 马可 是 因为 护照 问题 而 在 北京 机场 被 中共 遣返 莫斯科 。 " + "北京 机场 方面 的 这 项 举动 清楚 显示 中共 有意 放弃 在 总统 大选 落败 的 前 南斯拉夫 总统 米洛舍维奇 , " + "因此 他 在 南斯拉夫 受到 民众 厌恶 的 儿子 马可 才 会 在 北京 机场 被 中共 当局 送回 莫斯科 。 " + "马可 持 外交 护照 能够 顺利 搭机 离开 莫斯科 , 但是 却 在 北京 受阻 , 可 算是 踢到 了 铁板 。 " + "可是 这 项 消息 和 先前 外界 谣传 中共 当局 准备 提供 米洛舍维奇 和 他 的 家人 安全 庇护所 有 着 很 大 的 出入 ," + " 一般 认为 在 去年 米洛舍维奇 挥兵 攻打 科索沃 境内 阿尔巴尼亚 一 分离主义 分子 的 时候 , " + "强力 反对 北约 组织 攻击 南斯拉夫 的 中共 , 会 全力 保护 米洛舍维奇 和 他 的 家人 及 亲信 。 " + "可是 从 9号 马可 被 送回 莫斯科 一 事 看 起来 , 中共 很 可能 会 放弃 米洛舍维奇 。";
  args = new String[] { "-props", "edu/stanford/nlp/hcoref/properties/zh-coref-default.properties" };
  Annotation document = new Annotation(text);
  Properties props = StringUtils.argsToProperties(args);
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  pipeline.annotate(document);
  System.out.println("---");
  System.out.println("coref chains");
  for (CorefChain cc : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
    System.out.println("\t" + cc);
  }
  for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
    System.out.println("---");
    System.out.println("mentions");
    for (Mention m : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
      System.out.println("\t" + m);
    }
  }
  long endTime = System.currentTimeMillis();
  long time = (endTime - startTime) / 1000;
  System.out.println("Running time " + time / 60 + "min " + time % 60 + "s");
}
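A possible follow-up to the chain-printing loop, sketched here on the assumption that the CorefChain class this demo imports provides the getRepresentativeMention() accessor found in the dcoref and coref versions of the class: printing only each chain's representative mention is often more readable than printing the whole chain.

// Sketch only; a drop-in variant of the chain loop in main above.
for (CorefChain cc : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
  CorefChain.CorefMention rep = cc.getRepresentativeMention();
  System.out.println("\t" + cc.getChainID() + ": " + rep.mentionSpan);
}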
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
From class CoNLLMentionExtractor, method recallErrors:
private static void recallErrors(List<List<Mention>> goldMentions, List<List<Mention>> predictedMentions, Annotation doc) throws IOException {
  List<CoreMap> coreMaps = doc.get(CoreAnnotations.SentencesAnnotation.class);
  int numSentences = goldMentions.size();
  for (int i = 0; i < numSentences; i++) {
    CoreMap coreMap = coreMaps.get(i);
    List<CoreLabel> words = coreMap.get(CoreAnnotations.TokensAnnotation.class);
    Tree tree = coreMap.get(TreeCoreAnnotations.TreeAnnotation.class);
    List<Mention> goldMentionsSent = goldMentions.get(i);
    List<Pair<Integer, Integer>> goldMentionsSpans = extractSpans(goldMentionsSent);
    for (Pair<Integer, Integer> mentionSpan : goldMentionsSpans) {
      logger.finer("RECALL ERROR\n");
      logger.finer(coreMap + "\n");
      for (int x = mentionSpan.first; x < mentionSpan.second; x++) {
        logger.finer(words.get(x).value() + " ");
      }
      logger.finer("\n" + tree + "\n");
    }
  }
}
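Note that the predictedMentions parameter is never read in the body above, so every gold span is logged as a recall error. A hypothetical refinement (not in the source) would subtract exact-match predicted spans first; edu.stanford.nlp.util.Pair implements value-based equals and hashCode, so span pairs can be collected into a HashSet:

// Hypothetical helper, not part of CoNLLMentionExtractor. Requires
// java.util.{ArrayList,HashSet,List,Set} and edu.stanford.nlp.util.Pair.
private static List<Pair<Integer, Integer>> missedSpans(List<Pair<Integer, Integer>> goldSpans, List<Pair<Integer, Integer>> predictedSpans) {
  Set<Pair<Integer, Integer>> predicted = new HashSet<>(predictedSpans);
  List<Pair<Integer, Integer>> missed = new ArrayList<>();
  for (Pair<Integer, Integer> span : goldSpans) {
    if (!predicted.contains(span)) {
      missed.add(span); // a gold mention with no exact predicted counterpart
    }
  }
  return missed;
}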
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
From class CoNLLMentionExtractor, method nextDoc:
@Override
public Document nextDoc() throws Exception {
  List<List<CoreLabel>> allWords = new ArrayList<>();
  List<Tree> allTrees = new ArrayList<>();
  CoNLL2011DocumentReader.Document conllDoc = reader.getNextDocument();
  if (conllDoc == null) {
    return null;
  }
  Annotation anno = conllDoc.getAnnotation();
  List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    if (!Constants.USE_GOLD_PARSES && !replicateCoNLL) {
      // Remove the tree from the annotation so it is replaced by a parse from the Stanford parser
      sentence.remove(TreeCoreAnnotations.TreeAnnotation.class);
    } else {
      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      if (LEMMATIZE) {
        treeLemmatizer.transformTree(tree);
      }
      // generate the dependency graph
      try {
        SemanticGraph deps = SemanticGraphFactory.makeFromTree(tree, SemanticGraphFactory.Mode.ENHANCED, GrammaticalStructure.Extras.NONE);
        SemanticGraph basicDeps = SemanticGraphFactory.makeFromTree(tree, SemanticGraphFactory.Mode.BASIC, GrammaticalStructure.Extras.NONE);
        sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basicDeps);
        sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, deps);
      } catch (Exception e) {
        logger.log(Level.WARNING, "Exception caught during extraction of Stanford dependencies. Will ignore and continue...", e);
      }
    }
  }
  String preSpeaker = null;
  int utterance = -1;
  for (CoreLabel token : anno.get(CoreAnnotations.TokensAnnotation.class)) {
    if (!token.containsKey(CoreAnnotations.SpeakerAnnotation.class)) {
      token.set(CoreAnnotations.SpeakerAnnotation.class, "");
    }
    String curSpeaker = token.get(CoreAnnotations.SpeakerAnnotation.class);
    if (!curSpeaker.equals(preSpeaker)) {
      utterance++;
      preSpeaker = curSpeaker;
    }
    token.set(CoreAnnotations.UtteranceAnnotation.class, utterance);
  }
  // Run pipeline
  stanfordProcessor.annotate(anno);
  for (CoreMap sentence : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
    allWords.add(sentence.get(CoreAnnotations.TokensAnnotation.class));
    allTrees.add(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
  }
  // Initialize gold mentions
  List<List<Mention>> allGoldMentions = extractGoldMentions(conllDoc);
  List<List<Mention>> allPredictedMentions;
  if (Constants.USE_GOLD_MENTIONS) {
    //allPredictedMentions = allGoldMentions;
    // Make a copy of the gold mentions, since mentions may later be merged and mention IDs changed
    allPredictedMentions = makeCopy(allGoldMentions);
  } else if (Constants.USE_GOLD_MENTION_BOUNDARIES) {
    allPredictedMentions = ((RuleBasedCorefMentionFinder) mentionFinder).filterPredictedMentions(allGoldMentions, anno, dictionaries);
  } else {
    allPredictedMentions = mentionFinder.extractPredictedMentions(anno, maxID, dictionaries);
  }
  try {
    recallErrors(allGoldMentions, allPredictedMentions, anno);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  Document doc = arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);
  doc.conllDoc = conllDoc;
  return doc;
}
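The speaker pass in nextDoc assigns a new utterance index whenever the SpeakerAnnotation value changes between consecutive tokens, after normalizing missing speakers to the empty string. A standalone sketch of just that logic, with hypothetical speaker strings:

public class UtteranceIndexSketch {
  public static void main(String[] args) {
    // Hypothetical per-token speakers; "" mirrors tokens that had no
    // SpeakerAnnotation and were normalized in nextDoc.
    String[] speakers = {"A", "A", "", "", "B", "A"};
    String preSpeaker = null;
    int utterance = -1;
    for (String curSpeaker : speakers) {
      if (!curSpeaker.equals(preSpeaker)) {
        utterance++;
        preSpeaker = curSpeaker;
      }
      System.out.println("\"" + curSpeaker + "\" -> utterance " + utterance);
    }
    // Output: utterances 0, 0, 1, 1, 2, 3. The empty speaker forms its own
    // utterance, and the later return to "A" starts a fresh one.
  }
}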