Usage of edu.stanford.nlp.semgraph.SemanticGraph in the CoreNLP project by stanfordnlp:
the method main of the class UniversalDependenciesFeatureAnnotator.
/**
 * Command-line entry point: reads a CoNLL-U file and a matching treebank file,
 * adds morphological features to each dependency graph, and prints the
 * annotated CoNLL-U document to standard output.
 *
 * Usage: java UniversalDependenciesFeatureAnnotator CoNLL-U_file tree_file [-addUPOS -escapeParenthesis]
 *
 * @throws IOException if either input file cannot be read
 */
public static void main(String[] args) throws IOException {
  if (args.length < 2) {
    log.info("Usage: ");
    log.info("java ");
    log.info(UniversalDependenciesFeatureAnnotator.class.getCanonicalName());
    log.info(" CoNLL-U_file tree_file [-addUPOS -escapeParenthesis]");
    return;
  }
  String coNLLUFile = args[0];
  String treeFile = args[1];
  boolean addUPOS = false;
  boolean escapeParens = false;
  // Optional flags may appear in any order after the two file arguments.
  for (int i = 2; i < args.length; i++) {
    if (args[i].equals("-addUPOS")) {
      addUPOS = true;
    } else if (args[i].equals("-escapeParenthesis")) {
      escapeParens = true;
    }
  }
  UniversalDependenciesFeatureAnnotator featureAnnotator = new UniversalDependenciesFeatureAnnotator();
  CoNLLUDocumentReader depReader = new CoNLLUDocumentReader();
  CoNLLUDocumentWriter depWriter = new CoNLLUDocumentWriter();
  // try-with-resources: the reader was previously never closed (resource leak).
  try (Reader r = IOUtils.readerFromString(coNLLUFile)) {
    Iterator<SemanticGraph> it = depReader.getIterator(r);
    Iterator<Tree> treeIt = treebankIterator(treeFile);
    while (it.hasNext()) {
      SemanticGraph sg = it.next();
      Tree t = treeIt.next();
      // The tree and the dependency graph must cover the same number of tokens.
      if (t == null || t.yield().size() != sg.size()) {
        StringBuilder sentenceSb = new StringBuilder();
        for (IndexedWord word : sg.vertexListSorted()) {
          sentenceSb.append(word.get(CoreAnnotations.TextAnnotation.class));
          sentenceSb.append(' ');
        }
        // Guard the null case: calling t.pennString() with t == null used to
        // throw a NullPointerException instead of this informative exception.
        throw new RuntimeException("CoNLL-U file and tree file are not aligned. \n"
            + "Sentence: " + sentenceSb + '\n'
            + "Tree: " + (t == null ? "null" : t.pennString()));
      }
      featureAnnotator.addFeatures(sg, t, true, addUPOS);
      System.out.print(depWriter.printSemanticGraph(sg, !escapeParens));
    }
  }
}
Usage of edu.stanford.nlp.semgraph.SemanticGraph in the CoreNLP project by stanfordnlp:
the method parseTree of the class TSVUtils.
/**
 * Parse a CoNLL formatted tree into a SemanticGraph.
 *
 * Each non-empty line must carry at least three tab-separated fields:
 * dependent index (1-based), governor index (0 = root), and relation name.
 * If an index does not fit the supplied token list (a mismatch the malt
 * parser produces fairly often) or a row is malformed, an empty graph is
 * returned rather than throwing.
 *
 * @param conll The CoNLL tree to parse; may be null or empty.
 * @param tokens The tokens of the sentence, to form the backing labels of the tree.
 * @return A semantic graph of the sentence, according to the given tree.
 */
public static SemanticGraph parseTree(String conll, List<CoreLabel> tokens) {
  SemanticGraph tree = new SemanticGraph();
  if (conll == null || conll.isEmpty()) {
    return tree;
  }
  String[] treeLines = newline.split(conll);
  // Slot 0 is the virtual root; +2 gives 1-based indexing plus one slot of slack.
  IndexedWord[] vertices = new IndexedWord[tokens.size() + 2];
  // Add edges
  for (String line : treeLines) {
    if (line.isEmpty()) {
      // Tolerate stray blank lines instead of failing in Integer.parseInt below.
      continue;
    }
    // Parse row
    String[] fields = tab.split(line);
    if (fields.length < 3) {
      // Malformed row: treat it like an index mismatch and give up cleanly.
      return new SemanticGraph();
    }
    int dependentIndex = Integer.parseInt(fields[0]);
    if (dependentIndex > tokens.size()) {
      // Bizarre mismatch in sizes; the malt parser seems to do this often.
      // Checked BEFORE indexing into vertices: the old code read
      // vertices[dependentIndex] first, which could throw
      // ArrayIndexOutOfBoundsException for indices >= tokens.size() + 2.
      return new SemanticGraph();
    }
    if (vertices[dependentIndex] == null) {
      vertices[dependentIndex] = new IndexedWord(tokens.get(dependentIndex - 1));
    }
    IndexedWord dependent = vertices[dependentIndex];
    int governorIndex = Integer.parseInt(fields[1]);
    if (governorIndex > tokens.size()) {
      // Bizarre mismatch in sizes; the malt parser seems to do this often
      return new SemanticGraph();
    }
    if (vertices[governorIndex] == null && governorIndex > 0) {
      vertices[governorIndex] = new IndexedWord(tokens.get(governorIndex - 1));
    }
    IndexedWord governor = vertices[governorIndex];
    String relation = fields[2];
    // Process row
    if (governorIndex == 0) {
      // Governor index 0 denotes the artificial root node.
      tree.addRoot(dependent);
    } else {
      tree.addVertex(dependent);
      if (!tree.containsVertex(governor)) {
        tree.addVertex(governor);
      }
      // "ref" relations are deliberately skipped (no edge is added for them).
      if (!"ref".equals(relation)) {
        tree.addEdge(governor, dependent, GrammaticalRelation.valueOf(Language.English, relation), Double.NEGATIVE_INFINITY, false);
      }
    }
  }
  return tree;
}
Usage of edu.stanford.nlp.semgraph.SemanticGraph in the CoreNLP project by stanfordnlp:
the method fromProto of the class ProtobufAnnotationSerializer.
/**
 * Return a {@link RelationTriple} object from the serialized representation.
 * This requires a sentence and a document so that
 * (1) we have a docid for the dependency tree can be accurately rebuilt,
 * and (2) we have references to the tokens to include in the relation triple.
 *
 * @param proto The serialized relation triples.
 * @param doc The document we are deserializing. This document should already
 * have a docid annotation set, if there is one.
 * @param sentenceIndex The index of the sentence this extraction should be attached to.
 *
 * @return A relation triple as a Java object, corresponding to the serialized proto.
 */
public static RelationTriple fromProto(CoreNLPProtos.RelationTriple proto, Annotation doc, int sentenceIndex) {
  if (Thread.interrupted()) {
    throw new RuntimeInterruptedException();
  }
  // Get the spans for the extraction.
  // The identical location->token lookup was previously inlined three times;
  // it now lives in the tokensForLocations helper below.
  List<CoreLabel> subject = tokensForLocations(proto.getSubjectTokensList(), doc);
  List<CoreLabel> relation;
  if (proto.getRelationTokensCount() == 0) {
    // If we don't have a real span for the relation, make a dummy word
    relation = Collections.singletonList(new CoreLabel(new Word(proto.getRelation())));
  } else {
    relation = tokensForLocations(proto.getRelationTokensList(), doc);
  }
  List<CoreLabel> object = tokensForLocations(proto.getObjectTokensList(), doc);
  // Create the extraction
  RelationTriple extraction;
  double confidence = proto.getConfidence();
  if (proto.hasTree()) {
    // A dependency tree was serialized alongside the triple; rebuild it.
    SemanticGraph tree = fromProto(proto.getTree(), doc.get(SentencesAnnotation.class).get(sentenceIndex).get(TokensAnnotation.class), doc.get(DocIDAnnotation.class), Optional.of(doc));
    extraction = new RelationTriple.WithTree(subject, relation, object, tree, confidence);
  } else {
    extraction = new RelationTriple(subject, relation, object, confidence);
  }
  // Tweak the extraction: restore the optional boolean flags, if present.
  if (proto.hasIstmod()) {
    extraction.istmod(proto.getIstmod());
  }
  if (proto.hasPrefixBe()) {
    extraction.isPrefixBe(proto.getPrefixBe());
  }
  if (proto.hasSuffixBe()) {
    extraction.isSuffixBe(proto.getSuffixBe());
  }
  if (proto.hasSuffixOf()) {
    extraction.isSuffixOf(proto.getSuffixOf());
  }
  // Return
  return extraction;
}

/**
 * Resolve serialized token locations (sentence index + token index) to the
 * actual {@link CoreLabel} objects of an already-deserialized document.
 *
 * @param locations The serialized token locations to resolve.
 * @param doc The deserialized document holding the sentences and tokens.
 * @return The tokens addressed by {@code locations}, in order.
 */
private static List<CoreLabel> tokensForLocations(List<CoreNLPProtos.TokenLocation> locations, Annotation doc) {
  return locations.stream()
      .map(loc -> doc.get(SentencesAnnotation.class)
          .get(loc.getSentenceIndex())
          .get(TokensAnnotation.class)
          .get(loc.getTokenIndex()))
      .collect(Collectors.toList());
}
Usage of edu.stanford.nlp.semgraph.SemanticGraph in the CoreNLP project by stanfordnlp:
the method doOneSentence of the class DependencyParseAnnotator.
@Override
protected void doOneSentence(Annotation annotation, CoreMap sentence) {
  // Predict the grammatical structure once, then derive every dependency
  // representation from that single parse.
  GrammaticalStructure gs = parser.predict(sentence);
  SemanticGraph collapsedGraph = SemanticGraphFactory.makeFromTree(gs, Mode.COLLAPSED, extraDependencies, null);
  SemanticGraph basicGraph = SemanticGraphFactory.makeFromTree(gs, Mode.BASIC, extraDependencies, null);
  SemanticGraph ccProcessedGraph = SemanticGraphFactory.makeFromTree(gs, Mode.CCPROCESSED, extraDependencies, null);
  SemanticGraph enhancedGraph = SemanticGraphFactory.makeFromTree(gs, Mode.ENHANCED, extraDependencies, null);
  SemanticGraph enhancedPlusPlusGraph = SemanticGraphFactory.makeFromTree(gs, Mode.ENHANCED_PLUS_PLUS, extraDependencies, null);
  // Attach each graph under its corresponding sentence-level annotation key.
  sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, collapsedGraph);
  sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basicGraph);
  sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccProcessedGraph);
  sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, enhancedGraph);
  sentence.set(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class, enhancedPlusPlusGraph);
}
Usage of edu.stanford.nlp.semgraph.SemanticGraph in the CoreNLP project by stanfordnlp:
the method annotate of the class DeterministicCorefAnnotator.
@Override
// Runs deterministic (rule-based) coreference over a fully tokenized and
// parsed Annotation: per sentence it collects tokens and trees, attaches a
// collapsed dependency graph, then extracts mentions and sets the resulting
// coref chains on the annotation.
public void annotate(Annotation annotation) {
try {
List<Tree> trees = new ArrayList<>();
List<List<CoreLabel>> sentences = new ArrayList<>();
// extract trees and sentence words
// we are only supporting the new annotation standard for this Annotator!
boolean hasSpeakerAnnotations = false;
if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
// int sentNum = 0;
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
sentences.add(tokens);
Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
trees.add(tree);
// locking here is crucial for correct threading!
// NOTE(review): no explicit lock is visible here; the locking presumably
// happens inside makeFromTree (final arg true) — confirm before changing.
SemanticGraph dependencies = SemanticGraphFactory.makeFromTree(tree, Mode.COLLAPSED, Extras.NONE, null, true);
sentence.set(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class, dependencies);
if (!hasSpeakerAnnotations) {
// check for speaker annotations
// Once one speaker annotation is found we stop scanning later sentences.
for (CoreLabel t : tokens) {
if (t.get(CoreAnnotations.SpeakerAnnotation.class) != null) {
hasSpeakerAnnotations = true;
break;
}
}
}
// Merge parse-tree labels with the token list and set up utterance fields.
MentionExtractor.mergeLabels(tree, tokens);
MentionExtractor.initializeUtterance(tokens);
}
} else {
log.error("this coreference resolution system requires SentencesAnnotation!");
return;
}
// Speaker information switches the coref system into marked-discourse mode.
if (hasSpeakerAnnotations) {
annotation.set(CoreAnnotations.UseMarkedDiscourseAnnotation.class, true);
}
// extract all possible mentions
// this is created for each new annotation because it is not threadsafe
RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(allowReparsing);
List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(annotation, 0, corefSystem.dictionaries());
// add the relevant info to mentions and order them for coref
Document document = mentionExtractor.arrange(annotation, sentences, trees, allUnprocessedMentions);
List<List<Mention>> orderedMentions = document.getOrderedMentions();
// Debug dump of every mention, grouped by sentence.
if (VERBOSE) {
for (int i = 0; i < orderedMentions.size(); i++) {
System.err.printf("Mentions in sentence #%d:%n", i);
for (int j = 0; j < orderedMentions.get(i).size(); j++) {
log.info("\tMention #" + j + ": " + orderedMentions.get(i).get(j).spanToString());
}
}
}
// Run coref and publish the chains on the annotation.
Map<Integer, edu.stanford.nlp.coref.data.CorefChain> result = corefSystem.corefReturnHybridOutput(document);
annotation.set(edu.stanford.nlp.coref.CorefCoreAnnotations.CorefChainAnnotation.class, result);
// Optionally also emit results in the legacy (pre-hybrid) format.
if (OLD_FORMAT) {
Map<Integer, CorefChain> oldResult = corefSystem.coref(document);
addObsoleteCoreferenceAnnotations(annotation, orderedMentions, oldResult);
}
} catch (RuntimeException e) {
// Let runtime failures propagate unchanged; wrap only checked exceptions.
throw e;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
End of aggregated SemanticGraph usage examples.