Use of edu.stanford.nlp.semgraph.SemanticGraph in the CoreNLP project by stanfordnlp.
The class CoNLLOutputter, method print.
/**
 * Writes the document's sentences to {@code target} in CoNLL-style columns,
 * one token per line, with blank lines terminating each sentence.
 *
 * @param doc the annotated document; sentence/token annotations may be absent,
 *            in which case nothing but a flush happens
 * @param target the stream written to; it is flushed but not closed (caller owns it)
 * @param options output options; {@code options.encoding} selects the charset
 * @throws IOException if the encoded writer cannot be created
 */
@Override
public void print(Annotation doc, OutputStream target, Options options) throws IOException {
  PrintWriter writer = new PrintWriter(IOUtils.encodedOutputStreamWriter(target, options.encoding));
  // vv A bunch of nonsense to get tokens vv
  if (doc.get(CoreAnnotations.SentencesAnnotation.class) != null) {
    for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      if (sentence.get(CoreAnnotations.TokensAnnotation.class) != null) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        SemanticGraph depTree = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        // Hoisted out of the token loop: the root set is invariant for the
        // sentence, so there is no need to re-stream the roots per token.
        Set<Integer> rootSet = null;
        if (depTree != null) {
          rootSet = depTree.getRoots().stream().map(IndexedWord::index).collect(Collectors.toSet());
        }
        for (int i = 0; i < tokens.size(); ++i) {
          // Newline if applicable
          if (i > 0) {
            writer.println();
          }
          // Try to get the incoming dependency edge.
          int head = -1;       // 0 marks the root; -1 means "no edge found"
          String deprel = null;
          if (depTree != null) {
            IndexedWord node = depTree.getNodeByIndexSafe(i + 1);  // indices are 1-based
            if (node != null) {
              List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node);
              if (!edgeList.isEmpty()) {
                // In a basic dependency tree every non-root node has exactly one governor.
                assert edgeList.size() == 1;
                SemanticGraphEdge edge = edgeList.get(0);
                head = edge.getGovernor().index();
                deprel = edge.getRelation().toString();
              } else if (rootSet.contains(i + 1)) {
                head = 0;
                deprel = "ROOT";
              }
            }
          }
          // Write the token
          writer.print(line(i + 1, tokens.get(i), head, deprel));
        }
      }
      // Blank lines terminate each sentence in CoNLL output.
      writer.println();
      writer.println();
    }
  }
  writer.flush();  // flush only; the caller owns and closes the stream
}
Use of edu.stanford.nlp.semgraph.SemanticGraph in the CoreNLP project by stanfordnlp.
The class EnglishGrammaticalStructure, method postProcessDependencies.
/**
 * Post-processes the typed dependencies in place: corrects WH-phrase
 * attachment via a graph view (correctWHAttachment), then converts
 * "rel" dependencies (convertRel).
 *
 * @param list the dependency list; cleared and repopulated in place
 */
@Override
protected void postProcessDependencies(List<TypedDependency> list) {
  if (DEBUG) {
    printListSorted("At postProcessDependencies:", list);
  }
  // Build a graph view of the list, fix WH attachment on the graph,
  // then write the corrected dependencies back into the list.
  SemanticGraph graph = new SemanticGraph(list);
  correctWHAttachment(graph);
  list.clear();
  list.addAll(graph.typedDependencies());
  if (DEBUG) {
    printListSorted("After correcting WH movement", list);
  }
  convertRel(list);
  if (DEBUG) {
    printListSorted("After converting rel:", list);
  }
}
Use of edu.stanford.nlp.semgraph.SemanticGraph in the CoreNLP project by stanfordnlp.
The class UniversalEnglishGrammaticalStructure, method expandPrepConjunctions.
/**
 * Expands prepositions joined by conjunctions, as in the sentence
 * "Bill flies to and from Serbia.", by copying the governing verb,
 * which produces the following relations:
 * <p/>
 * {@code conj:and(flies, flies')}<br/>
 * {@code case(Serbia, to)}<br/>
 * {@code cc(to, and)}<br/>
 * {@code conj(to, from)}<br/>
 * {@code nmod(flies, Serbia)}<br/>
 * {@code nmod(flies', Serbia)}<br/>
 * <p/>
 * The conjunct relation's label carries the conjunction type: when a verb
 * has several cc relations it can otherwise be impossible to tell which
 * coordination marker goes with which conjuncts.
 *
 * @param sg A SemanticGraph for a sentence
 */
private static void expandPrepConjunctions(SemanticGraph sg) {
  /* Semgrex matching requires a rooted graph; bail out otherwise. */
  if (sg.getRoots().isEmpty()) {
    return;
  }
  // Match against a soft copy so expansions on sg don't disturb iteration.
  SemgrexMatcher matcher = PREP_CONJP_PATTERN.matcher(sg.makeSoftCopy());
  IndexedWord prevGov = null;
  IndexedWord prevCc = null;
  List<IndexedWord> pendingConjuncts = Generics.newLinkedList();
  while (matcher.find()) {
    IndexedWord cc = matcher.getNode("cc");
    IndexedWord conj = matcher.getNode("conj");
    IndexedWord gov = matcher.getNode("gov");
    // A different governor or coordination marker starts a new group:
    // flush the conjuncts gathered so far before collecting for it.
    boolean startsNewGroup = prevGov != null && (!gov.equals(prevGov) || !cc.equals(prevCc));
    if (startsNewGroup) {
      expandPrepConjunction(sg, prevGov, pendingConjuncts, prevCc);
      pendingConjuncts = Generics.newLinkedList();
    }
    prevCc = cc;
    prevGov = gov;
    pendingConjuncts.add(conj);
  }
  // Flush the final group, if any match was found at all.
  if (prevGov != null) {
    expandPrepConjunction(sg, prevGov, pendingConjuncts, prevCc);
  }
}
Use of edu.stanford.nlp.semgraph.SemanticGraph in the CoreNLP project by stanfordnlp.
The class UniversalEnglishGrammaticalStructure, method getExtras.
/**
 * Augments the dependency list in place with extra (non-tree) edges:
 * referent links (via addRef) and additional nsubj links (via addExtraNSubj).
 *
 * @param list the dependency list; cleared and repopulated in place
 */
@Override
protected void getExtras(List<TypedDependency> list) {
  // Work on a graph view, then copy the augmented dependencies back.
  SemanticGraph graph = new SemanticGraph(list);
  addRef(graph);
  if (DEBUG) {
    printListSorted("After adding ref:", graph.typedDependencies());
  }
  addExtraNSubj(graph);
  if (DEBUG) {
    printListSorted("After adding extra nsubj:", graph.typedDependencies());
  }
  list.clear();
  list.addAll(graph.typedDependencies());
}
Use of edu.stanford.nlp.semgraph.SemanticGraph in the CoreNLP project by stanfordnlp.
The class StanfordCoreNLPITest, method test.
/**
 * End-to-end smoke test: runs the full annotation pipeline
 * (tokenize, ssplit, pos, lemma, ner, parse) over a two-sentence
 * document, then checks every annotation layer plus the pretty-print
 * and XML serializations against expected values.
 *
 * @throws Exception if the pipeline or any serialization step fails
 */
public void test() throws Exception {
// create a properties that enables all the annotators
Properties props = new Properties();
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
// run an annotation through the pipeline
String text = "Dan Ramage is working for\nMicrosoft. He's in Seattle! \n";
Annotation document = new Annotation(text);
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
pipeline.annotate(document);
// check that tokens are present (12 expected for this text — TODO confirm
// the count tracks the tokenizer's contraction/punctuation splitting)
List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
Assert.assertNotNull(tokens);
Assert.assertEquals(12, tokens.size());
// check that sentences are present
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
Assert.assertNotNull(sentences);
Assert.assertEquals(2, sentences.size());
// check that pos, lemma and ner and parses are present on every token
for (CoreMap sentence : sentences) {
List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
Assert.assertNotNull(sentenceTokens);
for (CoreLabel token : sentenceTokens) {
Assert.assertNotNull(token.get(CoreAnnotations.PartOfSpeechAnnotation.class));
Assert.assertNotNull(token.get(CoreAnnotations.LemmaAnnotation.class));
Assert.assertNotNull(token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
}
// check for parse tree
Assert.assertNotNull(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
// check that dependency graph Labels have word() consistent with value()
SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
for (IndexedWord vertex : deps.vertexSet()) {
Assert.assertNotNull(vertex.word());
Assert.assertEquals(vertex.word(), vertex.value());
}
}
// test pretty print: token attributes, parse brackets, header, dependencies
StringWriter stringWriter = new StringWriter();
pipeline.prettyPrint(document, new PrintWriter(stringWriter));
String result = stringWriter.getBuffer().toString();
Assert.assertTrue("Tokens are wrong in " + result, StringUtils.find(result, "\\[Text=Dan .*PartOfSpeech=NNP Lemma=Dan NamedEntityTag=PERSON\\]"));
Assert.assertTrue("Parses are wrong in " + result, result.contains("(NP (PRP He))"));
Assert.assertTrue("Parses are wrong in " + result, result.contains("(VP (VBZ 's)"));
Assert.assertTrue("Sentence header is wrong in " + result, result.contains("Sentence #1 (7 tokens)"));
Assert.assertTrue("Dependencies are wrong in " + result, result.contains("nsubj(working-4, Ramage-2)"));
// test XML: header, stylesheet directive, token details, dependency edges
ByteArrayOutputStream os = new ByteArrayOutputStream();
pipeline.xmlPrint(document, os);
result = new String(os.toByteArray(), "UTF-8");
Assert.assertTrue("XML header is wrong in " + result, result.startsWith("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
Assert.assertTrue("XML root is wrong in " + result, result.contains("<?xml-stylesheet href=\"CoreNLP-to-HTML.xsl\" type=\"text/xsl\"?>"));
Assert.assertTrue("XML word info is wrong in " + result, StringUtils.find(result, "<token id=\"2\">\\s*" + "<word>Ramage</word>\\s*" + "<lemma>Ramage</lemma>\\s*" + "<CharacterOffsetBegin>4</CharacterOffsetBegin>\\s*" + "<CharacterOffsetEnd>10</CharacterOffsetEnd>\\s*" + "<POS>NNP</POS>\\s*" + "<NER>PERSON</NER>"));
Assert.assertTrue("XML dependencies are wrong in " + result, StringUtils.find(result, "<dep type=\"compound\">\\s*<governor idx=\"2\">" + "Ramage</governor>\\s*<dependent idx=\"1\">Dan</dependent>\\s*</dep>"));
}
Aggregations