use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
the class CustomAnnotationSerializer method saveDependencyGraph.
/**
 * Saves all arcs in the graph on two lines: the first line contains the vertices, the second the edges.
 * @param graph the dependency graph to save; if null, two empty lines are printed
 * @param pw the writer to print to
 */
private static void saveDependencyGraph(SemanticGraph graph, PrintWriter pw) {
  if (graph == null) {
    pw.println();
    pw.println();
    return;
  }
  boolean outputHeader = false;
  for (IndexedWord node : graph.vertexSet()) {
    // indicate: docid, sentence index
    if (!outputHeader) {
      String docId = node.get(CoreAnnotations.DocIDAnnotation.class);
      if (docId != null && docId.length() > 0)
        pw.print(docId);
      else
        pw.print("-");
      pw.print("\t");
      pw.print(node.get(CoreAnnotations.SentenceIndexAnnotation.class));
      outputHeader = true;
    }
    pw.print("\t");
    pw.print(node.index());
    // These annotations are usually not set, so print them only if necessary
    if (node.copyCount() > 0) {
      pw.print("-");
      pw.print(node.copyCount());
      // System.out.println("FOUND COPY ANNOTATION: " + node.get(CoreAnnotations.CopyAnnotation.class));
    }
    if (graph.getRoots().contains(node)) {
      if (node.copyCount() > 0) {
        pw.print("-R");
      } else {
        pw.print("-0-R");
      }
    }
  }
  pw.println();
  // second line: all edges
  boolean first = true;
  for (SemanticGraphEdge edge : graph.edgeIterable()) {
    if (!first)
      pw.print("\t");
    String rel = edge.getRelation().toString();
    // no spaces allowed in the relation name;
    // they might occur due to the tokenization of HTML/XML/RDF tags
    rel = rel.replaceAll("\\s+", "");
    pw.print(rel);
    pw.print(" ");
    pw.print(edge.getSource().index());
    pw.print(" ");
    pw.print(edge.getTarget().index());
    if (edge.isExtra() || edge.getSource().copyCount() > 0 || edge.getTarget().copyCount() > 0) {
      pw.print(" ");
      pw.print(edge.isExtra());
      pw.print(" ");
      pw.print(edge.getSource().copyCount());
      pw.print(" ");
      pw.print(edge.getTarget().copyCount());
    }
    first = false;
  }
  pw.println();
}
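For orientation, the edge fields the serializer writes on its second line (relation, governor index, dependent index) can be printed directly from a freshly parsed graph. The sketch below is illustrative only and is not part of CustomAnnotationSerializer; the annotator list and the example sentence are assumptions.

// Illustrative sketch: build a dependency graph with a standard pipeline and print
// each edge with the same fields the second serialized line above encodes.
// Annotator list and sentence are assumed for the example.
Properties props = new Properties();
props.setProperty("annotators", "tokenize,ssplit,pos,depparse");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
Annotation ann = new Annotation("Dogs chase cats.");
pipeline.annotate(ann);
for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
  SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  for (SemanticGraphEdge edge : graph.edgeIterable()) {
    // relation, governor index, dependent index
    System.out.println(edge.getRelation() + " " + edge.getSource().index() + " " + edge.getTarget().index());
  }
}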
use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
the class CoNLLOutputter method print.
@Override
public void print(Annotation doc, OutputStream target, Options options) throws IOException {
  PrintWriter writer = new PrintWriter(IOUtils.encodedOutputStreamWriter(target, options.encoding));
  // vv A bunch of nonsense to get tokens vv
  if (doc.get(CoreAnnotations.SentencesAnnotation.class) != null) {
    for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      if (sentence.get(CoreAnnotations.TokensAnnotation.class) != null) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        SemanticGraph depTree = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        for (int i = 0; i < tokens.size(); ++i) {
          // Newline if applicable
          if (i > 0) {
            writer.println();
          }
          // Try to get the incoming dependency edge
          int head = -1;
          String deprel = null;
          if (depTree != null) {
            Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index).collect(Collectors.toSet());
            IndexedWord node = depTree.getNodeByIndexSafe(i + 1);
            if (node != null) {
              List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node);
              if (!edgeList.isEmpty()) {
                assert edgeList.size() == 1;
                head = edgeList.get(0).getGovernor().index();
                deprel = edgeList.get(0).getRelation().toString();
              } else if (rootSet.contains(i + 1)) {
                head = 0;
                deprel = "ROOT";
              }
            }
          }
          // Write the token
          writer.print(line(i + 1, tokens.get(i), head, deprel));
        }
      }
      writer.println();
      writer.println();
    }
  }
  writer.flush();
}
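The head/deprel resolution inside the token loop can be read as a small lookup of its own: a non-root token takes the governor index and relation of its single incoming edge, while a root token gets head 0 and relation ROOT. A minimal sketch of that step follows; the helper name and the "_" fallback for tokens absent from the graph are assumptions, not part of CoNLLOutputter.

// Hypothetical helper: resolve HEAD and DEPREL for a 1-based token index.
private static String[] headAndDeprel(SemanticGraph depTree, int tokenIndex) {
  IndexedWord node = depTree.getNodeByIndexSafe(tokenIndex);
  if (node != null) {
    List<SemanticGraphEdge> incoming = depTree.getIncomingEdgesSorted(node);
    if (!incoming.isEmpty()) {
      // In a basic dependency tree every non-root token has exactly one incoming edge.
      SemanticGraphEdge edge = incoming.get(0);
      return new String[] { String.valueOf(edge.getGovernor().index()), edge.getRelation().toString() };
    }
    if (depTree.getRoots().contains(node)) {
      return new String[] { "0", "ROOT" };
    }
  }
  // Token not present in the graph: fall back to underscores, as CoNLL columns conventionally do.
  return new String[] { "_", "_" };
}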
use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method addCaseMarkersToReln.
/**
 * Appends case marker information to nmod/acl/advcl relations.
 * <p/>
 * E.g. if there is a relation {@code nmod(gov, dep)} and {@code case(dep, prep)}, then
 * the {@code nmod} relation is renamed to {@code nmod:prep}.
 *
 * @param sg semantic graph
 * @param gov governor of the nmod/acl/advcl relation
 * @param mod modifier of the nmod/acl/advcl relation
 * @param caseMarkers {@code List<IndexedWord>} of all the case markers that depend on mod
 */
private static void addCaseMarkersToReln(SemanticGraph sg, IndexedWord gov, IndexedWord mod, List<IndexedWord> caseMarkers) {
  SemanticGraphEdge edge = sg.getEdge(gov, mod);
  int lastCaseMarkerIndex = 0;
  StringBuilder sb = new StringBuilder();
  boolean firstWord = true;
  for (IndexedWord cm : caseMarkers) {
    /* check for adjacency */
    if (lastCaseMarkerIndex == 0 || cm.index() == (lastCaseMarkerIndex + 1)) {
      if (!firstWord) {
        sb.append('_');
      }
      sb.append(cm.value());
      firstWord = false;
    } else {
      /* Should never happen, as there should never be two non-adjacent case markers.
       * If it does happen nevertheless, create an additional relation.
       */
      GrammaticalRelation reln = getCaseMarkedRelation(edge.getRelation(), sb.toString().toLowerCase());
      sg.addEdge(gov, mod, reln, Double.NEGATIVE_INFINITY, true);
      sb = new StringBuilder(cm.value());
      firstWord = true;
    }
    lastCaseMarkerIndex = cm.index();
  }
  GrammaticalRelation reln = getCaseMarkedRelation(edge.getRelation(), sb.toString().toLowerCase());
  edge.setRelation(reln);
}
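As the Javadoc says, the loop collapses adjacent case markers into one suffix before renaming the relation, so nmod plus case(dep, because) and case(dep, of) becomes nmod:because_of. A string-level sketch of just the joining step follows; the marker words are assumptions for the example, and in the real method they come from IndexedWord.value().

// Illustrative only: adjacent case-marker tokens are joined with '_' and lowercased,
// then appended to the base relation (nmod/acl/advcl) as a subtype.
List<String> markerWords = Arrays.asList("because", "of");  // assumed example markers
String suffix = String.join("_", markerWords).toLowerCase();
System.out.println("nmod:" + suffix);  // prints "nmod:because_of"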
use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method demoteQmodParentHelper.
private static void demoteQmodParentHelper(SemanticGraph sg, IndexedWord gov, IndexedWord oldHead) {
  if (!sg.getRoots().contains(oldHead)) {
    IndexedWord parent = sg.getParent(oldHead);
    if (parent == null) {
      return;
    }
    SemanticGraphEdge edge = sg.getEdge(parent, oldHead);
    sg.addEdge(parent, gov, edge.getRelation(), edge.getWeight(), edge.isExtra());
    sg.removeEdge(edge);
  } else {
    sg.getRoots().remove(oldHead);
    sg.addRoot(gov);
  }
  // temporary relation to keep the graph connected
  sg.addEdge(gov, oldHead, DEPENDENT, Double.NEGATIVE_INFINITY, false);
  sg.removeEdge(sg.getEdge(oldHead, gov));
}
use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project Info-Evaluation by TechnionYP5777.
the class AnalyzeParagraph method AnalyzeArrestsQuery.
public void AnalyzeArrestsQuery() {
  /*
   * The first step is initiating the Stanford CoreNLP pipeline (the pipeline
   * will later be used to evaluate the text and annotate it). The pipeline is
   * initiated using a Properties object which is used for setting all
   * needed entities, annotations, training data and so on, in order to
   * customize the pipeline initialization to contain only the models
   * you need.
   */
  final Properties props = new Properties();
  /*
   * The "annotators" property key tells the pipeline which entities
   * should be initiated with our pipeline object. See
   * http://nlp.stanford.edu/software/corenlp.shtml for a complete
   * reference to the "annotators" values you can set here and what they
   * will contribute to the analyzing process.
   */
  props.put("annotators", "tokenize, ssplit, pos, regexner, parse, lemma, natlog, openie");
  final StanfordCoreNLP pipeLine = new StanfordCoreNLP(props);
  // inputText will be the text to evaluate in this example
  int index = 0;
  for (final Element paragraph : this.Paragraphs) {
    final String inputText = paragraph.text() + "";
    final Annotation document = new Annotation(inputText);
    System.out.println(document);
    String reason = "";
    // more details about the reason, e.g. where it happened
    String details = "";
    String aux = "";
    String prefixDetails = "";
    // this string tells us what the penalty for the arrest is
    String penalty = "";
    // Finally we use the pipeline to annotate the document we created
    pipeLine.annotate(document);
    for (final CoreMap sentence : document.get(SentencesAnnotation.class)) {
      Sentence sent = new Sentence(sentence);
      if (sent.text().contains("sentenced") || sent.text().contains("juried") || sent.text().contains("sent to jail") || sent.text().contains("charged")) {
        penalty = ArrestPenalty(sent);
        System.out.println("Sentenced for:" + penalty);
      }
      final SemanticGraph dependencies = sentence.get(CollapsedDependenciesAnnotation.class);
      for (final IndexedWord root : dependencies.getRoots()) {
        for (final SemanticGraphEdge edge : dependencies.getOutEdgesSorted(root)) {
          final IndexedWord dep = edge.getDependent();
          final String rel = edge.getRelation() + "";
          if (!"arrested".equals(edge.getGovernor().word())) {
            switch (rel) {
              case "nmod:in":
                details += "in" + " " + dep.word() + " ";
                break;
              case "nmod:during":
                details += "during" + " " + dep.word() + " ";
                break;
              case "nmod:at":
                details += "at" + " " + dep.word() + " ";
                break;
            }
          } else {
            // Finding the reason in the paragraph
            if ("advcl".equals(rel) || "advcl:for".equals(rel) || "nmod:for".equals(rel)) {
              for (final SemanticGraphEdge keshet : dependencies.getOutEdgesSorted(dep)) {
                final String rel2 = keshet.getRelation() + "";
                final IndexedWord dep2 = keshet.getDependent();
                if ("amod".equals(rel2) || "dobj".equals(rel2)) {
                  reason += dep2.word() + " ";
                  try {
                    prefixDetails = (sentence + "").substring(dep.beginPosition(), dep2.endPosition());
                  } catch (IndexOutOfBoundsException e) {
                    prefixDetails = sentence + "";
                  }
                }
                if ("xcomp".equals(rel2))
                  aux += " " + dep2.word();
                switch (rel2) {
                  case "nmod:in":
                    final String longLocation = dep2.word();
                    details += "in ";
                    for (final SemanticGraphEdge keshet2 : dependencies.getOutEdgesSorted(dep2))
                      if ("compound".equals(keshet2.getRelation() + ""))
                        details += keshet2.getDependent().word() + " ";
                    details += longLocation;
                    break;
                  case "nmod:during":
                    details += "during" + " " + dep2.word() + " ";
                    break;
                  case "nmod:under":
                    details += "under " + dep2.word() + " ";
                    break;
                  case "nmod:of":
                    details += "of " + dep2.word();
                    break;
                  case "nmod:at":
                    details += "at" + " " + dep2.word() + " ";
                    break;
                }
                if ("suspicion".equals(keshet.getSource().word()) && "acl:of".equals(rel2))
                  details += dep2.word();
              }
              reason += dep.word();
              reason += aux;
            }
          }
        }
      }
      if (!"".equals(prefixDetails.trim())) {
        this.Information.add(prefixDetails.trim());
        System.out.println(this.Information.get(index) + "");
        ++index;
      }
      this.Information.add((reason + " " + details).trim());
      System.out.println(this.Information.get(index) + "");
      ++index;
    }
  }
}
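The traversal at the heart of this method — walking the outgoing edges of each sentence root and branching on the relation name — can be isolated as follows. This is an illustrative sketch, not part of AnalyzeParagraph: the example sentence is assumed, and the relations actually printed depend on the parser output.

// Illustrative sketch of the traversal pattern used above.
// Reuses the pipeLine configured earlier; the sentence text is an assumed example.
final Annotation example = new Annotation("The suspect was arrested in Berlin for stealing cars.");
pipeLine.annotate(example);
for (final CoreMap sentence : example.get(SentencesAnnotation.class)) {
  final SemanticGraph dependencies = sentence.get(CollapsedDependenciesAnnotation.class);
  for (final IndexedWord root : dependencies.getRoots()) {
    for (final SemanticGraphEdge edge : dependencies.getOutEdgesSorted(root)) {
      // e.g. relation "nmod:in" with dependent "Berlin", depending on the parse
      System.out.println(edge.getRelation() + " -> " + edge.getDependent().word());
    }
  }
}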