use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project Info-Evaluation by TechnionYP5777.
the class AnalyzeParagragh method Analyze.
/**
 * Annotates the {@code input} text with a Stanford CoreNLP pipeline and walks the
 * collapsed dependency graph of every sentence to collect the subject's name, the
 * reason and the surrounding details (prepositional modifiers).
 *
 * @return a {@code TableTuple} built from the extracted name (falling back to
 *         {@code getName()} when none was found), the value of {@code getDate(year)},
 *         and the trimmed concatenation of reason and details
 */
public TableTuple Analyze() {
    /*
     * The pipeline is configured through a Properties object; the "annotators"
     * key lists the annotators to run. See
     * http://nlp.stanford.edu/software/corenlp.shtml for the available values.
     */
    final Properties pipelineConfig = new Properties();
    pipelineConfig.put("annotators", "tokenize,ssplit, pos, regexner, parse,lemma,natlog,openie");
    final StanfordCoreNLP pipeLine = new StanfordCoreNLP(pipelineConfig);

    // Wrap the text to evaluate in an Annotation and let the pipeline annotate it.
    final Annotation document = new Annotation(input + "");
    pipeLine.annotate(document);

    final String fallbackName = getName();
    final String input_date = getDate(year);

    final StringBuilder reason = new StringBuilder();
    // Extra information about the reason, e.g. where it happened.
    final StringBuilder details = new StringBuilder();
    // Collected xcomp words; never reset, so it keeps growing across matching edges.
    final StringBuilder aux = new StringBuilder();
    final StringBuilder accurateName = new StringBuilder();

    for (final CoreMap sentence : document.get(SentencesAnnotation.class)) {
        final SemanticGraph dependencies = sentence.get(CollapsedDependenciesAnnotation.class);
        for (final IndexedWord root : dependencies.getRoots()) {
            for (final SemanticGraphEdge edge : dependencies.getOutEdgesSorted(root)) {
                final IndexedWord dep = edge.getDependent();
                final String rel = String.valueOf(edge.getRelation());

                if (!"arrested".equals(edge.getGovernor().word())) {
                    // Governor is not "arrested": only pick up in/during/at modifiers.
                    if ("nmod:in".equals(rel) || "nmod:during".equals(rel) || "nmod:at".equals(rel)) {
                        details.append(rel.substring("nmod:".length())).append(' ')
                                .append(dep.word()).append(' ');
                    }
                    continue;
                }

                // Finding the name in a more accurate manner: the passive subject
                // preceded by its PERSON / compound / det children.
                if ("nsubjpass".equals(rel)) {
                    for (final SemanticGraphEdge nameEdge : dependencies.getOutEdgesSorted(dep)) {
                        final IndexedWord namePart = nameEdge.getDependent();
                        final String nameRel = String.valueOf(nameEdge.getRelation());
                        final boolean belongsToName = "PERSON".equals(namePart.ner())
                                || "compound".equals(nameRel)
                                || "det".equals(nameRel);
                        if (belongsToName) {
                            accurateName.append(namePart.word()).append(' ');
                        }
                    }
                    accurateName.append(dep.word());
                }

                // Finding the reason in the paragraph.
                final boolean isReasonClause =
                        "advcl".equals(rel) || "advcl:for".equals(rel) || "nmod:for".equals(rel);
                if (!isReasonClause) {
                    continue;
                }
                for (final SemanticGraphEdge reasonEdge : dependencies.getOutEdgesSorted(dep)) {
                    final String reasonRel = String.valueOf(reasonEdge.getRelation());
                    final IndexedWord reasonDep = reasonEdge.getDependent();

                    if ("amod".equals(reasonRel) || "dobj".equals(reasonRel)) {
                        reason.append(reasonDep.word()).append(' ');
                    }
                    if ("xcomp".equals(reasonRel)) {
                        aux.append(' ').append(reasonDep.word());
                    }

                    switch (reasonRel) {
                        case "nmod:in": {
                            // Location: "in", then its compound parts, then the head word.
                            final String locationHead = reasonDep.word();
                            details.append("in ");
                            for (final SemanticGraphEdge partEdge : dependencies.getOutEdgesSorted(reasonDep)) {
                                if ("compound".equals(String.valueOf(partEdge.getRelation()))) {
                                    details.append(partEdge.getDependent().word()).append(' ');
                                }
                            }
                            details.append(locationHead);
                            break;
                        }
                        case "nmod:during":
                            details.append("during ").append(reasonDep.word()).append(' ');
                            break;
                        case "nmod:under":
                            details.append("under ").append(reasonDep.word()).append(' ');
                            break;
                        case "nmod:of":
                            details.append("of ").append(reasonDep.word());
                            break;
                        case "nmod:at":
                            details.append("at ").append(reasonDep.word()).append(' ');
                            break;
                        default:
                            break;
                    }

                    if ("suspicion".equals(reasonEdge.getSource().word()) && "acl:of".equals(reasonRel)) {
                        details.append(reasonDep.word());
                    }
                }
                reason.append(dep.word());
                reason.append(aux);
            }
        }
    }

    final String name = accurateName.length() == 0 ? fallbackName : accurateName.toString();
    return new TableTuple(name, input_date, (reason + " " + details).trim());
}
use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
the class EnglishGrammaticalStructure method correctWHAttachment.
/**
 * Tries to correct complicated cases of WH-movement in
 * sentences such as "What does Mary seem to have?", in
 * which "What" should attach to "have" instead of the
 * control verb.
 *
 * @param sg The semantic graph to operate on; it is modified in place.
 */
private static void correctWHAttachment(SemanticGraph sg) {
    /* Semgrexes require a graph with a root. */
    if (sg.getRoots().isEmpty()) {
        return;
    }

    /* Matching runs on a soft copy while the edits below are applied to sg itself. */
    SemgrexMatcher matcher = XCOMP_PATTERN.matcher(sg.makeSoftCopy());
    while (matcher.findNextMatchingNode()) {
        IndexedWord controlVerb = matcher.getNode("root");
        IndexedWord embeddedVerb = matcher.getNode("embedded");
        IndexedWord whWord = matcher.getNode("wh");
        IndexedWord existingObject = matcher.getNode("obj");

        /* Only matches whose "wh" node carries a W* tag are of interest. */
        if (!whWord.tag().startsWith("W")) {
            continue;
        }

        /*
         * Reattach when the control verb already has an object, or when its
         * lemma matches the NP_V_S_INF_VERBS_REGEX pattern (control verbs that
         * can't take an object). The lemma is only computed in the second case.
         */
        boolean mustReattach = existingObject != null
                || Morphology.lemmaStatic(controlVerb.value(), controlVerb.tag())
                        .matches(EnglishPatterns.NP_V_S_INF_VERBS_REGEX);
        if (!mustReattach) {
            continue;
        }

        SemanticGraphEdge whEdge = sg.getEdge(controlVerb, whWord);
        if (whEdge == null) {
            continue;
        }
        /* Move the WH-word from the control verb to the embedded verb as its direct object. */
        sg.removeEdge(whEdge);
        sg.addEdge(embeddedVerb, whWord, DIRECT_OBJECT, Double.NEGATIVE_INFINITY, false);
    }
}
use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method processNamesHelper.
/**
 * Restructures a (possibly multi-word) name so that its leftmost token becomes
 * the head and the remaining name tokens hang off it via name relations.
 *
 * @param sg        the semantic graph, modified in place
 * @param oldHead   the current head of the name
 * @param nameParts the other tokens of the name; sorted in place by this method
 */
private static void processNamesHelper(SemanticGraph sg, IndexedWord oldHead, List<IndexedWord> nameParts) {
    if (nameParts.isEmpty()) {
        // The named entity only spans one token: change compound relations
        // to nmod relations to get the right structure for NPs with additional
        // modifiers such as "Mrs. Clinton".
        for (IndexedWord child : new HashSet<>(sg.getChildren(oldHead))) {
            SemanticGraphEdge compoundEdge = sg.getEdge(oldHead, child);
            if (compoundEdge.getRelation() != UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER) {
                continue;
            }
            sg.addEdge(oldHead, child, UniversalEnglishGrammaticalRelations.NOMINAL_MODIFIER,
                    compoundEdge.getWeight(), compoundEdge.isExtra());
            sg.removeEdge(compoundEdge);
        }
        return;
    }

    // After sorting, the first element is the leftmost name token, i.e. the new head.
    Collections.sort(nameParts);
    IndexedWord newHead = nameParts.get(0);

    // Check whether {nameParts[0], ..., nameParts[n], oldHead} are a contiguous NP.
    int headIndex = oldHead.index();
    for (int position = newHead.index(); position < headIndex; position++) {
        IndexedWord node = sg.getNodeByIndexSafe(position);
        if (node == null) {
            return;
        }
        // Not in nameParts and not a punctuation mark => not a contiguous NP.
        boolean isNamePart = nameParts.contains(node);
        if (!isNamePart && PUNCT_TAG_FILTER.test(node.tag())) {
            return;
        }
    }

    // The old head must either have a governor or be a root of the graph.
    IndexedWord gov = sg.getParent(oldHead);
    if (gov == null && !sg.getRoots().contains(oldHead)) {
        return;
    }

    // Change structure and relations, iterating over a snapshot of the children
    // because edges of oldHead are added and removed inside the loop.
    for (IndexedWord child : new HashSet<>(sg.getChildren(oldHead))) {
        if (child == newHead) {
            // Make the leftmost word the new head.
            if (gov == null) {
                sg.getRoots().add(newHead);
                sg.getRoots().remove(oldHead);
            } else {
                SemanticGraphEdge incoming = sg.getEdge(gov, oldHead);
                sg.addEdge(gov, newHead, incoming.getRelation(), incoming.getWeight(), incoming.isExtra());
                sg.removeEdge(incoming);
            }
            // Swap the direction of the relation between old head and new head
            // and change it to a name relation.
            SemanticGraphEdge headEdge = sg.getEdge(oldHead, newHead);
            sg.addEdge(newHead, oldHead, UniversalEnglishGrammaticalRelations.NAME_MODIFIER,
                    headEdge.getWeight(), headEdge.isExtra());
            sg.removeEdge(headEdge);
            continue;
        }

        // Every other child is moved from the old head to the new head.
        SemanticGraphEdge oldEdge = sg.getEdge(oldHead, child);
        GrammaticalRelation reln;
        if (nameParts.contains(child)) {
            // Part of the name: attach with a name relation.
            reln = UniversalEnglishGrammaticalRelations.NAME_MODIFIER;
        } else if (oldEdge.getRelation() == UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER) {
            // Not the entire compound is part of the named entity:
            // attach the other tokens via an nmod relation.
            reln = UniversalEnglishGrammaticalRelations.NOMINAL_MODIFIER;
        } else {
            reln = oldEdge.getRelation();
        }
        sg.addEdge(newHead, child, reln, oldEdge.getWeight(), oldEdge.isExtra());
        sg.removeEdge(oldEdge);
    }
}
use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method expandPPConjunction.
/*
 * Rewrites a conjunction below a prepositional modifier: for every conjunct a
 * soft copy of the modifier's governor is created, the conjunct is attached to
 * that copy with the original relation, and the copy is conjoined to the
 * governor. The coordinating conjunction is moved to the governor as well.
 */
private static void expandPPConjunction(SemanticGraph sg, IndexedWord gov, List<IndexedWord> conjDeps, IndexedWord ccDep) {
    IndexedWord nmodGov = sg.getParent(gov);
    if (nmodGov == null) {
        return;
    }

    /* If nmodGov has an original node, conjunctions are attached to that one. */
    IndexedWord originalGov = nmodGov.getOriginal();
    IndexedWord conjGov = originalGov == null ? nmodGov : originalGov;
    GrammaticalRelation rel = sg.reln(nmodGov, gov);

    List<IndexedWord> newConjDeps = Generics.newLinkedList();
    for (IndexedWord conjDep : conjDeps) {
        IndexedWord govCopy = nmodGov.makeSoftCopy();

        /* Change conj(nmod-1, nmod-2) to nmod(nmod-1-gov, nmod-2) */
        SemanticGraphEdge conjEdge = sg.getEdge(gov, conjDep);
        if (conjEdge != null) {
            sg.removeEdge(conjEdge);
            sg.addEdge(govCopy, conjDep, rel, Double.NEGATIVE_INFINITY, false);
        }

        /* Add relation to copy node. */
        sg.addEdge(conjGov, govCopy, CONJUNCT, Double.NEGATIVE_INFINITY, false);
        newConjDeps.add(govCopy);
    }

    /* Attach CC node to conjGov */
    SemanticGraphEdge ccEdge = sg.getEdge(gov, ccDep);
    if (ccEdge != null) {
        sg.removeEdge(ccEdge);
        sg.addEdge(conjGov, ccDep, COORDINATION, Double.NEGATIVE_INFINITY, false);
    }

    /* Add conjunction information for these relations already at this point.
     * It could be that we add several coordinating conjunctions while collapsing
     * and we might not know which conjunction belongs to which conjunct at a later
     * point.
     */
    addConjToReln(sg, conjGov, newConjDeps, ccDep);
}
use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method treatCC.
/**
 * Propagates dependencies across conjuncts. For every conjunction edge
 * conj(gov, dep), each edge that has {@code gov} as its dependent (other than
 * root and case-marker edges) is copied as an extra edge pointing at
 * {@code dep}, and the subject of {@code gov} is shared with {@code dep} when
 * {@code dep} is tagged VB* or JJ* and has no subject of its own.
 *
 * @param sg the semantic graph to operate on; new extra edges are added in place
 */
private static void treatCC(SemanticGraph sg) {
    // Map from each node to the set of edges in which the node appears as dependent.
    Map<IndexedWord, Set<SemanticGraphEdge>> map = Generics.newHashMap();
    // Map from each node that governs a subject relation to (the first such) relation.
    Map<IndexedWord, SemanticGraphEdge> subjectMap = Generics.newHashMap();
    // Nodes with a passive auxiliary on them.
    Set<IndexedWord> withPassiveAuxiliary = Generics.newHashSet();
    // Governors of relative-clause modifier edges; only used for membership tests.
    Set<IndexedWord> rcmodHeads = Generics.newHashSet();

    for (SemanticGraphEdge edge : sg.edgeIterable()) {
        // NB: Here we use a TreeSet (which extends SortedSet) to guarantee
        // that results are deterministic.
        map.computeIfAbsent(edge.getDependent(), key -> new TreeSet<>()).add(edge);

        if (edge.getRelation().equals(AUX_PASSIVE_MODIFIER)) {
            withPassiveAuxiliary.add(edge.getGovernor());
        }

        // look for subjects; only the first subject edge per governor is kept
        GrammaticalRelation parentRelation = edge.getRelation().getParent();
        if (parentRelation == NOMINAL_SUBJECT || parentRelation == SUBJECT || parentRelation == CLAUSAL_SUBJECT) {
            subjectMap.putIfAbsent(edge.getGovernor(), edge);
        }

        // look for rcmod relations
        if (edge.getRelation() == RELATIVE_CLAUSE_MODIFIER) {
            rcmodHeads.add(edge.getGovernor());
        }
    }

    // Iterate over a soft copy so that the edges added to sg below are not
    // visited by this loop.
    SemanticGraph sgCopy = sg.makeSoftCopy();

    // find typed deps of form conj(gov,dep)
    for (SemanticGraphEdge edge : sgCopy.edgeIterable()) {
        if (!UniversalEnglishGrammaticalRelations.getConjs().contains(edge.getRelation())) {
            continue;
        }
        IndexedWord gov = edge.getGovernor();
        IndexedWord dep = edge.getDependent();

        // look at the dep in the conjunct
        Set<SemanticGraphEdge> gov_relations = map.get(gov);
        if (gov_relations != null) {
            for (SemanticGraphEdge edge1 : gov_relations) {
                IndexedWord newGov = edge1.getGovernor();
                // is possible to have overlapping newGov & dep
                if (newGov.equals(dep)) {
                    continue;
                }
                GrammaticalRelation newRel = edge1.getRelation();
                //TODO: Do we want to copy case markers here?
                if (newRel == ROOT || newRel == CASE_MARKER) {
                    continue;
                }
                if (rcmodHeads.contains(gov) && rcmodHeads.contains(dep)) {
                    // to prevent wrong propagation in the case of long dependencies in relative clauses
                    if (newRel != DIRECT_OBJECT && newRel != NOMINAL_SUBJECT) {
                        if (DEBUG) {
                            log.info("Adding new " + newRel + " dependency from " + newGov + " to " + dep + " (subj/obj case)");
                        }
                        sg.addEdge(newGov, dep, newRel, Double.NEGATIVE_INFINITY, true);
                    }
                } else {
                    if (DEBUG) {
                        log.info("Adding new " + newRel + " dependency from " + newGov + " to " + dep);
                    }
                    sg.addEdge(newGov, dep, newRel, Double.NEGATIVE_INFINITY, true);
                }
            }
        }

        // propagate subjects
        // look at the gov in the conjunct: if it is has a subject relation,
        // the dep is a verb and the dep doesn't have a subject relation
        // then we want to add a subject relation for the dep.
        // (By testing for the dep to be a verb, we are going to miss subject of
        // copular verbs! but is it safe to relax this assumption?? i.e., just
        // test for the subject part)
        // CDM 2008: I also added in JJ, since participial verbs are often
        // tagged JJ
        String tag = dep.tag();
        if (subjectMap.containsKey(gov) && (tag.startsWith("VB") || tag.startsWith("JJ")) && !subjectMap.containsKey(dep)) {
            SemanticGraphEdge tdsubj = subjectMap.get(gov);
            // check for wrong nsubjpass: if the new verb is VB or VBZ or VBP or JJ, then
            // add nsubj (if it is tagged correctly, should do this for VBD too, but we don't)
            GrammaticalRelation relation = tdsubj.getRelation();
            if (relation == NOMINAL_PASSIVE_SUBJECT) {
                if (isDefinitelyActive(tag)) {
                    relation = NOMINAL_SUBJECT;
                }
            } else if (relation == CLAUSAL_PASSIVE_SUBJECT) {
                if (isDefinitelyActive(tag)) {
                    relation = CLAUSAL_SUBJECT;
                }
            } else if (relation == NOMINAL_SUBJECT) {
                if (withPassiveAuxiliary.contains(dep)) {
                    relation = NOMINAL_PASSIVE_SUBJECT;
                }
            } else if (relation == CLAUSAL_SUBJECT) {
                if (withPassiveAuxiliary.contains(dep)) {
                    relation = CLAUSAL_PASSIVE_SUBJECT;
                }
            }
            if (DEBUG) {
                log.info("Adding new " + relation + " dependency from " + dep + " to " + tdsubj.getDependent() + " (subj propagation case)");
            }
            sg.addEdge(dep, tdsubj.getDependent(), relation, Double.NEGATIVE_INFINITY, true);
        }

        // Objects are deliberately NOT propagated.
        // cdm july 2010: An earlier version copied a dobj from the first
        // clause to a later conjoined clause if it didn't contain its own dobj
        // or prepc. But this is too aggressive and wrong if the later clause is
        // intransitive (including passivized cases) and so I think we have to
        // not have this done always, and see no good "sometimes" heuristic.
        // IF WE WERE TO REINSTATE, SHOULD ALSO NOT ADD OBJ IF THERE IS A ccomp
        // (SBAR). Reinstating would additionally need a map from governors to
        // their object edges and the set of dependents of "acl:" / "advcl:"
        // edges (to avoid wrong propagation of dobj); neither is built here
        // because nothing else reads them.
    }
}
Aggregations