use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class EnglishGrammaticalStructure method addStrandedPobj.
// Using this makes addStrandedPobj a lot cleaner looking, but it
// makes the converter roughly 2% slower. Might not be worth it.
// Similar changes could be made to many of the other complicated
// collapsing methods.
// static final SemgrexPattern strandedPobjSemgrex = SemgrexPattern.compile("{}=head >rcmod ({} [ == {}=prepgov | >xcomp {}=prepgov | >conj {}=prepgov ]) : {}=prepgov >prep ({}=prepdep !>pcomp {} !> pobj {})");
// // Deal with preposition stranding in relative clauses.
// // For example, "the only thing I'm rooting for"
// // This method will add pobj(for, thing) by connecting using the rcmod and prep
// private static void addStrandedPobj(List<TypedDependency> list) {
// SemanticGraph graph = new SemanticGraph(list);
// SemgrexMatcher matcher = strandedPobjSemgrex.matcher(graph);
// while (matcher.find()) {
// IndexedWord gov = matcher.getNode("prepdep");
// IndexedWord dep = matcher.getNode("head");
// TypedDependency newDep = new TypedDependency(PREPOSITIONAL_OBJECT, gov, dep);
// newDep.setExtra();
// list.add(newDep);
// }
// }
// Handles stranded prepositions inside relative clauses, as in
// "the only thing I'm rooting for".
// For every rcmod(head, clause) dependency, the clause root plus any of its
// xcomp/conj dependents are inspected. Each preposition attached to one of
// those nodes that has no pobj/pcomp of its own receives a new
// pobj(preposition, head) dependency, e.g. pobj(for, thing).
// The new dependencies are appended to the given list once the scan is done.
private static void addStrandedPobj(List<TypedDependency> list) {
  // Collected separately so that the list is not modified while being iterated.
  List<TypedDependency> strandedObjects = Generics.newArrayList();
  // Scratch list, reused for each relative clause.
  List<IndexedWord> clauseNodes = Generics.newArrayList();

  for (TypedDependency relClause : list) {
    if (relClause.reln() == RELATIVE_CLAUSE_MODIFIER) {
      IndexedWord antecedent = relClause.gov();
      IndexedWord clauseRoot = relClause.dep();

      // The clause root itself, followed by its xcomp and conj dependents.
      clauseNodes.clear();
      clauseNodes.add(clauseRoot);
      for (TypedDependency edge : list) {
        if (edge.gov().equals(clauseRoot) && (edge.reln() == XCLAUSAL_COMPLEMENT || edge.reln() == CONJUNCT)) {
          clauseNodes.add(edge.dep());
        }
      }

      for (IndexedWord clauseNode : clauseNodes) {
        for (TypedDependency prep : list) {
          if (prep.gov().equals(clauseNode) && prep.reln() == PREPOSITIONAL_MODIFIER) {
            IndexedWord preposition = prep.dep();

            // A preposition is stranded when nothing is attached to it as pcomp or pobj.
            boolean stranded = true;
            for (TypedDependency candidate : list) {
              if (candidate.gov().equals(preposition) && (candidate.reln() == PREPOSITIONAL_COMPLEMENT || candidate.reln() == PREPOSITIONAL_OBJECT)) {
                stranded = false;
                break;
              }
            }

            if (stranded) {
              strandedObjects.add(new TypedDependency(PREPOSITIONAL_OBJECT, preposition, antecedent));
            }
          }
        }
      }
    }
  }

  if (!strandedObjects.isEmpty()) {
    list.addAll(strandedObjects);
  }
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class EnglishGrammaticalStructure method collapseMultiWordPrep.
/**
 * Collapses one two-word preposition. The pattern looked for is
 * prep|advmod|dep|amod|mwe(gov, mwp0), X(mwp0, mwp1) and either
 * pobj|pcomp(mwp1, compl) or pobj|pcomp(mwp0, compl); it is rewritten to
 * prep_mwp0_mwp1(gov, compl) (or prepc_mwp0_mwp1 when the complement was a
 * pcomp).
 * <p/>
 * If any part of the pattern is missing the method returns without touching
 * either collection. Otherwise the three matched dependencies are marked
 * KILL, remaining dependents of the two preposition words are re-attached,
 * and the contents of {@code list} are replaced by {@code newTypedDeps}.
 *
 * @param list List of typedDependencies to work on; rewritten in place on success
 * @param newTypedDeps Collection that receives the collapsed dependency followed
 *          by every surviving dependency from {@code list}
 * @param str_mwp0 First word used to build the name of the collapsed relation
 * @param str_mwp1 Second word used to build the name of the collapsed relation
 * @param w_mwp0 First word of the multiword preposition to look for
 *          (compared case-insensitively)
 * @param w_mwp1 Second word of the multiword preposition to look for
 *          (compared case-insensitively)
 */
private static void collapseMultiWordPrep(Collection<TypedDependency> list, Collection<TypedDependency> newTypedDeps, String str_mwp0, String str_mwp1, String w_mwp0, String w_mwp1) {
  // Step 1: locate the dependency that links the two words of the preposition.
  // The words must be adjacent in the sentence; the last match in the list wins.
  TypedDependency mwpLink = null;
  for (TypedDependency td : list) {
    boolean wordsMatch = td.gov().value().equalsIgnoreCase(w_mwp0) && td.dep().value().equalsIgnoreCase(w_mwp1);
    if (wordsMatch && Math.abs(td.gov().index() - td.dep().index()) == 1) {
      mwpLink = td;
    }
  }
  if (mwpLink == null) {
    return;
  }
  IndexedWord firstWord = mwpLink.gov();
  IndexedWord secondWord = mwpLink.dep();

  // Step 2: locate prep|advmod|amod|dep|mwe(gov, firstWord); again the last match wins.
  TypedDependency attachment = null;
  for (TypedDependency td : list) {
    boolean attachesPreposition = td.reln() == PREPOSITIONAL_MODIFIER
        || td.reln() == ADVERBIAL_MODIFIER
        || td.reln() == ADJECTIVAL_MODIFIER
        || td.reln() == DEPENDENT
        || td.reln() == MULTI_WORD_EXPRESSION;
    if (attachesPreposition && td.dep().equals(firstWord)) {
      attachment = td;
    }
  }
  if (attachment == null) {
    return;
  }
  IndexedWord governor = attachment.gov();

  // Step 3: locate the complement, pobj|pcomp governed by either word.
  // Odd constructions may have several; the one whose dependent has the
  // lowest index is kept.
  String collapsedName = str_mwp0 + '_' + str_mwp1;
  TypedDependency complement = null;
  TypedDependency collapsed = null;
  for (TypedDependency td : list) {
    boolean isComplement = td.reln() == PREPOSITIONAL_OBJECT || td.reln() == PREPOSITIONAL_COMPLEMENT;
    if (!isComplement || !(td.gov().equals(secondWord) || td.gov().equals(firstWord))) {
      continue;
    }
    if (complement != null && complement.dep().index() <= td.dep().index()) {
      continue;
    }
    complement = td;
    GrammaticalRelation gr = td.reln() == PREPOSITIONAL_COMPLEMENT
        ? EnglishGrammaticalRelations.getPrepC(collapsedName)
        : EnglishGrammaticalRelations.getPrep(collapsedName);
    if (governor != null) {
      collapsed = new TypedDependency(gr, governor, complement.dep());
    }
  }
  if (complement == null || collapsed == null) {
    return;
  }

  if (DEBUG) {
    log.info("Removing " + attachment + ", " + mwpLink + ", and " + complement);
    log.info(" and adding " + collapsed);
  }

  // Step 4: retire the matched dependencies and emit the collapsed one.
  attachment.setReln(KILL);
  mwpLink.setReln(KILL);
  complement.setReln(KILL);
  newTypedDeps.add(collapsed);

  // Step 5: re-attach anything still hanging off the two preposition words
  // and carry every surviving dependency over.
  for (TypedDependency survivor : list) {
    if (survivor.reln() == KILL) {
      continue;
    }
    if (survivor.gov().equals(firstWord) || survivor.gov().equals(secondWord)) {
      // A temporal modifier buried inside the PP ("during the same period
      // last year") goes to the complement; everything else goes to the governor.
      IndexedWord newGovernor = survivor.reln() == TEMPORAL_MODIFIER ? complement.dep() : governor;
      survivor.setGov(newGovernor);
    }
    if (!newTypedDeps.contains(survivor)) {
      newTypedDeps.add(survivor);
    }
  }

  list.clear();
  list.addAll(newTypedDeps);
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class EnglishGrammaticalStructure method addRef.
/**
 * Adds ref dependencies across relative clauses. For each
 * rcmod(head, modifier) dependency, the children and grandchildren of
 * {@code modifier} are searched for words matching
 * {@code EnglishPatterns.RELATIVIZING_WORD_PATTERN} (that/what/which/etc.).
 * The matching word with the lowest index becomes the dependent of a new
 * ref(head, word) dependency; when a child and a grandchild have the same
 * index, the child is used. New dependencies not already present in the
 * list are flagged as extra and appended.
 *
 * @param list Collection of typedDependencies to scan and extend
 */
private static void addRef(Collection<TypedDependency> list) {
  List<TypedDependency> candidates = new ArrayList<>();

  for (TypedDependency rcmod : list) {
    // ref dependencies are only added across relative clauses
    if (rcmod.reln() == RELATIVE_CLAUSE_MODIFIER) {
      IndexedWord clauseRoot = rcmod.dep();

      // Leftmost relativizing word among the children / grandchildren of the clause root.
      IndexedWord childWord = null;
      IndexedWord grandchildWord = null;

      // TODO: could be made more efficient
      for (TypedDependency child : list) {
        if (!child.gov().equals(clauseRoot)) {
          continue;
        }
        IndexedWord word = child.dep();
        if (EnglishPatterns.RELATIVIZING_WORD_PATTERN.matcher(word.value()).matches()
            && (childWord == null || word.index() < childWord.index())) {
          childWord = word;
        }
        for (TypedDependency grandchild : list) {
          if (grandchild.gov().equals(word)
              && EnglishPatterns.RELATIVIZING_WORD_PATTERN.matcher(grandchild.dep().value()).matches()
              && (grandchildWord == null || grandchild.dep().index() < grandchildWord.index())) {
            grandchildWord = grandchild.dep();
          }
        }
      }

      // The grandchild is chosen only when it lies strictly to the left of the child.
      IndexedWord referent;
      if (grandchildWord != null && (childWord == null || grandchildWord.index() < childWord.index())) {
        referent = grandchildWord;
      } else {
        referent = childWord;
      }

      if (referent != null) {
        candidates.add(new TypedDependency(REFERENT, rcmod.gov(), referent));
      }
    }
  }

  for (TypedDependency candidate : candidates) {
    if (list.contains(candidate)) {
      continue;
    }
    candidate.setExtra();
    list.add(candidate);
  }
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class EnglishGrammaticalStructure method collapse3WP.
/**
 * Collapse 3-word preposition of the following format: <br/>
 * This will be the case when the preposition is analyzed as a NP <br/>
 * prep(gov, mwp0) <br/>
 * X(mwp0,mwp1) <br/>
 * X(mwp1,mwp2) <br/>
 * pobj|pcomp(mwp2, compl) <br/>
 * -> prep_mwp[0]_mwp[1]_mwp[2](gov, compl)
 * <p/>
 *
 * It also takes flat annotation into account: <br/>
 * prep(gov,mwp0) <br/>
 * X(mwp0,mwp1) <br/>
 * X(mwp0,mwp2) <br/>
 * pobj|pcomp(mwp0, compl) <br/>
 * -> prep_mwp[0]_mwp[1]_mwp[2](gov, compl)
 * <p/>
 *
 * All entries of THREEWORD_PREPS are first tried against the NP-style
 * pattern, then all entries are tried again against the flat pattern.
 *
 * @param list List of typedDependencies to work on; rewritten in place
 *          whenever a preposition is collapsed
 */
private static void collapse3WP(Collection<TypedDependency> list) {
  // Scratch collection shared by all attempts; each attempt clears it first.
  Collection<TypedDependency> newTypedDeps = new ArrayList<>();

  // first, loop over the prepositions for NP annotation
  for (String[] mwp : THREEWORD_PREPS) {
    collapseThreeWordPrep(list, newTypedDeps, mwp, false);
  }

  // second, loop again looking at flat annotation
  for (String[] mwp : THREEWORD_PREPS) {
    collapseThreeWordPrep(list, newTypedDeps, mwp, true);
  }
}

/**
 * Tries to collapse one 3-word preposition under one of the two annotation
 * styles handled by {@link #collapse3WP(Collection)}.
 * <p/>
 * The two styles differ only in where the third word and the complement
 * attach: in the NP style the third word hangs off the second word
 * (adjacent to it) and the complement hangs off the third word; in the flat
 * style both hang off the first word, with the third word two positions
 * away from it.
 *
 * @param list List of typedDependencies to work on; replaced by the contents
 *          of {@code newTypedDeps} when the pattern is found
 * @param newTypedDeps Scratch collection; cleared on entry
 * @param mwp The three words of the preposition (compared case-insensitively)
 * @param flat {@code true} to look for the flat annotation, {@code false}
 *          for the NP-style annotation
 */
private static void collapseThreeWordPrep(Collection<TypedDependency> list, Collection<TypedDependency> newTypedDeps, String[] mwp, boolean flat) {
  newTypedDeps.clear();

  // X(mwp0, mwp1): the first two words must be adjacent; the last match wins.
  IndexedWord mwp0 = null;
  IndexedWord mwp1 = null;
  TypedDependency dep1 = null;
  for (TypedDependency td : list) {
    if (td.gov().value().equalsIgnoreCase(mwp[0]) && td.dep().value().equalsIgnoreCase(mwp[1]) && Math.abs(td.gov().index() - td.dep().index()) == 1) {
      mwp0 = td.gov();
      mwp1 = td.dep();
      dep1 = td;
    }
  }
  if (dep1 == null) {
    // Without the first link there is nothing the third word could attach to.
    return;
  }

  // X(mwp1, mwp2) at distance 1 for NP style, X(mwp0, mwp2) at distance 2 for flat style.
  IndexedWord thirdWordGovernor = flat ? mwp0 : mwp1;
  int thirdWordDistance = flat ? 2 : 1;
  IndexedWord mwp2 = null;
  TypedDependency dep2 = null;
  for (TypedDependency td : list) {
    if (td.gov().equals(thirdWordGovernor) && td.dep().value().equalsIgnoreCase(mwp[2]) && Math.abs(td.gov().index() - td.dep().index()) == thirdWordDistance) {
      mwp2 = td.dep();
      dep2 = td;
    }
  }
  if (dep2 == null) {
    return;
  }

  // now search for prep(gov, mwp0)
  IndexedWord governor = null;
  TypedDependency prep = null;
  for (TypedDependency td : list) {
    if (td.reln() == PREPOSITIONAL_MODIFIER && td.dep().equals(mwp0)) {
      prep = td;
      governor = prep.gov();
    }
  }

  // search for the complement: pobj|pcomp(mwp2, X) for NP style,
  // pobj|pcomp(mwp0, X) for flat style; the last match wins.
  IndexedWord complementGovernor = flat ? mwp0 : mwp2;
  String collapsedName = mwp[0] + '_' + mwp[1] + '_' + mwp[2];
  TypedDependency pobj = null;
  TypedDependency newtd = null;
  for (TypedDependency td : list) {
    if (!td.gov().equals(complementGovernor)) {
      continue;
    }
    GrammaticalRelation gr;
    if (td.reln() == PREPOSITIONAL_OBJECT) {
      gr = EnglishGrammaticalRelations.getPrep(collapsedName);
    } else if (td.reln() == PREPOSITIONAL_COMPLEMENT) {
      gr = EnglishGrammaticalRelations.getPrepC(collapsedName);
    } else {
      continue;
    }
    pobj = td;
    if (governor != null) {
      newtd = new TypedDependency(gr, governor, pobj.dep());
    }
  }

  if (prep == null || pobj == null || newtd == null) {
    return;
  }

  // Retire the matched dependencies and add the collapsed one.
  prep.setReln(KILL);
  dep1.setReln(KILL);
  dep2.setReln(KILL);
  pobj.setReln(KILL);
  newTypedDeps.add(newtd);

  // and promote possible orphans
  for (TypedDependency td : list) {
    if (td.reln() == KILL) {
      continue;
    }
    if (td.gov().equals(mwp0) || td.gov().equals(mwp1) || td.gov().equals(mwp2)) {
      td.setGov(governor);
    }
    if (!newTypedDeps.contains(td)) {
      newTypedDeps.add(td);
    }
  }

  list.clear();
  list.addAll(newTypedDeps);
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class EnglishGrammaticalStructure method addExtraNSubj.
/**
 * Adds extra nsubj dependencies for xcomp dependents when collapsing basic
 * dependencies.
 * <br>
 * For each xcomp(head, modifier), nothing is added if the modifier already
 * governs an nsubj or nsubjpass. Otherwise an extra subject is added only
 * in two situations: the modifier is not the word "to" and governs an aux,
 * or the modifier is the word "to" and governs no aux (as in "he decided
 * not to" with no following verb).
 * <br>
 * The new subjects are the direct objects of the head when it has any, and
 * the head's own nsubj/nsubjpass dependents otherwise. New dependencies
 * not already present in the list are flagged as extra and appended.
 *
 * @param list Collection of typedDependencies to scan and extend
 */
private static void addExtraNSubj(Collection<TypedDependency> list) {
  List<TypedDependency> candidates = new ArrayList<>();

  for (TypedDependency xcomp : list) {
    // extra nsubj dependencies are only added to some xcomp dependencies
    if (xcomp.reln() == XCLAUSAL_COMPLEMENT) {
      IndexedWord modifier = xcomp.dep();
      IndexedWord head = xcomp.gov();

      boolean alreadyHasSubject = false;
      boolean hasAux = false;
      List<IndexedWord> headSubjects = Generics.newArrayList();
      List<IndexedWord> headObjects = Generics.newArrayList();

      for (TypedDependency dep : list) {
        if (dep.reln() == NOMINAL_SUBJECT || dep.reln() == NOMINAL_PASSIVE_SUBJECT) {
          if (dep.gov().equals(modifier)) {
            // the modifier already has a subject of its own; stop looking
            alreadyHasSubject = true;
            break;
          }
          if (dep.gov().equals(head)) {
            headSubjects.add(dep.dep());
          }
        } else if (dep.reln() == AUX_MODIFIER) {
          if (dep.gov().equals(modifier)) {
            hasAux = true;
          }
        } else if (dep.reln() == DIRECT_OBJECT) {
          if (dep.gov().equals(head)) {
            headObjects.add(dep.dep());
          }
        }
      }

      // if we already have an nsubj dependency, no need to add an extra nsubj
      if (alreadyHasSubject) {
        continue;
      }

      // Skip "to" with an aux, and any other word without an aux.
      boolean modifierIsTo = modifier.value().equalsIgnoreCase("to");
      if (modifierIsTo == hasAux) {
        continue;
      }

      // Instead of nsubj(do, law) we want nsubj(do, them): direct objects
      // of the head take precedence over its subjects.
      List<IndexedWord> controllers = headObjects.size() > 0 ? headObjects : headSubjects;
      for (IndexedWord controller : controllers) {
        candidates.add(new TypedDependency(NOMINAL_SUBJECT, modifier, controller));
      }
    }
  }

  for (TypedDependency candidate : candidates) {
    if (list.contains(candidate)) {
      continue;
    }
    candidate.setExtra();
    list.add(candidate);
  }
}
Aggregations