use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.
the class EnglishGrammaticalStructure method removeDep.
/**
 * Drops generic {@code dep} relations that merely duplicate a preposition
 * relation over the same governor/dependent pair. This can happen when
 * collapsing stranded prepositions: for "What does CPR stand for?" we get
 * dep(stand, what), and after collapsing we also get prep_for(stand, what).
 * <p>
 * The collection is modified in place: duplicates are first relabelled
 * {@code KILL} and then removed through the collection's iterator.
 *
 * @param list A list of typed dependencies to check through
 */
private static void removeDep(Collection<TypedDependency> list) {
  // every relation returned by getPreps() and getPrepsC() counts as a prep_ relation
  Set<GrammaticalRelation> collapsedPrepRelations = Generics.newHashSet(EnglishGrammaticalRelations.getPreps());
  collapsedPrepRelations.addAll(EnglishGrammaticalRelations.getPrepsC());

  for (TypedDependency prepCandidate : list) {
    if (!collapsedPrepRelations.contains(prepCandidate.reln())) {
      continue;
    }
    // we have a prep_ relation: flag every plain "dep" over the same pair
    IndexedWord prepGov = prepCandidate.gov();
    IndexedWord prepDep = prepCandidate.dep();
    for (TypedDependency other : list) {
      boolean duplicatesPrep = other.reln() == DEPENDENT
          && other.gov().equals(prepGov)
          && other.dep().equals(prepDep);
      if (duplicatesPrep) {
        other.setReln(KILL);
      }
    }
  }

  // sweep out everything that was flagged with reln "kill"
  Iterator<TypedDependency> it = list.iterator();
  while (it.hasNext()) {
    TypedDependency candidate = it.next();
    if (candidate.reln() != KILL) {
      continue;
    }
    if (DEBUG) {
      log.info("Removing duplicate relation: " + candidate);
    }
    it.remove();
  }
}
use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.
the class EnglishGrammaticalStructure method collapsePrepAndPoss.
/**
 * Collapses preposition, possessive and conjoined-preposition dependencies
 * in {@code list}, modifying the collection and its elements in place.
 * <p>
 * The method works in three steps:
 * <ol>
 * <li>It indexes the dependencies by governor and records the dependents of
 * {@code VERBAL_MODIFIER} relations that have no {@code AUX_MODIFIER}
 * child whose value is "to".</li>
 * <li>A first pass handles {@code PREPOSITIONAL_MODIFIER} dependencies whose
 * dependent governs a {@code CONJUNCT} to another word tagged IN or TO
 * (conjoined prepositions), distinguishing the case where every conjunct
 * uses the same preposition from the case where they differ.</li>
 * <li>A second pass handles the remaining single preposition / possessor
 * cases.</li>
 * </ol>
 * Dependencies that have been consumed are relabelled {@code KILL}; newly
 * built dependencies are gathered separately. At the very end all
 * {@code KILL} dependencies are removed from {@code list} and the new ones
 * are appended.
 *
 * @param list the typed dependencies to collapse; elements have their
 *             relation and governor rewritten, and the collection itself
 *             is shrunk and extended
 */
private static void collapsePrepAndPoss(Collection<TypedDependency> list) {
// Man oh man, how gnarly is the logic of this method....
// dependencies created by the collapsing; appended to list at the very end
Collection<TypedDependency> newTypedDeps = new ArrayList<>();
// Construct a map from tree nodes to the set of typed
// dependencies in which the node appears as governor.
// cdm: could use CollectionValuedMap here!
Map<IndexedWord, SortedSet<TypedDependency>> map = Generics.newHashMap();
// dependents of VERBAL_MODIFIER deps for which no "to" aux was found
List<IndexedWord> vmod = Generics.newArrayList();
for (TypedDependency typedDep : list) {
if (!map.containsKey(typedDep.gov())) {
map.put(typedDep.gov(), new TreeSet<>());
}
map.get(typedDep.gov()).add(typedDep);
if (typedDep.reln() == VERBAL_MODIFIER) {
// look for aux deps which indicate this was a to-be verb
boolean foundAux = false;
for (TypedDependency auxDep : list) {
if (auxDep.reln() != AUX_MODIFIER) {
continue;
}
if (!auxDep.gov().equals(typedDep.dep()) || !auxDep.dep().value().equalsIgnoreCase("to")) {
continue;
}
foundAux = true;
break;
}
if (!foundAux) {
vmod.add(typedDep.dep());
}
}
}
// First pass: prepositional modifiers whose dependent heads a conjunction
// of prepositions.
for (TypedDependency td1 : list) {
if (td1.reln() != PREPOSITIONAL_MODIFIER) {
continue;
}
IndexedWord td1Dep = td1.dep();
// all dependencies governed by the dependent of td1
SortedSet<TypedDependency> possibles = map.get(td1Dep);
if (possibles == null) {
continue;
}
// look for the "second half"
// unique: the head prep and whether it should be pobj
Pair<TypedDependency, Boolean> prepDep = null;
// treat as unique
TypedDependency ccDep = null;
// list of dep and prepOtherDep and pobj (or pcomp)
List<Triple<TypedDependency, TypedDependency, Boolean>> conjs = new ArrayList<>();
// remaining daughters that get re-attached to td1's governor later on
Set<TypedDependency> otherDtrs = new TreeSet<>();
// first look for a conj(prep, prep) (there might be several conj relations!!!)
boolean samePrepositionInEachConjunct = true;
// largest token index among the conjoined prepositions found so far
int conjIndex = -1;
for (TypedDependency td2 : possibles) {
if (td2.reln() == CONJUNCT) {
IndexedWord td2Dep = td2.dep();
String td2DepPOS = td2Dep.tag();
if (td2DepPOS.equals("IN") || td2DepPOS.equals("TO")) {
samePrepositionInEachConjunct = samePrepositionInEachConjunct && td2Dep.value().equals(td1Dep.value());
Set<TypedDependency> possibles2 = map.get(td2Dep);
// default of collapsing preposition is prep_
boolean pobj = true;
TypedDependency prepOtherDep = null;
if (possibles2 != null) {
for (TypedDependency td3 : possibles2) {
IndexedWord td3Dep = td3.dep();
String td3DepPOS = td3Dep.tag();
// problem with multiple prep (mcdm)
// only the first pobj/pcomp whose dependent is not itself IN/TO is taken
if ((td3.reln() == PREPOSITIONAL_OBJECT || td3.reln() == PREPOSITIONAL_COMPLEMENT) && (!(td3DepPOS.equals("IN") || td3DepPOS.equals("TO"))) && prepOtherDep == null) {
prepOtherDep = td3;
if (td3.reln() == PREPOSITIONAL_COMPLEMENT) {
pobj = false;
}
} else {
otherDtrs.add(td3);
}
}
}
if (conjIndex < td2Dep.index()) {
conjIndex = td2Dep.index();
}
conjs.add(new Triple<>(td2, prepOtherDep, pobj));
}
}
}
// no conjoined preposition under td1: leave it for the second pass
if (conjs.isEmpty()) {
continue;
}
// if we have a conj under a preposition dependency, we look for the other
// parts
String td1DepPOS = td1Dep.tag();
for (TypedDependency td2 : possibles) {
// the cc dep must have an index smaller than the dep of conjDep
if (td2.reln() == COORDINATION && td2.dep().index() < conjIndex) {
ccDep = td2;
} else {
IndexedWord td2Dep = td2.dep();
String td2DepPOS = td2Dep.tag();
// "; td2.dep().index(): " + td2.dep().index());
if ((td2.reln() == DEPENDENT || td2.reln() == PREPOSITIONAL_OBJECT || td2.reln() == PREPOSITIONAL_COMPLEMENT) && (td1DepPOS.equals("IN") || td1DepPOS.equals("TO") || td1DepPOS.equals("VBG")) && prepDep == null && (!(td2DepPOS.equals("RB") || td2DepPOS.equals("IN") || td2DepPOS.equals("TO")))) {
// same index trick, in case we have multiple deps
// I deleted this to see if it helped [cdm Jan 2010] &&
// td2.dep().index() < index)
prepDep = new Pair<>(td2, td2.reln() != PREPOSITIONAL_COMPLEMENT);
} else if (!inConjDeps(td2, conjs)) {
// don't want to add the conjDep
// again!
otherDtrs.add(td2);
}
}
}
if (prepDep == null || ccDep == null) {
// we can't deal with it in the hairy prep/conj interaction case!
continue;
}
if (DEBUG) {
// ccDep must be non-null given test above
log.info("!! Conj and prep case:");
log.info(" td1 (prep): " + td1);
log.info(" Kids of td1 are: " + possibles);
log.info(" prepDep: " + prepDep);
log.info(" ccDep: " + ccDep);
log.info(" conjs: " + conjs);
log.info(" samePrepositionInEachConjunct: " + samePrepositionInEachConjunct);
log.info(" otherDtrs: " + otherDtrs);
}
// check if we have the same prepositions in the conjunction
if (samePrepositionInEachConjunct) {
// conjDep != null && prepOtherDep !=
// null &&
// OK, we have a conjunction over parallel PPs: Fred flew to Greece and
// to Serbia.
GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, prepDep.second());
TypedDependency tdNew = new TypedDependency(reln, td1.gov(), prepDep.first().dep());
newTypedDeps.add(tdNew);
if (DEBUG) {
log.info("PrepPoss Conj branch (two parallel PPs) adding: " + tdNew);
log.info(" removing: " + td1 + " " + prepDep + " " + ccDep);
}
// remember these are "used up"
td1.setReln(KILL);
prepDep.first().setReln(KILL);
ccDep.setReln(KILL);
for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
TypedDependency conjDep = trip.first();
TypedDependency prepOtherDep = trip.second();
if (prepOtherDep == null) {
// prepositions the same. We just clean up the mess.
if (DEBUG) {
log.info(" apparent misparse: same P twice with only one NP object (prepOtherDep is null)");
log.info(" removing: " + conjDep);
}
// NOTE(review): ccDep was already set to KILL just before this loop, so
// this call has no further effect; conjDep itself is killed below.
ccDep.setReln(KILL);
} else {
// conjoin the object of the first preposition with the object of this one
TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), prepDep.first().dep(), prepOtherDep.dep());
newTypedDeps.add(tdNew2);
if (DEBUG) {
log.info(" adding: " + tdNew2);
log.info(" removing: " + conjDep + " " + prepOtherDep);
}
prepOtherDep.setReln(KILL);
}
conjDep.setReln(KILL);
}
// promote dtrs that would be orphaned
for (TypedDependency otd : otherDtrs) {
if (DEBUG) {
log.info("Changed " + otd);
}
otd.setGov(td1.gov());
if (DEBUG) {
log.info(" to " + otd);
}
}
// Some things in possibles may have already been changed, so check gov
if (DEBUG) {
log.info("td1: " + td1 + "; possibles: " + possibles);
}
for (TypedDependency td2 : possibles) {
// }
if (td2.reln() != KILL && td2.gov().equals(td1.dep())) {
// != CONJUNCT
if (DEBUG) {
log.info("Changing " + td2 + " to have governor of " + td1 + " [a]");
}
td2.setGov(td1.gov());
}
}
// This one has been dealt with successfully
continue;
}
// From here on: the conjuncts use different prepositions.
// A conjunct without its own object borrows the object of the first preposition;
// that is, the prepOtherDep should be the same as prepDep !
for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
if (trip.first() != null && trip.second() == null) {
trip.setSecond(new TypedDependency(prepDep.first().reln(), trip.first().dep(), prepDep.first().dep()));
trip.setThird(prepDep.second());
}
}
// we have two different prepositions in the conjunction
// in this case we need to add a node
// "Bill jumped over the fence and through the hoop"
// prep_over(jumped, fence)
// conj_and(jumped, jumped)
// prep_through(jumped, hoop)
// Extra complication:
// If "jumped" is already part of a conjunction, we should add the new one off that rather than chaining
// NOTE(review): conjHead is computed here but is only referenced from the
// commented-out tdNew2 construction further down; it is otherwise unused.
IndexedWord conjHead = td1.gov();
for (TypedDependency td3 : list) {
if (td3.dep().equals(td1.gov()) && td3.reln().equals(CONJUNCT)) {
conjHead = td3.gov();
}
}
GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, prepDep.second());
TypedDependency tdNew = new TypedDependency(reln, td1.gov(), prepDep.first().dep());
newTypedDeps.add(tdNew);
if (DEBUG) {
log.info("ConjPP (different preps) adding: " + tdNew);
log.info(" deleting: " + td1 + " " + prepDep.first() + " " + ccDep);
}
// remember these are "used up"
td1.setReln(KILL);
prepDep.first().setReln(KILL);
ccDep.setReln(KILL);
// so far we added the first prep grammatical relation
// each conjunct gets its own soft copy of the governor, numbered from 1
int copyNumber = 1;
for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
TypedDependency conjDep = trip.first();
TypedDependency prepOtherDep = trip.second();
boolean pobj = trip.third();
// OK, we have a conjunction over different PPs
// we create a new node;
// in order to make a distinction between the original node and its copy
// we set the "copyCount" variable in the IndexedWord
// existence of copyCount > 0 is checked at printing (toString method of
// TypedDependency)
IndexedWord label = td1.gov().makeSoftCopy(copyNumber);
copyNumber++;
// now we add the conjunction relation between conjHead (either td1.gov
// or what it is itself conjoined with) and the copy
// the copy has the same label as td1.gov() but is another TreeGraphNode
// todo: Or that's the plan; there are a couple of knock on changes to fix before we can do this!
// TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), conjHead, label);
TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), td1.gov(), label);
newTypedDeps.add(tdNew2);
// now we still need to add the second prep grammatical relation
// between the copy and the dependent of the prepOtherDep node
TypedDependency tdNew3;
GrammaticalRelation reln2 = determinePrepRelation(map, vmod, conjDep, td1, pobj);
tdNew3 = new TypedDependency(reln2, label, prepOtherDep.dep());
newTypedDeps.add(tdNew3);
if (DEBUG) {
log.info(" adding: " + tdNew2 + " " + tdNew3);
log.info(" deleting: " + conjDep + " " + prepOtherDep);
}
conjDep.setReln(KILL);
prepOtherDep.setReln(KILL);
// promote dtrs that would be orphaned
for (TypedDependency otd : otherDtrs) {
// likely to be a "dep" and we want this to be a "prep"
if (otd.dep().tag().equals("IN")) {
otd.setReln(PREPOSITIONAL_MODIFIER);
}
otd.setGov(td1.gov());
}
}
// re-attach the surviving daughters of the killed preposition to td1's
// governor, since they're not automatically deleted
for (TypedDependency td2 : possibles) {
if (td2.reln() != KILL) {
// td2.reln() != CONJUNCT) {
if (DEBUG) {
log.info("Changing " + td2 + " to have governor of " + td1 + " [b]");
}
td2.setGov(td1.gov());
}
}
// end for different prepositions
}
// below here is the single preposition/possessor basic case!!
for (TypedDependency td1 : list) {
// skip anything already consumed by the conjunction pass above
if (td1.reln() == KILL) {
continue;
}
IndexedWord td1Dep = td1.dep();
String td1DepPOS = td1Dep.tag();
// find all other typedDeps having our dep as gov
Set<TypedDependency> possibles = map.get(td1Dep);
if (possibles != null && (td1.reln() == PREPOSITIONAL_MODIFIER || td1.reln() == POSSESSION_MODIFIER || td1.reln() == CONJUNCT)) {
// look for the "second half"
// default for prep relation is prep_
boolean pobj = true;
for (TypedDependency td2 : possibles) {
if (td2.reln() != COORDINATION && td2.reln() != CONJUNCT) {
IndexedWord td2Dep = td2.dep();
String td2DepPOS = td2Dep.tag();
if ((td1.reln() == POSSESSION_MODIFIER || td1.reln() == CONJUNCT)) {
if (td2.reln() == POSSESSIVE_MODIFIER) {
if (!map.containsKey(td2Dep)) {
// if 's has no kids of its own (it shouldn't!)
td2.setReln(KILL);
}
}
} else if ((td2.reln() == PREPOSITIONAL_OBJECT || td2.reln() == PREPOSITIONAL_COMPLEMENT) && (td1DepPOS.equals("IN") || td1DepPOS.equals("TO") || td1DepPOS.equals("VBG")) && (!(td2DepPOS.equals("RB") || td2DepPOS.equals("IN") || td2DepPOS.equals("TO"))) && !isConjWithNoPrep(td2.gov(), possibles)) {
// OK, we have a pair td1, td2 to collapse to td3
if (DEBUG) {
log.info("(Single prep/poss base case collapsing " + td1 + " and " + td2);
}
// check whether we are in a pcomp case:
if (td2.reln() == PREPOSITIONAL_COMPLEMENT) {
pobj = false;
}
GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, pobj);
TypedDependency td3 = new TypedDependency(reln, td1.gov(), td2.dep());
if (DEBUG) {
log.info("PP adding: " + td3 + " deleting: " + td1 + ' ' + td2);
}
// add it to map to deal with recursive cases like "achieved this (PP (PP in part) with talent)"
map.get(td3.gov()).add(td3);
newTypedDeps.add(td3);
// remember these are "used up"
td1.setReln(KILL);
// remember these are "used up"
td2.setReln(KILL);
}
}
}
// for TypedDependency td2
}
// if td1 was collapsed, re-attach its surviving daughters to td1's
// governor, since they're not automatically deleted
if (possibles != null && td1.reln() == KILL) {
for (TypedDependency td2 : possibles) {
if (td2.reln() != KILL) {
// td2.reln() != CONJUNCT) {
if (DEBUG) {
log.info("Changing " + td2 + " to have governor of " + td1 + " [c]");
}
td2.setGov(td1.gov());
}
}
}
}
// now remove typed dependencies with reln "kill" and add new ones.
for (Iterator<TypedDependency> iter = list.iterator(); iter.hasNext(); ) {
TypedDependency td = iter.next();
if (td.reln() == KILL) {
if (DEBUG) {
log.info("Removing dep killed in poss/prep (conj) collapse: " + td);
}
iter.remove();
}
}
list.addAll(newTypedDeps);
}
use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalRelations method getNmod.
/**
 * Returns the "nmod" grammatical relation specialised for a given word.
 * Used to add case marker information to nominal modifier relations.<p>
 * They will be turned into nmod:word, where "word" is a preposition.
 * <p>
 * The strings "npmod", "tmod" and "poss" map to fixed relations. Any other
 * string is looked up in the {@code nmods} map; when it is missing, a new
 * relation is created, stored in the map and registered via
 * {@code threadSafeAddRelation}.
 *
 * @param prepositionString The preposition to make a GrammaticalRelation out of
 * @return A grammatical relation for this preposition
 */
public static GrammaticalRelation getNmod(String prepositionString) {
  // nmod subtypes that are not kept in the nmods map
  switch (prepositionString) {
    case "npmod":
      return NP_ADVERBIAL_MODIFIER;
    case "tmod":
      return TEMPORAL_MODIFIER;
    case "poss":
      return POSSESSION_MODIFIER;
    default:
      break;
  }

  GrammaticalRelation cached = nmods.get(prepositionString);
  if (cached != null) {
    return cached;
  }

  synchronized (nmods) {
    // look again while holding the monitor before creating a new relation
    GrammaticalRelation relation = nmods.get(prepositionString);
    if (relation == null) {
      relation = new GrammaticalRelation(Language.UniversalEnglish, "nmod", "nmod_preposition", NOMINAL_MODIFIER, prepositionString);
      nmods.put(prepositionString, relation);
      threadSafeAddRelation(relation);
    }
    return relation;
  }
}
use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalRelations method getAdvcl.
/**
 * Returns the "advcl" grammatical relation specialised for a given word.
 * Used to add case marker information to adverbial clause relations.<p>
 * They will be turned into advcl:word, where "word" is a preposition.
 * <p>
 * The relation is looked up in the {@code advcls} map; when it is missing,
 * a new relation is created, stored in the map and registered via
 * {@code threadSafeAddRelation}.
 *
 * @param advclString The preposition to make a GrammaticalRelation out of
 * @return A grammatical relation for this preposition
 */
public static GrammaticalRelation getAdvcl(String advclString) {
  GrammaticalRelation cached = advcls.get(advclString);
  if (cached != null) {
    return cached;
  }

  synchronized (advcls) {
    // look again while holding the monitor before creating a new relation
    GrammaticalRelation relation = advcls.get(advclString);
    if (relation == null) {
      relation = new GrammaticalRelation(Language.UniversalEnglish, "advcl", "advcl_preposition", ADV_CLAUSE_MODIFIER, advclString);
      advcls.put(advclString, relation);
      threadSafeAddRelation(relation);
    }
    return relation;
  }
}
use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method addCaseMarkersToReln.
/**
 * Appends case marker information to nmod/acl/advcl relations.
 * <p>
 * E.g. if there is a relation {@code nmod(gov, dep)} and {@code case(dep, prep)}, then
 * the {@code nmod} relation is renamed to {@code nmod:prep}.
 * <p>
 * Case markers with consecutive token indices are treated as one multi-word
 * marker: their values are joined with {@code '_'} and lower-cased before
 * being handed to {@code getCaseMarkedRelation}. If a case marker is not
 * adjacent to the previous one, the marker collected so far is emitted as an
 * additional edge between {@code gov} and {@code mod}, and a new marker is
 * started with the current word. The existing edge is relabelled with the
 * last collected marker.
 *
 * @param sg semantic graph
 * @param gov governor of the nmod/acl/advcl relation
 * @param mod modifier of the nmod/acl/advcl relation
 * @param caseMarkers {@code List<IndexedWord>} of all the case markers that depend on mod
 */
private static void addCaseMarkersToReln(SemanticGraph sg, IndexedWord gov, IndexedWord mod, List<IndexedWord> caseMarkers) {
  SemanticGraphEdge edge = sg.getEdge(gov, mod);

  // 0 means "no case marker seen yet"
  int lastCaseMarkerIndex = 0;
  StringBuilder sb = new StringBuilder();
  boolean firstWord = true;
  for (IndexedWord cm : caseMarkers) {
    /* check for adjacency */
    if (lastCaseMarkerIndex == 0 || cm.index() == (lastCaseMarkerIndex + 1)) {
      if (!firstWord) {
        sb.append('_');
      }
      sb.append(cm.value());
      firstWord = false;
    } else {
      /* Should never happen as there should be never two non-adjacent case markers.
       * If it does happen nevertheless create an additional relation.
       */
      // Locale.ROOT keeps the lower-casing independent of the JVM default locale
      GrammaticalRelation reln = getCaseMarkedRelation(edge.getRelation(), sb.toString().toLowerCase(java.util.Locale.ROOT));
      sg.addEdge(gov, mod, reln, Double.NEGATIVE_INFINITY, true);
      sb = new StringBuilder(cm.value());
      // The new builder already holds the current word, so a following
      // adjacent marker must be separated from it with '_'.
      firstWord = false;
    }
    lastCaseMarkerIndex = cm.index();
  }
  GrammaticalRelation reln = getCaseMarkedRelation(edge.getRelation(), sb.toString().toLowerCase(java.util.Locale.ROOT));
  edge.setRelation(reln);
}
Aggregations