use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class ChineseGrammaticalStructure method collapsePrepAndPoss.
/**
 * Collapses preposition dependencies in {@code list}, modifying it in place.
 * <p>
 * For a pair {@code dep(gov, p)} and {@code dep(p, x)} where the word {@code p}
 * is tagged {@code "P"}, a new dependency {@code rel(gov, x)} is created whose
 * relation is looked up (or created) from the value of {@code p}, and both
 * original dependencies are marked {@link GrammaticalRelation#KILL}. Any other
 * dependency still governed by {@code p} is re-attached to {@code gov}.
 * <p>
 * On return, {@code list} contains the newly created dependencies followed by
 * all original dependencies that were not killed.
 *
 * @param list the typed dependencies to collapse; cleared and refilled by this method
 */
private static void collapsePrepAndPoss(Collection<TypedDependency> list) {
  Collection<TypedDependency> newTypedDeps = new ArrayList<>();

  // Construct a map from words to the set of typed
  // dependencies in which the word appears as governor.
  Map<IndexedWord, Set<TypedDependency>> map = Generics.newHashMap();
  for (TypedDependency typedDep : list) {
    Set<TypedDependency> governed = map.get(typedDep.gov());
    if (governed == null) {
      governed = Generics.<TypedDependency>newHashSet();
      map.put(typedDep.gov(), governed);
    }
    governed.add(typedDep);
  }

  for (TypedDependency td1 : list) {
    if (td1.reln() == GrammaticalRelation.KILL) {
      continue;
    }
    IndexedWord td1Dep = td1.dep();
    // NOTE(review): presumably tag() can be null when the word carries no POS
    // tag; the comparison below is written null-safely for that reason.
    String td1DepPOS = td1Dep.tag();

    // find all other typedDeps having our dep as gov
    Set<TypedDependency> possibles = map.get(td1Dep);
    if (possibles == null) {
      continue;
    }

    // look for the "second half"
    for (TypedDependency td2 : possibles) {
      // td1.reln() is re-read on every iteration on purpose: once td1 has been
      // killed below, no further td2 is collapsed with it.
      if (td1.reln() == DEPENDENT && td2.reln() == DEPENDENT && "P".equals(td1DepPOS)) {
        GrammaticalRelation td3reln = ChineseGrammaticalRelations.valueOf(td1Dep.value());
        if (td3reln == null) {
          td3reln = GrammaticalRelation.valueOf(Language.Chinese, td1Dep.value());
        }
        TypedDependency td3 = new TypedDependency(td3reln, td1.gov(), td2.dep());
        newTypedDeps.add(td3);
        // remember these are "used up"
        td1.setReln(GrammaticalRelation.KILL);
        td2.setReln(GrammaticalRelation.KILL);
      }
    }

    // If td1 was collapsed, any surviving dependency governed by td1's
    // dependent would be orphaned, since that governor no longer appears.
    // So, change its governor to td1's governor.
    if (td1.reln().equals(GrammaticalRelation.KILL)) {
      for (TypedDependency td2 : possibles) {
        if (!td2.reln().equals(GrammaticalRelation.KILL)) {
          td2.setGov(td1.gov());
        }
      }
    }
  }

  // now copy remaining unkilled TDs from here to new
  for (TypedDependency td : list) {
    if (!td.reln().equals(GrammaticalRelation.KILL)) {
      newTypedDeps.add(td);
    }
  }

  // forget all (esp. killed) TDs
  list.clear();
  list.addAll(newTypedDeps);
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method collapseReferent.
/**
 * Collapses referent relations by redirecting edges that point at the relative
 * word so that they point at its antecedent instead. e.g.:
 * "The man that I love ... " ref(man, that) dobj(love, that) -> ref(man, that) dobj(love,
 * man)
 * <p>
 * The {@code ref} edges themselves are left in the graph. Every redirected edge
 * is added with weight {@code Double.NEGATIVE_INFINITY} and flagged as extra.
 *
 * @param sg the graph to modify in place
 */
private static void collapseReferent(SemanticGraph sg) {
  // Collect all edges of the form ref(antecedent, relativeWord) up front.
  List<SemanticGraphEdge> referentEdges = new ArrayList<>(sg.findAllRelns(REFERENT));
  // Incoming edges are read from a copy made before any edits, while the
  // edits themselves are applied to sg.
  SemanticGraph snapshot = sg.makeSoftCopy();

  for (SemanticGraphEdge referentEdge : referentEdges) {
    IndexedWord relativeWord = referentEdge.getDependent();
    IndexedWord antecedent = referentEdge.getGovernor();

    Iterator<SemanticGraphEdge> incoming = snapshot.incomingEdgeIterator(relativeWord);
    while (incoming.hasNext()) {
      SemanticGraphEdge candidate = incoming.next();
      // Leave ref edges alone.
      if (candidate.getRelation() == REFERENT) {
        continue;
      }
      // Redirecting an edge whose governor already is the antecedent would
      // produce an edge from the antecedent to itself, so skip it.
      IndexedWord head = candidate.getGovernor();
      if (head.equals(antecedent)) {
        continue;
      }
      sg.removeEdge(candidate);
      sg.addEdge(head, antecedent, candidate.getRelation(), Double.NEGATIVE_INFINITY, true);
    }
  }
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method process3WP.
/**
 * Processes all the three-word prepositions in THREE_WORD_PREPS.
 * <p>
 * For each occurrence, the words at indices {@code i}, {@code i + 1} and
 * {@code i + 2} are matched against {@code THREE_WORD_PREPS_PATTERN}. On a
 * match, the edge from the middle word {@code w2} to {@code gov2} is removed,
 * {@code gov2} takes over {@code w2}'s position (as root, or as dependent of
 * {@code w2}'s governor), the remaining children of {@code w2} are moved to
 * {@code gov2}, and the three words are handed to
 * {@code createMultiWordExpression}.
 * <p>
 * All lookups and validity checks for an occurrence are done before the graph
 * is modified, so an occurrence that cannot be processed is skipped without
 * leaving the graph partially rewritten.
 *
 * @param sg the graph to modify in place
 * @param trigrams maps a trigram to the node indices of its first word; a
 *     trigram with no entry is skipped
 */
private static void process3WP(SemanticGraph sg, HashMap<String, HashSet<Integer>> trigrams) {
  for (String trigram : THREE_WORD_PREPS) {
    HashSet<Integer> startIndices = trigrams.get(trigram);
    if (startIndices == null) {
      continue;
    }

    for (Integer i : startIndices) {
      IndexedWord w1 = sg.getNodeByIndexSafe(i);
      IndexedWord w2 = sg.getNodeByIndexSafe(i + 1);
      IndexedWord w3 = sg.getNodeByIndexSafe(i + 2);
      if (w1 == null || w2 == null || w3 == null) {
        continue;
      }

      // Find the pattern match that is anchored on exactly these three words.
      SemgrexMatcher matcher = THREE_WORD_PREPS_PATTERN.matcher(sg);
      IndexedWord gov = null;
      IndexedWord gov2 = null;
      while (matcher.find()) {
        if (w1.equals(matcher.getNode("w1")) && w2.equals(matcher.getNode("w2")) && w3.equals(matcher.getNode("w3"))) {
          gov = matcher.getNode("gov");
          gov2 = matcher.getNode("gov2");
          break;
        }
      }
      if (gov2 == null) {
        continue;
      }

      SemanticGraphEdge edge = sg.getEdge(w2, gov2);
      if (edge == null) {
        continue;
      }

      GrammaticalRelation markerReln = CASE_MARKER;
      if (sg.getRoots().contains(w2)) {
        // w2 was a root: gov2 replaces it as root.
        sg.removeEdge(edge);
        sg.getRoots().remove(w2);
        sg.addRoot(gov2);
      } else {
        // Resolve and validate everything before touching the graph, so that
        // bailing out here cannot leave the w2 -> gov2 edge removed with no
        // replacement attached.
        if (gov == null) {
          gov = sg.getParent(w2);
        }
        if (gov == null) {
          continue;
        }
        SemanticGraphEdge govEdge = sg.getEdge(gov, w2);
        if (govEdge == null) {
          continue;
        }
        GrammaticalRelation reln = govEdge.getRelation();
        if (reln == NOMINAL_MODIFIER && (edge.getRelation() == CLAUSAL_MODIFIER || edge.getRelation() == ADV_CLAUSE_MODIFIER)) {
          reln = edge.getRelation();
          markerReln = MARKER;
        }
        sg.removeEdge(edge);
        sg.addEdge(gov, gov2, reln, Double.NEGATIVE_INFINITY, false);
      }

      /* Make children of w2 dependents of gov2. */
      for (SemanticGraphEdge edge2 : sg.getOutEdgesSorted(w2)) {
        sg.removeEdge(edge2);
        sg.addEdge(gov2, edge2.getDependent(), edge2.getRelation(), edge2.getWeight(), edge2.isExtra());
      }

      createMultiWordExpression(sg, gov2, markerReln, w1, w2, w3);
    }
  }
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method addConjInformation.
/**
 * Appends the type of conjunction to every conjunct relation.
 * <p>
 * {@code cc(Marie, and)}, {@code conj(Marie, Chris)} and {@code conj(Marie, John)}
 * become {@code cc(Marie, and)}, {@code conj:and(Marie, Chris)} and {@code conj:and(Marie, John)}.
 * <p>
 * If several coordination markers depend on the same governor, the marker
 * preceding the conjunct is appended to the conjunction relation, or the
 * first marker if none precedes it.
 * <p>
 * Some multi-word coordination markers are collapsed to {@code conj:and} or {@code conj:negcc}.
 * See {@link #conjValue(IndexedWord, SemanticGraph)}.
 * <p>
 * Consecutive matches that share both governor and coordination marker are
 * gathered into one group, and each group is passed to {@code addConjToReln}
 * in a single call.
 *
 * @param sg A SemanticGraph from a sentence
 */
private static void addConjInformation(SemanticGraph sg) {
  /* Semgrexes require a graph with a root. */
  if (sg.getRoots().isEmpty()) {
    return;
  }

  // Match against a copy; the relations are rewritten on sg itself.
  SemgrexMatcher matcher = CONJUNCTION_PATTERN.matcher(sg.makeSoftCopy());

  IndexedWord groupGov = null;
  IndexedWord groupCc = null;
  List<IndexedWord> groupConjuncts = Generics.newLinkedList();

  while (matcher.find()) {
    IndexedWord conjunct = matcher.getNode("conj");
    IndexedWord currentGov = matcher.getNode("gov");
    IndexedWord currentCc = matcher.getNode("cc");

    // The very first match always opens the first group; afterwards a match
    // stays in the group only if both governor and marker are unchanged.
    boolean sameGroup = groupGov == null
        || (currentGov.equals(groupGov) && currentCc.equals(groupCc));
    if (!sameGroup) {
      addConjToReln(sg, groupGov, groupConjuncts, groupCc);
      groupConjuncts = Generics.newLinkedList();
    }

    groupGov = currentGov;
    groupCc = currentCc;
    groupConjuncts.add(conjunct);
  }

  // Flush the last open group, if there was any match at all.
  if (groupGov != null) {
    addConjToReln(sg, groupGov, groupConjuncts, groupCc);
  }
}
use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method expandPrepConjunction.
/**
 * Used by expandPrepConjunctions.
 * <p>
 * Looks up the parent of {@code gov} ({@code caseGov}) and that parent's
 * parent ({@code caseGovGov}); does nothing if either is missing. For every
 * word in {@code conjDeps} it creates a soft copy of {@code caseGovGov},
 * attaches the copy as a conjunct of the original word, links the copy to
 * {@code caseGov} with the relation that holds between {@code caseGovGov} and
 * {@code caseGov} (as an extra edge), and calls {@code addCaseMarkersToReln}
 * with that word as the only case marker.
 *
 * @param sg the graph to modify in place
 * @param gov the word whose parent and grandparent are looked up
 * @param conjDeps the words for which a copy node is created, one copy each
 * @param ccDep the coordination marker passed on to {@code addConjToReln}
 */
private static void expandPrepConjunction(SemanticGraph sg, IndexedWord gov, List<IndexedWord> conjDeps, IndexedWord ccDep) {
  IndexedWord caseGov = sg.getParent(gov);
  if (caseGov == null) {
    return;
  }
  IndexedWord caseGovGov = sg.getParent(caseGov);
  if (caseGovGov == null) {
    return;
  }

  // Conjuncts hang off the original word when caseGovGov has one,
  // otherwise off caseGovGov itself.
  IndexedWord conjGov = caseGovGov.getOriginal();
  if (conjGov == null) {
    conjGov = caseGovGov;
  }

  GrammaticalRelation rel = sg.reln(caseGovGov, caseGov);
  List<IndexedWord> copiedGovernors = Generics.newLinkedList();

  for (IndexedWord conjDep : conjDeps) {
    IndexedWord governorCopy = caseGovGov.makeSoftCopy();

    sg.addEdge(conjGov, governorCopy, CONJUNCT, Double.NEGATIVE_INFINITY, false);
    /* Add relation to copy node. */
    sg.addEdge(governorCopy, caseGov, rel, Double.NEGATIVE_INFINITY, true);
    copiedGovernors.add(governorCopy);

    List<IndexedWord> caseMarkers = Generics.newArrayList();
    caseMarkers.add(conjDep);
    addCaseMarkersToReln(sg, governorCopy, caseGov, caseMarkers);
  }

  /* Add conjunction information for these relations already at this point.
   * It could be that we add several coordinating conjunctions while collapsing
   * and we might not know which conjunction belongs to which conjunct at a later
   * point.
   */
  addConjToReln(sg, conjGov, copiedGovernors, ccDep);
}
Aggregations