Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project (stanfordnlp): class GrammaticalStructure, method getDeps.
/**
 * Helps the constructor build a list of typed dependencies using
 * information from a {@code GrammaticalStructure}.
 * <p>
 * Builds one {@code TypedDependency} per (gov, dep) edge pair in the basic
 * graph, then attaches an artificial ROOT node (index 0). If the natural
 * root is rejected by {@code puncTypedDepFilter} (i.e. it is punctuation),
 * a heuristic re-roots the structure on the root's first child.
 *
 * @param puncTypedDepFilter Filter rejecting punctuation-headed dependencies
 * @param basicGraph Graph whose vertices are tree nodes and whose edge labels
 *                   are grammatical relations
 * @return The sorted list of basic typed dependencies
 */
private List<TypedDependency> getDeps(Predicate<TypedDependency> puncTypedDepFilter, DirectedMultiGraph<TreeGraphNode, GrammaticalRelation> basicGraph) {
List<TypedDependency> basicDep = Generics.newArrayList();
// One TypedDependency per governor/dependent pair; the relation is the
// common ancestor of all edge labels between the two nodes.
for (TreeGraphNode gov : basicGraph.getAllVertices()) {
for (TreeGraphNode dep : basicGraph.getChildren(gov)) {
GrammaticalRelation reln = getGrammaticalRelationCommonAncestor(gov.headWordNode().label(), gov.label(), dep.headWordNode().label(), dep.label(), basicGraph.getEdges(gov, dep));
// log.info(" Gov: " + gov + " Dep: " + dep + " Reln: " + reln);
basicDep.add(new TypedDependency(reln, new IndexedWord(gov.headWordNode().label()), new IndexedWord(dep.headWordNode().label())));
}
}
// add the root
// Artificial ROOT node gets index 0 so it sorts before all real tokens.
TreeGraphNode dependencyRoot = new TreeGraphNode(new Word("ROOT"));
dependencyRoot.setIndex(0);
TreeGraphNode rootDep = root().headWordNode();
if (rootDep == null) {
// No head word on the tree root: fall back to the first leaf (or that
// leaf's head word when it has one).
List<Tree> leaves = Trees.leaves(root());
if (leaves.size() > 0) {
Tree leaf = leaves.get(0);
if (!(leaf instanceof TreeGraphNode)) {
throw new AssertionError("Leaves should be TreeGraphNodes");
}
rootDep = (TreeGraphNode) leaf;
if (rootDep.headWordNode() != null) {
rootDep = rootDep.headWordNode();
}
}
}
if (rootDep != null) {
TypedDependency rootTypedDep = new TypedDependency(ROOT, new IndexedWord(dependencyRoot.label()), new IndexedWord(rootDep.label()));
if (puncTypedDepFilter.test(rootTypedDep)) {
basicDep.add(rootTypedDep);
} else {
// Root is a punctuation character
/* Heuristic to find a root for the graph.
 * Make the first child of the current root the
 * new root and attach all other children to
 * the new root.
 */
IndexedWord root = rootTypedDep.dep();
IndexedWord newRoot = null;
// Sort so "first child" is the leftmost dependency of the old root.
Collections.sort(basicDep);
for (TypedDependency td : basicDep) {
if (td.gov().equals(root)) {
if (newRoot != null) {
// Later children of the old root are re-attached to the new root.
td.setGov(newRoot);
} else {
// First child of the old root becomes the new root.
// NOTE(review): td.setGov(td.gov()) is a no-op — presumably this was
// meant to re-point the governor at the artificial ROOT word; confirm
// against upstream CoreNLP before changing.
td.setGov(td.gov());
td.setReln(ROOT);
newRoot = td.dep();
}
}
}
}
}
postProcessDependencies(basicDep);
Collections.sort(basicDep);
return basicDep;
}
Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project (stanfordnlp): class GrammaticalStructure, method getRoots.
/**
 * Returns the TypedDependencies from the given collection whose governor never
 * appears as a dependent of any dependency in that same collection.
 * <p>
 * At most one dependency is returned per such governor (the first one
 * encountered in iteration order).
 *
 * @param list The list of TypedDependencies to check
 * @return A list of TypedDependencies which are not dependent on any node from the list
 */
public static Collection<TypedDependency> getRoots(Collection<TypedDependency> list) {
  Collection<TypedDependency> rootDeps = new ArrayList<>();
  // Pass 1: record every word that occurs anywhere as a dependent.
  Collection<IndexedWord> dependents = Generics.newHashSet();
  for (TypedDependency td : list) {
    dependents.add(td.dep());
  }
  // Pass 2: keep a dependency the first time its governor is seen, provided
  // that governor never occurs as a dependent. Set.add returning true means
  // this governor has not been emitted before.
  Collection<IndexedWord> seenGovernors = Generics.newHashSet();
  for (TypedDependency td : list) {
    IndexedWord governor = td.gov();
    if (!dependents.contains(governor) && seenGovernors.add(governor)) {
      rootDeps.add(td);
    }
  }
  return rootDeps;
}
Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project (stanfordnlp): class GrammaticalStructure, method getGrammaticalRelation.
// end static class NoPunctTypedDependencyFilter
/**
 * Gets the GrammaticalRelation between the nodes at the two given indices,
 * or null if the first is not the governor of the second.
 *
 * @param govIndex index of the candidate governor node
 * @param depIndex index of the candidate dependent node
 * @return the relation between the two nodes, or null when gov does not govern dep
 */
public GrammaticalRelation getGrammaticalRelation(int govIndex, int depIndex) {
  // TODO: this is pretty ugly
  // Wrap each node's label as an IndexedWord and delegate to the
  // IndexedWord-based overload.
  IndexedWord govWord = new IndexedWord(getNodeByIndex(govIndex).label());
  IndexedWord depWord = new IndexedWord(getNodeByIndex(depIndex).label());
  return getGrammaticalRelation(govWord, depWord);
}
Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project (stanfordnlp): class GrammaticalStructure, method getTreeDeps.
/** Looks through the complete dependency graph and adds to the list {@code deps}
 * any additional dependencies which are not already in the list but which
 * satisfy both filters. Added dependencies are marked as "extra".
 *
 * @param deps The list of dependencies which may be augmented
 * @param completeGraph a graph of all the tree dependencies found earlier
 * @param puncTypedDepFilter The filter that may skip punctuation dependencies
 * @param extraTreeDepFilter Additional dependencies are added only if they pass this filter
 */
protected void getTreeDeps(List<TypedDependency> deps, DirectedMultiGraph<TreeGraphNode, GrammaticalRelation> completeGraph, Predicate<TypedDependency> puncTypedDepFilter, Predicate<TypedDependency> extraTreeDepFilter) {
  for (TreeGraphNode governor : completeGraph.getAllVertices()) {
    for (TreeGraphNode dependent : completeGraph.getChildren(governor)) {
      // Drop edge labels that are ancestors of other labels on the same edge,
      // then consider each remaining relation as a candidate extra dependency.
      for (GrammaticalRelation relation : removeGrammaticalRelationAncestors(completeGraph.getEdges(governor, dependent))) {
        TypedDependency candidate = new TypedDependency(relation, new IndexedWord(governor.headWordNode().label()), new IndexedWord(dependent.headWordNode().label()));
        // Skip duplicates and anything rejected by either filter.
        if (deps.contains(candidate) || !puncTypedDepFilter.test(candidate) || !extraTreeDepFilter.test(candidate)) {
          continue;
        }
        candidate.setExtra();
        deps.add(candidate);
      }
    }
  }
}
Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project (stanfordnlp): class EnglishGrammaticalStructure, method treatCC.
/**
 * Propagates grammatical relations across coordination (conj) links, in place.
 * <p>
 * For each {@code conj(gov, dep)} dependency, relations in which {@code gov}
 * participates as a dependent are copied onto {@code dep}, and subject
 * relations of {@code gov} are propagated to {@code dep} when {@code dep}
 * looks verbal/adjectival and has no subject of its own (adjusting
 * active/passive subject types via passive-auxiliary evidence). The input
 * collection is cleared and refilled with the augmented set of dependencies.
 */
private static void treatCC(Collection<TypedDependency> list) {
// Construct a map from tree nodes to the set of typed
// dependencies in which the node appears as dependent.
Map<IndexedWord, Set<TypedDependency>> map = Generics.newHashMap();
// Construct a map of tree nodes being governor of a subject grammatical
// relation to that relation
Map<IndexedWord, TypedDependency> subjectMap = Generics.newHashMap();
// Construct a set of TreeGraphNodes with a passive auxiliary on them
Set<IndexedWord> withPassiveAuxiliary = Generics.newHashSet();
// Construct a map of tree nodes being governor of an object grammatical
// relation to that relation
// Map<TreeGraphNode, TypedDependency> objectMap = new
// HashMap<TreeGraphNode, TypedDependency>();
List<IndexedWord> rcmodHeads = Generics.newArrayList();
List<IndexedWord> prepcDep = Generics.newArrayList();
// Single indexing pass: build all the lookup structures used below.
for (TypedDependency typedDep : list) {
if (!map.containsKey(typedDep.dep())) {
// NB: Here and in other places below, we use a TreeSet (which extends
// SortedSet) to guarantee that results are deterministic)
map.put(typedDep.dep(), new TreeSet<>());
}
map.get(typedDep.dep()).add(typedDep);
if (typedDep.reln().equals(AUX_PASSIVE_MODIFIER)) {
withPassiveAuxiliary.add(typedDep.gov());
}
// look for subjects
// Only the first subject relation seen per governor is kept.
if (typedDep.reln().getParent() == NOMINAL_SUBJECT || typedDep.reln().getParent() == SUBJECT || typedDep.reln().getParent() == CLAUSAL_SUBJECT) {
if (!subjectMap.containsKey(typedDep.gov())) {
subjectMap.put(typedDep.gov(), typedDep);
}
}
// look for rcmod relations
if (typedDep.reln() == RELATIVE_CLAUSE_MODIFIER) {
rcmodHeads.add(typedDep.gov());
}
// to avoid wrong propagation of dobj
if (typedDep.reln().toString().startsWith("prepc")) {
prepcDep.add(typedDep.dep());
}
}
// log.info(map);
// if (DEBUG) log.info("Subject map: " + subjectMap);
// if (DEBUG) log.info("Object map: " + objectMap);
// log.info(rcmodHeads);
// create a new list of typed dependencies
// Work on a copy so additions don't interfere with the iteration over list.
Collection<TypedDependency> newTypedDeps = new ArrayList<>(list);
// find typed deps of form conj(gov,dep)
for (TypedDependency td : list) {
if (EnglishGrammaticalRelations.getConjs().contains(td.reln())) {
IndexedWord gov = td.gov();
IndexedWord dep = td.dep();
// look at the dep in the conjunct
// Copy each relation in which gov appears as dependent onto dep.
Set<TypedDependency> gov_relations = map.get(gov);
// log.info("gov " + gov);
if (gov_relations != null) {
for (TypedDependency td1 : gov_relations) {
// log.info("gov rel " + td1);
IndexedWord newGov = td1.gov();
// is possible to have overlapping newGov & dep
if (newGov.equals(dep)) {
continue;
}
GrammaticalRelation newRel = td1.reln();
if (newRel != ROOT) {
if (rcmodHeads.contains(gov) && rcmodHeads.contains(dep)) {
// to prevent wrong propagation in the case of long dependencies in relative clauses
if (newRel != DIRECT_OBJECT && newRel != NOMINAL_SUBJECT) {
if (DEBUG) {
log.info("Adding new " + newRel + " dependency from " + newGov + " to " + dep + " (subj/obj case)");
}
newTypedDeps.add(new TypedDependency(newRel, newGov, dep));
}
} else {
if (DEBUG) {
log.info("Adding new " + newRel + " dependency from " + newGov + " to " + dep);
}
newTypedDeps.add(new TypedDependency(newRel, newGov, dep));
}
}
}
}
// propagate subjects
// look at the gov in the conjunct: if it is has a subject relation,
// the dep is a verb and the dep doesn't have a subject relation
// then we want to add a subject relation for the dep.
// (By testing for the dep to be a verb, we are going to miss subject of
// copula verbs! but
// is it safe to relax this assumption?? i.e., just test for the subject
// part)
// CDM 2008: I also added in JJ, since participial verbs are often
// tagged JJ
String tag = dep.tag();
if (subjectMap.containsKey(gov) && (tag.startsWith("VB") || tag.startsWith("JJ")) && !subjectMap.containsKey(dep)) {
TypedDependency tdsubj = subjectMap.get(gov);
// check for wrong nsubjpass: if the new verb is VB or VBZ or VBP or JJ, then
// add nsubj (if it is tagged correctly, should do this for VBD too, but we don't)
// Flip passive<->active subject type based on the conjoined verb's
// own tag / passive-auxiliary evidence.
GrammaticalRelation relation = tdsubj.reln();
if (relation == NOMINAL_PASSIVE_SUBJECT) {
if (isDefinitelyActive(tag)) {
relation = NOMINAL_SUBJECT;
}
} else if (relation == CLAUSAL_PASSIVE_SUBJECT) {
if (isDefinitelyActive(tag)) {
relation = CLAUSAL_SUBJECT;
}
} else if (relation == NOMINAL_SUBJECT) {
if (withPassiveAuxiliary.contains(dep)) {
relation = NOMINAL_PASSIVE_SUBJECT;
}
} else if (relation == CLAUSAL_SUBJECT) {
if (withPassiveAuxiliary.contains(dep)) {
relation = CLAUSAL_PASSIVE_SUBJECT;
}
}
if (DEBUG) {
log.info("Adding new " + relation + " dependency from " + dep + " to " + tdsubj.dep() + " (subj propagation case)");
}
newTypedDeps.add(new TypedDependency(relation, dep, tdsubj.dep()));
}
// propagate objects
// cdm july 2010: This bit of code would copy a dobj from the first
// clause to a later conjoined clause if it didn't
// contain its own dobj or prepc. But this is too aggressive and wrong
// if the later clause is intransitive
// (including passivized cases) and so I think we have to not have this
// done always, and see no good "sometimes" heuristic.
// IF WE WERE TO REINSTATE, SHOULD ALSO NOT ADD OBJ IF THERE IS A ccomp
// (SBAR).
// if (objectMap.containsKey(gov) &&
// dep.tag().startsWith("VB") && ! objectMap.containsKey(dep)
// && ! prepcDep.contains(gov)) {
// TypedDependency tdobj = objectMap.get(gov);
// if (DEBUG) {
// log.info("Adding new " + tdobj.reln() + " dependency from "
// + dep + " to " + tdobj.dep() + " (obj propagation case)");
// }
// newTypedDeps.add(new TypedDependency(tdobj.reln(), dep,
// tdobj.dep()));
// }
}
}
// Replace the contents of the input collection with the augmented set.
list.clear();
list.addAll(newTypedDeps);
}
Aggregations