Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project (stanfordnlp): class GrammaticalStructure, method getDeps.
/**
 * Helps the constructor build a list of typed dependencies using
 * information from a {@code GrammaticalStructure}.
 * <p>
 * Builds one {@code TypedDependency} per (gov, dep) edge pair in the basic
 * graph, then attaches an artificial ROOT node (index 0). If the natural
 * root is rejected by {@code puncTypedDepFilter} (i.e. it is punctuation),
 * a heuristic re-roots the structure on the root's first child.
 *
 * @param puncTypedDepFilter Filter rejecting punctuation-headed dependencies
 * @param basicGraph Graph whose vertices are tree nodes and whose edge labels
 *                   are grammatical relations
 * @return The sorted list of basic typed dependencies
 */
private List<TypedDependency> getDeps(Predicate<TypedDependency> puncTypedDepFilter, DirectedMultiGraph<TreeGraphNode, GrammaticalRelation> basicGraph) {
List<TypedDependency> basicDep = Generics.newArrayList();
// One TypedDependency per governor/dependent pair; the relation is the
// common ancestor of all edge labels between the two nodes.
for (TreeGraphNode gov : basicGraph.getAllVertices()) {
for (TreeGraphNode dep : basicGraph.getChildren(gov)) {
GrammaticalRelation reln = getGrammaticalRelationCommonAncestor(gov.headWordNode().label(), gov.label(), dep.headWordNode().label(), dep.label(), basicGraph.getEdges(gov, dep));
// log.info(" Gov: " + gov + " Dep: " + dep + " Reln: " + reln);
basicDep.add(new TypedDependency(reln, new IndexedWord(gov.headWordNode().label()), new IndexedWord(dep.headWordNode().label())));
}
}
// add the root
// Artificial ROOT node gets index 0 so it sorts before all real tokens.
TreeGraphNode dependencyRoot = new TreeGraphNode(new Word("ROOT"));
dependencyRoot.setIndex(0);
TreeGraphNode rootDep = root().headWordNode();
if (rootDep == null) {
// No head word on the tree root: fall back to the first leaf (or that
// leaf's head word when it has one).
List<Tree> leaves = Trees.leaves(root());
if (leaves.size() > 0) {
Tree leaf = leaves.get(0);
if (!(leaf instanceof TreeGraphNode)) {
throw new AssertionError("Leaves should be TreeGraphNodes");
}
rootDep = (TreeGraphNode) leaf;
if (rootDep.headWordNode() != null) {
rootDep = rootDep.headWordNode();
}
}
}
if (rootDep != null) {
TypedDependency rootTypedDep = new TypedDependency(ROOT, new IndexedWord(dependencyRoot.label()), new IndexedWord(rootDep.label()));
if (puncTypedDepFilter.test(rootTypedDep)) {
basicDep.add(rootTypedDep);
} else {
// Root is a punctuation character
/* Heuristic to find a root for the graph.
 * Make the first child of the current root the
 * new root and attach all other children to
 * the new root.
 */
IndexedWord root = rootTypedDep.dep();
IndexedWord newRoot = null;
// Sort so "first child" is the leftmost dependency of the old root.
Collections.sort(basicDep);
for (TypedDependency td : basicDep) {
if (td.gov().equals(root)) {
if (newRoot != null) {
// Later children of the old root are re-attached to the new root.
td.setGov(newRoot);
} else {
// First child of the old root becomes the new root.
// NOTE(review): td.setGov(td.gov()) is a no-op — presumably this was
// meant to re-point the governor at the artificial ROOT word; confirm
// against upstream CoreNLP before changing.
td.setGov(td.gov());
td.setReln(ROOT);
newRoot = td.dep();
}
}
}
}
}
postProcessDependencies(basicDep);
Collections.sort(basicDep);
return basicDep;
}
Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project (stanfordnlp): class GrammaticalStructure, method getRoots.
/**
 * Returns the TypedDependencies from the given collection whose governor never
 * appears as a dependent of any dependency in that same collection.
 * <p>
 * At most one dependency is returned per such governor (the first one
 * encountered in iteration order).
 *
 * @param list The list of TypedDependencies to check
 * @return A list of TypedDependencies which are not dependent on any node from the list
 */
public static Collection<TypedDependency> getRoots(Collection<TypedDependency> list) {
  Collection<TypedDependency> rootDeps = new ArrayList<>();
  // Pass 1: record every word that occurs anywhere as a dependent.
  Collection<IndexedWord> dependents = Generics.newHashSet();
  for (TypedDependency td : list) {
    dependents.add(td.dep());
  }
  // Pass 2: keep a dependency the first time its governor is seen, provided
  // that governor never occurs as a dependent. Set.add returning true means
  // this governor has not been emitted before.
  Collection<IndexedWord> seenGovernors = Generics.newHashSet();
  for (TypedDependency td : list) {
    IndexedWord governor = td.gov();
    if (!dependents.contains(governor) && seenGovernors.add(governor)) {
      rootDeps.add(td);
    }
  }
  return rootDeps;
}
Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project (stanfordnlp): class GrammaticalStructure, method getGrammaticalRelation.
// end static class NoPunctTypedDependencyFilter
/**
 * Gets the GrammaticalRelation between the nodes at the two given indices,
 * or null if the first is not the governor of the second.
 *
 * @param govIndex index of the candidate governor node
 * @param depIndex index of the candidate dependent node
 * @return the relation between the two nodes, or null when gov does not govern dep
 */
public GrammaticalRelation getGrammaticalRelation(int govIndex, int depIndex) {
  // TODO: this is pretty ugly
  // Wrap each node's label as an IndexedWord and delegate to the
  // IndexedWord-based overload.
  IndexedWord govWord = new IndexedWord(getNodeByIndex(govIndex).label());
  IndexedWord depWord = new IndexedWord(getNodeByIndex(depIndex).label());
  return getGrammaticalRelation(govWord, depWord);
}
Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project (stanfordnlp): class GrammaticalStructure, method getTreeDeps.
/** Looks through the complete dependency graph and adds to the list {@code deps}
 * any additional dependencies which are not already in the list but which
 * satisfy both filters. Added dependencies are marked as "extra".
 *
 * @param deps The list of dependencies which may be augmented
 * @param completeGraph a graph of all the tree dependencies found earlier
 * @param puncTypedDepFilter The filter that may skip punctuation dependencies
 * @param extraTreeDepFilter Additional dependencies are added only if they pass this filter
 */
protected void getTreeDeps(List<TypedDependency> deps, DirectedMultiGraph<TreeGraphNode, GrammaticalRelation> completeGraph, Predicate<TypedDependency> puncTypedDepFilter, Predicate<TypedDependency> extraTreeDepFilter) {
  for (TreeGraphNode governor : completeGraph.getAllVertices()) {
    for (TreeGraphNode dependent : completeGraph.getChildren(governor)) {
      // Drop edge labels that are ancestors of other labels on the same edge,
      // then consider each remaining relation as a candidate extra dependency.
      for (GrammaticalRelation relation : removeGrammaticalRelationAncestors(completeGraph.getEdges(governor, dependent))) {
        TypedDependency candidate = new TypedDependency(relation, new IndexedWord(governor.headWordNode().label()), new IndexedWord(dependent.headWordNode().label()));
        // Skip duplicates and anything rejected by either filter.
        if (deps.contains(candidate) || !puncTypedDepFilter.test(candidate) || !extraTreeDepFilter.test(candidate)) {
          continue;
        }
        candidate.setExtra();
        deps.add(candidate);
      }
    }
  }
}
Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project (stanfordnlp): class EnglishGrammaticalStructure, method treatCC.
/**
 * Propagates grammatical relations across coordination (conj) links, in place.
 * <p>
 * For each {@code conj(gov, dep)} dependency, relations in which {@code gov}
 * participates as a dependent are copied onto {@code dep}, and subject
 * relations of {@code gov} are propagated to {@code dep} when {@code dep}
 * looks verbal/adjectival and has no subject of its own (adjusting
 * active/passive subject types via passive-auxiliary evidence). The input
 * collection is cleared and refilled with the augmented set of dependencies.
 */
private static void treatCC(Collection<TypedDependency> list) {
// Construct a map from tree nodes to the set of typed
// dependencies in which the node appears as dependent.
Map<IndexedWord, Set<TypedDependency>> map = Generics.newHashMap();
// Construct a map of tree nodes being governor of a subject grammatical
// relation to that relation
Map<IndexedWord, TypedDependency> subjectMap = Generics.newHashMap();
// Construct a set of TreeGraphNodes with a passive auxiliary on them
Set<IndexedWord> withPassiveAuxiliary = Generics.newHashSet();
// Construct a map of tree nodes being governor of an object grammatical
// relation to that relation
// Map<TreeGraphNode, TypedDependency> objectMap = new
// HashMap<TreeGraphNode, TypedDependency>();
List<IndexedWord> rcmodHeads = Generics.newArrayList();
List<IndexedWord> prepcDep = Generics.newArrayList();
// Single indexing pass: build all the lookup structures used below.
for (TypedDependency typedDep : list) {
if (!map.containsKey(typedDep.dep())) {
// NB: Here and in other places below, we use a TreeSet (which extends
// SortedSet) to guarantee that results are deterministic)
map.put(typedDep.dep(), new TreeSet<>());
}
map.get(typedDep.dep()).add(typedDep);
if (typedDep.reln().equals(AUX_PASSIVE_MODIFIER)) {
withPassiveAuxiliary.add(typedDep.gov());
}
// look for subjects
// Only the first subject relation seen per governor is kept.
if (typedDep.reln().getParent() == NOMINAL_SUBJECT || typedDep.reln().getParent() == SUBJECT || typedDep.reln().getParent() == CLAUSAL_SUBJECT) {
if (!subjectMap.containsKey(typedDep.gov())) {
subjectMap.put(typedDep.gov(), typedDep);
}
}
// look for rcmod relations
if (typedDep.reln() == RELATIVE_CLAUSE_MODIFIER) {
rcmodHeads.add(typedDep.gov());
}
// to avoid wrong propagation of dobj
if (typedDep.reln().toString().startsWith("prepc")) {
prepcDep.add(typedDep.dep());
}
}
// log.info(map);
// if (DEBUG) log.info("Subject map: " + subjectMap);
// if (DEBUG) log.info("Object map: " + objectMap);
// log.info(rcmodHeads);
// create a new list of typed dependencies
// Work on a copy so additions don't interfere with the iteration over list.
Collection<TypedDependency> newTypedDeps = new ArrayList<>(list);
// find typed deps of form conj(gov,dep)
for (TypedDependency td : list) {
if (EnglishGrammaticalRelations.getConjs().contains(td.reln())) {
IndexedWord gov = td.gov();
IndexedWord dep = td.dep();
// look at the dep in the conjunct
// Copy each relation in which gov appears as dependent onto dep.
Set<TypedDependency> gov_relations = map.get(gov);
// log.info("gov " + gov);
if (gov_relations != null) {
for (TypedDependency td1 : gov_relations) {
// log.info("gov rel " + td1);
IndexedWord newGov = td1.gov();
// is possible to have overlapping newGov & dep
if (newGov.equals(dep)) {
continue;
}
GrammaticalRelation newRel = td1.reln();
if (newRel != ROOT) {
if (rcmodHeads.contains(gov) && rcmodHeads.contains(dep)) {
// to prevent wrong propagation in the case of long dependencies in relative clauses
if (newRel != DIRECT_OBJECT && newRel != NOMINAL_SUBJECT) {
if (DEBUG) {
log.info("Adding new " + newRel + " dependency from " + newGov + " to " + dep + " (subj/obj case)");
}
newTypedDeps.add(new TypedDependency(newRel, newGov, dep));
}
} else {
if (DEBUG) {
log.info("Adding new " + newRel + " dependency from " + newGov + " to " + dep);
}
newTypedDeps.add(new TypedDependency(newRel, newGov, dep));
}
}
}
}
// propagate subjects
// look at the gov in the conjunct: if it is has a subject relation,
// the dep is a verb and the dep doesn't have a subject relation
// then we want to add a subject relation for the dep.
// (By testing for the dep to be a verb, we are going to miss subject of
// copula verbs! but
// is it safe to relax this assumption?? i.e., just test for the subject
// part)
// CDM 2008: I also added in JJ, since participial verbs are often
// tagged JJ
String tag = dep.tag();
if (subjectMap.containsKey(gov) && (tag.startsWith("VB") || tag.startsWith("JJ")) && !subjectMap.containsKey(dep)) {
TypedDependency tdsubj = subjectMap.get(gov);
// check for wrong nsubjpass: if the new verb is VB or VBZ or VBP or JJ, then
// add nsubj (if it is tagged correctly, should do this for VBD too, but we don't)
// Flip passive<->active subject type based on the conjoined verb's
// own tag / passive-auxiliary evidence.
GrammaticalRelation relation = tdsubj.reln();
if (relation == NOMINAL_PASSIVE_SUBJECT) {
if (isDefinitelyActive(tag)) {
relation = NOMINAL_SUBJECT;
}
} else if (relation == CLAUSAL_PASSIVE_SUBJECT) {
if (isDefinitelyActive(tag)) {
relation = CLAUSAL_SUBJECT;
}
} else if (relation == NOMINAL_SUBJECT) {
if (withPassiveAuxiliary.contains(dep)) {
relation = NOMINAL_PASSIVE_SUBJECT;
}
} else if (relation == CLAUSAL_SUBJECT) {
if (withPassiveAuxiliary.contains(dep)) {
relation = CLAUSAL_PASSIVE_SUBJECT;
}
}
if (DEBUG) {
log.info("Adding new " + relation + " dependency from " + dep + " to " + tdsubj.dep() + " (subj propagation case)");
}
newTypedDeps.add(new TypedDependency(relation, dep, tdsubj.dep()));
}
// propagate objects
// cdm july 2010: This bit of code would copy a dobj from the first
// clause to a later conjoined clause if it didn't
// contain its own dobj or prepc. But this is too aggressive and wrong
// if the later clause is intransitive
// (including passivized cases) and so I think we have to not have this
// done always, and see no good "sometimes" heuristic.
// IF WE WERE TO REINSTATE, SHOULD ALSO NOT ADD OBJ IF THERE IS A ccomp
// (SBAR).
// if (objectMap.containsKey(gov) &&
// dep.tag().startsWith("VB") && ! objectMap.containsKey(dep)
// && ! prepcDep.contains(gov)) {
// TypedDependency tdobj = objectMap.get(gov);
// if (DEBUG) {
// log.info("Adding new " + tdobj.reln() + " dependency from "
// + dep + " to " + tdobj.dep() + " (obj propagation case)");
// }
// newTypedDeps.add(new TypedDependency(tdobj.reln(), dep,
// tdobj.dep()));
// }
}
}
// Replace the contents of the input collection with the augmented set.
list.clear();
list.addAll(newTypedDeps);
}
Aggregations