Search in sources :

Example 91 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class EnglishGrammaticalStructure method addStrandedPobj.

// Using this makes addStrandedPobj a lot cleaner looking, but it
// makes the converter roughly 2% slower.  Might not be worth it.
// Similar changes could be made to many of the other complicated
// collapsing methods.
// static final SemgrexPattern strandedPobjSemgrex = SemgrexPattern.compile("{}=head >rcmod ({} [ == {}=prepgov | >xcomp {}=prepgov | >conj {}=prepgov ]) : {}=prepgov >prep ({}=prepdep !>pcomp {} !> pobj {})");
// // Deal with preposition stranding in relative clauses.
// // For example, "the only thing I'm rooting for"
// // This method will add pobj(for, thing) by connecting using the rcmod and prep
// private static void addStrandedPobj(List<TypedDependency> list) {
//   SemanticGraph graph = new SemanticGraph(list);
//   SemgrexMatcher matcher = strandedPobjSemgrex.matcher(graph);
//   while (matcher.find()) {
//     IndexedWord gov = matcher.getNode("prepdep");
//     IndexedWord dep = matcher.getNode("head");
//     TypedDependency newDep = new TypedDependency(PREPOSITIONAL_OBJECT, gov, dep);
//     newDep.setExtra();
//     list.add(newDep);
//   }
// }
// Handles preposition stranding inside relative clauses, for example
// "the only thing I'm rooting for".
// For every rcmod(head, clause) the clause word itself, plus every word it
// governs through xcomp or conj, is inspected.  Each prep hanging off one of
// those words whose preposition governs neither a pcomp nor a pobj receives a
// new pobj(preposition, head) -- pobj(for, thing) in the example above.
// The new dependencies are collected on the side and appended to the list
// only after all scanning is finished.
private static void addStrandedPobj(List<TypedDependency> list) {
    List<TypedDependency> additions = null;
    // scratch list, allocated on first use and recycled for every rcmod
    List<IndexedWord> clauseNodes = null;
    for (TypedDependency relClause : list) {
        if (relClause.reln() == RELATIVE_CLAUSE_MODIFIER) {
            IndexedWord antecedent = relClause.gov();
            if (clauseNodes == null) {
                clauseNodes = Generics.newArrayList();
            }
            clauseNodes.clear();
            // the relative clause word, then its xcomp / conj dependents
            clauseNodes.add(relClause.dep());
            for (TypedDependency edge : list) {
                if (edge.gov().equals(relClause.dep())) {
                    if (edge.reln() == XCLAUSAL_COMPLEMENT || edge.reln() == CONJUNCT) {
                        clauseNodes.add(edge.dep());
                    }
                }
            }
            for (IndexedWord clauseNode : clauseNodes) {
                for (TypedDependency prepEdge : list) {
                    if (prepEdge.gov().equals(clauseNode) && prepEdge.reln() == PREPOSITIONAL_MODIFIER) {
                        // the preposition is stranded unless it already governs a pcomp or pobj
                        boolean stranded = true;
                        for (TypedDependency complement : list) {
                            if (complement.gov().equals(prepEdge.dep()) && (complement.reln() == PREPOSITIONAL_COMPLEMENT || complement.reln() == PREPOSITIONAL_OBJECT)) {
                                stranded = false;
                                break;
                            }
                        }
                        if (stranded) {
                            if (additions == null) {
                                additions = Generics.newArrayList();
                            }
                            additions.add(new TypedDependency(PREPOSITIONAL_OBJECT, prepEdge.dep(), antecedent));
                        }
                    }
                }
            }
        }
    }
    if (additions != null) {
        list.addAll(additions);
    }
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 92 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class EnglishGrammaticalStructure method collapseMultiWordPrep.

/**
 * Collapses a two-word preposition spread over several dependencies into a
 * single collapsed relation:
 * prep|advmod|amod|dep|mwe(gov, mwp0) X(mwp0, mwp1) pobj|pcomp(mwp1, compl) or
 * pobj|pcomp(mwp0, compl) -&gt; prep_mwp0_mwp1(gov, compl)
 * (prepc_mwp0_mwp1 when the complement relation is pcomp).
 * <p/>
 * The method returns without touching anything when one of the three pieces
 * cannot be found.  Otherwise the three matched dependencies are marked with
 * KILL, dependents of the two preposition words are re-attached, and
 * {@code list} is rebuilt from {@code newTypedDeps}.
 *
 * @param list List of typedDependencies to work on; its content is replaced
 *          on success
 * @param newTypedDeps List of typedDependencies that we construct; it is
 *          appended to (not cleared) by this method
 * @param str_mwp0 First part of the multiword preposition to construct the collapsed
 *          preposition
 * @param str_mwp1 Second part of the multiword preposition to construct the
 *          collapsed preposition
 * @param w_mwp0 First part of the multiword preposition that we look for
 * @param w_mwp1 Second part of the multiword preposition that we look for
 */
private static void collapseMultiWordPrep(Collection<TypedDependency> list, Collection<TypedDependency> newTypedDeps, String str_mwp0, String str_mwp1, String w_mwp0, String w_mwp1) {
    // Step 1: find X(mwp0, mwp1) where the two words are adjacent in the
    // sentence (indexes differ by exactly 1).  The last match wins.
    IndexedWord firstWord = null;
    IndexedWord secondWord = null;
    TypedDependency mwpLink = null;
    for (TypedDependency candidate : list) {
        if (!candidate.gov().value().equalsIgnoreCase(w_mwp0)) {
            continue;
        }
        if (!candidate.dep().value().equalsIgnoreCase(w_mwp1)) {
            continue;
        }
        if (Math.abs(candidate.gov().index() - candidate.dep().index()) != 1) {
            continue;
        }
        firstWord = candidate.gov();
        secondWord = candidate.dep();
        mwpLink = candidate;
    }
    if (firstWord == null) {
        return;
    }

    // Step 2: find the dependency attaching mwp0 to the rest of the sentence,
    // i.e. prep|advmod|amod|dep|mwe(gov, mwp0).  Again the last match wins.
    IndexedWord governor = null;
    TypedDependency attachment = null;
    for (TypedDependency candidate : list) {
        boolean attachingReln = candidate.reln() == PREPOSITIONAL_MODIFIER || candidate.reln() == ADVERBIAL_MODIFIER || candidate.reln() == ADJECTIVAL_MODIFIER || candidate.reln() == DEPENDENT || candidate.reln() == MULTI_WORD_EXPRESSION;
        if (attachingReln && candidate.dep().equals(firstWord)) {
            attachment = candidate;
            governor = attachment.gov();
        }
    }
    if (attachment == null) {
        return;
    }

    // Step 3: find the complement, pobj|pcomp(mwp1, X) or pobj|pcomp(mwp0, X).
    // Weird constructions may offer several; keep the one whose dependent has
    // the LOWEST index.
    TypedDependency complement = null;
    TypedDependency collapsed = null;
    for (TypedDependency candidate : list) {
        boolean isPcomp = candidate.reln() == PREPOSITIONAL_COMPLEMENT;
        if (candidate.reln() != PREPOSITIONAL_OBJECT && !isPcomp) {
            continue;
        }
        if (!candidate.gov().equals(secondWord) && !candidate.gov().equals(firstWord)) {
            continue;
        }
        if (complement != null && complement.dep().index() <= candidate.dep().index()) {
            continue;
        }
        complement = candidate;
        // build the collapsed relation for the current best complement
        GrammaticalRelation collapsedReln = isPcomp
                ? EnglishGrammaticalRelations.getPrepC(str_mwp0 + '_' + str_mwp1)
                : EnglishGrammaticalRelations.getPrep(str_mwp0 + '_' + str_mwp1);
        if (governor != null) {
            collapsed = new TypedDependency(collapsedReln, governor, complement.dep());
        }
    }
    if (complement == null || collapsed == null) {
        return;
    }

    if (DEBUG) {
        log.info("Removing " + attachment + ", " + mwpLink + ", and " + complement);
        log.info("  and adding " + collapsed);
    }

    // Step 4: retire the three matched dependencies and record the new one.
    attachment.setReln(KILL);
    mwpLink.setReln(KILL);
    complement.setReln(KILL);
    newTypedDeps.add(collapsed);

    // Step 5: promote possible orphans, i.e. surviving dependencies that were
    // governed by one of the two preposition words.
    for (TypedDependency survivor : list) {
        if (survivor.reln() == KILL) {
            continue;
        }
        if (survivor.gov().equals(firstWord) || survivor.gov().equals(secondWord)) {
            // An extra NP-TMP buried in the PP ("during the same period last
            // year") goes to the complement; everything else to the governor.
            survivor.setGov(survivor.reln() == TEMPORAL_MODIFIER ? complement.dep() : governor);
        }
        if (!newTypedDeps.contains(survivor)) {
            newTypedDeps.add(survivor);
        }
    }
    list.clear();
    list.addAll(newTypedDeps);
}
Also used : GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 93 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class EnglishGrammaticalStructure method addRef.

/**
 * Adds ref dependencies across relative clauses.  For every rcmod
 * dependency we inspect the children and the grandchildren of its
 * dependent; among those whose word matches
 * EnglishPatterns.RELATIVIZING_WORD_PATTERN (that/what/which/etc),
 * the leftmost one becomes the dependent of a new ref dependency
 * governed by the head of the rcmod.  When a child and a grandchild
 * share the lowest index, the child is used.  New dependencies that
 * are not already in the list are marked as extra and appended.
 */
private static void addRef(Collection<TypedDependency> list) {
    List<TypedDependency> refDeps = new ArrayList<>();
    for (TypedDependency relClause : list) {
        // we only add ref dependencies across relative clauses
        if (relClause.reln() == RELATIVE_CLAUSE_MODIFIER) {
            IndexedWord antecedent = relClause.gov();
            IndexedWord clauseHead = relClause.dep();
            TypedDependency bestChild = null;
            TypedDependency bestGrandchild = null;
            // one pass over the children, looking below each of them as we go
            for (TypedDependency child : list) {
                if (!child.gov().equals(clauseHead)) {
                    continue;
                }
                if (EnglishPatterns.RELATIVIZING_WORD_PATTERN.matcher(child.dep().value()).matches()) {
                    if (bestChild == null || child.dep().index() < bestChild.dep().index()) {
                        bestChild = child;
                    }
                }
                for (TypedDependency grandchild : list) {
                    if (!grandchild.gov().equals(child.dep())) {
                        continue;
                    }
                    if (!EnglishPatterns.RELATIVIZING_WORD_PATTERN.matcher(grandchild.dep().value()).matches()) {
                        continue;
                    }
                    if (bestGrandchild == null || grandchild.dep().index() < bestGrandchild.dep().index()) {
                        bestGrandchild = grandchild;
                    }
                }
            }
            // a grandchild is preferred only when it is strictly further left
            IndexedWord referent = null;
            if (bestGrandchild != null && (bestChild == null || bestGrandchild.dep().index() < bestChild.dep().index())) {
                referent = bestGrandchild.dep();
            } else if (bestChild != null) {
                referent = bestChild.dep();
            }
            if (referent != null) {
                refDeps.add(new TypedDependency(REFERENT, antecedent, referent));
            }
        }
    }
    // append afterwards so the list is not modified while it is iterated
    for (TypedDependency refDep : refDeps) {
        if (list.contains(refDep)) {
            continue;
        }
        refDep.setExtra();
        list.add(refDep);
    }
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 94 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class EnglishGrammaticalStructure method collapse3WP.

/**
 * Collapse 3-word preposition of the following format: <br/>
 * This will be the case when the preposition is analyzed as a NP <br/>
 * prep(gov, mwp0) <br/>
 * X(mwp0,mwp1) <br/>
 * X(mwp1,mwp2) <br/>
 * pobj|pcomp(mwp2, compl) <br/>
 * -&gt; prep_mwp[0]_mwp[1]_mwp[2](gov, compl)
 * <p/>
 *
 * It also takes flat annotation into account: <br/>
 * prep(gov,mwp0) <br/>
 * X(mwp0,mwp1) <br/>
 * X(mwp0,mwp2) <br/>
 * pobj|pcomp(mwp0, compl) <br/>
 * -&gt; prep_mwp[0]_mwp[1]_mwp[2](gov, compl)
 * <p/>
 *
 * Every entry of THREEWORD_PREPS is first tried against the NP annotation;
 * afterwards every entry is tried again against the flat annotation.
 *
 * @param list List of typedDependencies to work on; its content is replaced
 *          whenever a preposition is collapsed
 */
private static void collapse3WP(Collection<TypedDependency> list) {
    // scratch buffer shared by all attempts; each attempt clears it first
    Collection<TypedDependency> newTypedDeps = new ArrayList<>();
    // first, loop over the prepositions for NP annotation
    for (String[] mwp : THREEWORD_PREPS) {
        collapse3WPForPattern(list, newTypedDeps, mwp, false);
    }
    // second, loop again looking at flat annotation
    for (String[] mwp : THREEWORD_PREPS) {
        collapse3WPForPattern(list, newTypedDeps, mwp, true);
    }
}

/**
 * Tries to collapse one 3-word preposition in one annotation style.
 * The two styles differ only in where the third word and the complement
 * attach: under the NP annotation the third word hangs off mwp1 (adjacent
 * to it) and the complement off mwp2; under the flat annotation both hang
 * off mwp0, with the third word two positions away from it.
 *
 * @param list List of typedDependencies to work on; rebuilt from
 *          {@code newTypedDeps} when the preposition is collapsed
 * @param newTypedDeps scratch collection; cleared on entry
 * @param mwp the three words of the preposition, in sentence order
 * @param flat {@code true} to match the flat annotation, {@code false} to
 *          match the NP annotation
 */
private static void collapse3WPForPattern(Collection<TypedDependency> list, Collection<TypedDependency> newTypedDeps, String[] mwp, boolean flat) {
    newTypedDeps.clear();

    // find X(mwp0, mwp1); the two words must be next to one another in the
    // sentence (difference of indexes = 1).  The last match wins.
    IndexedWord mwp0 = null;
    IndexedWord mwp1 = null;
    TypedDependency dep1 = null;
    for (TypedDependency td : list) {
        if (td.gov().value().equalsIgnoreCase(mwp[0]) && td.dep().value().equalsIgnoreCase(mwp[1]) && Math.abs(td.gov().index() - td.dep().index()) == 1) {
            mwp0 = td.gov();
            mwp1 = td.dep();
            dep1 = td;
        }
    }
    if (dep1 == null) {
        // without the first two words nothing further can match
        return;
    }

    // find the dependency introducing the third word: X(mwp1, mwp2) with
    // adjacent words for the NP annotation, X(mwp0, mwp2) with a difference
    // of indexes of 2 for the flat annotation
    IndexedWord thirdWordGov = flat ? mwp0 : mwp1;
    int thirdWordDistance = flat ? 2 : 1;
    IndexedWord mwp2 = null;
    TypedDependency dep2 = null;
    for (TypedDependency td : list) {
        if (td.gov().equals(thirdWordGov) && td.dep().value().equalsIgnoreCase(mwp[2]) && Math.abs(td.gov().index() - td.dep().index()) == thirdWordDistance) {
            mwp2 = td.dep();
            dep2 = td;
        }
    }
    if (dep2 == null) {
        return;
    }

    // now search for prep(gov, mwp0)
    IndexedWord governor = null;
    TypedDependency prep = null;
    for (TypedDependency td1 : list) {
        if (td1.reln() == PREPOSITIONAL_MODIFIER && td1.dep().equals(mwp0)) {
            // we found prep(gov, mwp0)
            prep = td1;
            governor = prep.gov();
        }
    }

    // search for the complement: pobj|pcomp(mwp2, X) for the NP annotation,
    // pobj|pcomp(mwp0, X) for the flat annotation.  The last match wins.
    IndexedWord complementGov = flat ? mwp0 : mwp2;
    String collapsedName = mwp[0] + '_' + mwp[1] + '_' + mwp[2];
    TypedDependency pobj = null;
    TypedDependency newtd = null;
    for (TypedDependency td2 : list) {
        // create the new gr relation matching the kind of complement
        GrammaticalRelation gr;
        if (td2.reln() == PREPOSITIONAL_OBJECT && td2.gov().equals(complementGov)) {
            gr = EnglishGrammaticalRelations.getPrep(collapsedName);
        } else if (td2.reln() == PREPOSITIONAL_COMPLEMENT && td2.gov().equals(complementGov)) {
            gr = EnglishGrammaticalRelations.getPrepC(collapsedName);
        } else {
            continue;
        }
        pobj = td2;
        if (governor != null) {
            newtd = new TypedDependency(gr, governor, pobj.dep());
        }
    }

    if (prep == null || pobj == null || newtd == null) {
        // incomplete match: leave the list untouched
        return;
    }

    // retire the matched dependencies and add the new one
    prep.setReln(KILL);
    dep1.setReln(KILL);
    dep2.setReln(KILL);
    pobj.setReln(KILL);
    newTypedDeps.add(newtd);

    // and promote possible orphans
    for (TypedDependency td1 : list) {
        if (td1.reln() != KILL) {
            if (td1.gov().equals(mwp0) || td1.gov().equals(mwp1) || td1.gov().equals(mwp2)) {
                td1.setGov(governor);
            }
            if (!newTypedDeps.contains(td1)) {
                newTypedDeps.add(td1);
            }
        }
    }
    list.clear();
    list.addAll(newTypedDeps);
}
Also used : GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 95 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class EnglishGrammaticalStructure method addExtraNSubj.

/**
 * Adds extra nsubj dependencies for xcomp dependents when collapsing
 * basic dependencies.
 * <br>
 * An xcomp dependent that already governs an nsubj or nsubjpass is
 * left alone.  Otherwise, a dependent other than "to" is handled only
 * if it governs an aux, while a bare "to" is handled only if it
 * governs no aux (this accounts for sentences such as "he decided not
 * to" with no following verb).
 * <br>
 * For a dependent that is handled, one nsubj is created per direct
 * object of the xcomp governor; if the governor has no direct object,
 * one nsubj is created per nsubj/nsubjpass of the governor instead.
 * New dependencies not already in the list are marked as extra and
 * appended.
 */
private static void addExtraNSubj(Collection<TypedDependency> list) {
    List<TypedDependency> extraSubjects = new ArrayList<>();
    for (TypedDependency xcomp : list) {
        // we only add extra nsubj dependencies to some xcomp dependencies
        if (xcomp.reln() == XCLAUSAL_COMPLEMENT) {
            IndexedWord governor = xcomp.gov();
            IndexedWord complement = xcomp.dep();
            List<IndexedWord> governorSubjects = Generics.newArrayList();
            List<IndexedWord> governorObjects = Generics.newArrayList();
            boolean alreadyHasSubject = false;
            boolean hasAux = false;
            for (TypedDependency candidate : list) {
                boolean subjectReln = candidate.reln() == NOMINAL_SUBJECT || candidate.reln() == NOMINAL_PASSIVE_SUBJECT;
                // the complement already has a subject dependency: stop looking
                if (subjectReln && candidate.gov().equals(complement)) {
                    alreadyHasSubject = true;
                    break;
                }
                if (candidate.reln() == AUX_MODIFIER && candidate.gov().equals(complement)) {
                    hasAux = true;
                }
                if (subjectReln && candidate.gov().equals(governor)) {
                    governorSubjects.add(candidate.dep());
                }
                if (candidate.reln() == DIRECT_OBJECT && candidate.gov().equals(governor)) {
                    governorObjects.add(candidate.dep());
                }
            }
            // if we already have an nsubj dependency, no need to add an extra nsubj
            if (alreadyHasSubject) {
                continue;
            }
            // skip a "to" that has an aux, and any other word that has none
            boolean isTo = complement.value().equalsIgnoreCase("to");
            if (isTo == hasAux) {
                continue;
            }
            // Instead of nsubj(do, law) we want nsubj(do, them): objects of the
            // governor take precedence over its subjects
            List<IndexedWord> controllers = governorObjects.isEmpty() ? governorSubjects : governorObjects;
            for (IndexedWord controller : controllers) {
                extraSubjects.add(new TypedDependency(NOMINAL_SUBJECT, complement, controller));
            }
        }
    }
    // append afterwards so the list is not modified while it is iterated
    for (TypedDependency extraSubject : extraSubjects) {
        if (list.contains(extraSubject)) {
            continue;
        }
        extraSubject.setExtra();
        list.add(extraSubject);
    }
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Aggregations

IndexedWord (edu.stanford.nlp.ling.IndexedWord)204 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)55 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)53 GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation)41 CoreLabel (edu.stanford.nlp.ling.CoreLabel)38 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)36 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)24 SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher)21 ArrayList (java.util.ArrayList)16 SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern)10 Tree (edu.stanford.nlp.trees.Tree)10 Pair (edu.stanford.nlp.util.Pair)10 CoreMap (edu.stanford.nlp.util.CoreMap)8 IntPair (edu.stanford.nlp.util.IntPair)8 java.util (java.util)8 Collectors (java.util.stream.Collectors)8 Span (edu.stanford.nlp.ie.machinereading.structure.Span)7 Annotation (edu.stanford.nlp.pipeline.Annotation)6 edu.stanford.nlp.util (edu.stanford.nlp.util)6 Mention (edu.stanford.nlp.coref.data.Mention)5