Search in sources :

Example 16 with GrammaticalRelation

use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.

the class EnglishGrammaticalStructure method removeDep.

/**
   * Drops generic {@code dep} relations that merely duplicate a preposition
   * relation over the same governor/dependent pair. Such duplicates can show up
   * when stranded prepositions are collapsed: for "What does CPR stand for?" we
   * get dep(stand, what), and after collapsing we also get prep_for(stand, what).
   *
   * @param list A list of typed dependencies to check through; it is modified in place
   */
private static void removeDep(Collection<TypedDependency> list) {
    // every relation returned by getPreps() and getPrepsC() counts as a preposition relation here
    Set<GrammaticalRelation> prepositionRelations = Generics.newHashSet(EnglishGrammaticalRelations.getPreps());
    prepositionRelations.addAll(EnglishGrammaticalRelations.getPrepsC());

    // pass 1: mark each dep(gov, dep) that is shadowed by a preposition relation on the same pair
    for (TypedDependency prepCandidate : list) {
        if (!prepositionRelations.contains(prepCandidate.reln())) {
            continue;
        }
        IndexedWord head = prepCandidate.gov();
        IndexedWord child = prepCandidate.dep();
        for (TypedDependency other : list) {
            boolean isGenericDep = other.reln() == DEPENDENT;
            if (isGenericDep && other.gov().equals(head) && other.dep().equals(child)) {
                other.setReln(KILL);
            }
        }
    }

    // pass 2: physically remove every typed dependency whose relation is "kill"
    Iterator<TypedDependency> cursor = list.iterator();
    while (cursor.hasNext()) {
        TypedDependency candidate = cursor.next();
        if (candidate.reln() != KILL) {
            continue;
        }
        if (DEBUG) {
            log.info("Removing duplicate relation: " + candidate);
        }
        cursor.remove();
    }
}
Also used : GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 17 with GrammaticalRelation

use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.

the class EnglishGrammaticalStructure method collapsePrepAndPoss.

/**
   * Collapses preposition and possessor dependencies in {@code list}, in place.
   * <p>
   * The method runs in three phases:
   * <ol>
   * <li>For every {@code PREPOSITIONAL_MODIFIER} whose preposition governs
   *     conjoined prepositions (tagged IN or TO), build the collapsed relations
   *     for the whole coordination, e.g. "Fred flew to Greece and to Serbia" or
   *     "Bill jumped over the fence and through the hoop".</li>
   * <li>For the remaining relations, handle the basic single
   *     preposition/possessor case.</li>
   * <li>Remove every dependency that was marked with the {@code KILL} relation
   *     and append the newly created dependencies.</li>
   * </ol>
   * Dependencies that have been "used up" are not removed immediately; their
   * relation is set to {@code KILL} and they are deleted in the last phase.
   *
   * @param list typed dependencies to collapse; existing elements may have
   *             their relation or governor changed, and elements are removed
   *             from and added to the collection
   */
private static void collapsePrepAndPoss(Collection<TypedDependency> list) {
    // Man oh man, how gnarly is the logic of this method....
    // Newly created collapsed dependencies; only added to list at the very end.
    Collection<TypedDependency> newTypedDeps = new ArrayList<>();
    // Construct a map from tree nodes to the set of typed
    // dependencies in which the node appears as governor.
    // cdm: could use CollectionValuedMap here!
    Map<IndexedWord, SortedSet<TypedDependency>> map = Generics.newHashMap();
    // Dependents of VERBAL_MODIFIER relations that have no "to" auxiliary;
    // handed to determinePrepRelation below.
    List<IndexedWord> vmod = Generics.newArrayList();
    for (TypedDependency typedDep : list) {
        if (!map.containsKey(typedDep.gov())) {
            map.put(typedDep.gov(), new TreeSet<>());
        }
        map.get(typedDep.gov()).add(typedDep);
        if (typedDep.reln() == VERBAL_MODIFIER) {
            // look for aux deps which indicate this was a to-be verb
            boolean foundAux = false;
            for (TypedDependency auxDep : list) {
                if (auxDep.reln() != AUX_MODIFIER) {
                    continue;
                }
                if (!auxDep.gov().equals(typedDep.dep()) || !auxDep.dep().value().equalsIgnoreCase("to")) {
                    continue;
                }
                foundAux = true;
                break;
            }
            if (!foundAux) {
                vmod.add(typedDep.dep());
            }
        }
    }
    // Phase 1: prepositions that are coordinated with other prepositions.
    for (TypedDependency td1 : list) {
        if (td1.reln() != PREPOSITIONAL_MODIFIER) {
            continue;
        }
        IndexedWord td1Dep = td1.dep();
        // all dependencies governed by the preposition itself
        SortedSet<TypedDependency> possibles = map.get(td1Dep);
        if (possibles == null) {
            continue;
        }
        // look for the "second half"
        // unique: the head prep and whether it should be pobj
        Pair<TypedDependency, Boolean> prepDep = null;
        // treat as unique
        TypedDependency ccDep = null;
        // list of dep and prepOtherDep and pobj (or pcomp)
        List<Triple<TypedDependency, TypedDependency, Boolean>> conjs = new ArrayList<>();
        // dependencies under the prepositions that are neither the object nor
        // part of the coordination; they get re-attached to td1's governor below
        Set<TypedDependency> otherDtrs = new TreeSet<>();
        // first look for a conj(prep, prep) (there might be several conj relations!!!)
        boolean samePrepositionInEachConjunct = true;
        // largest word index among the conjoined prepositions found
        int conjIndex = -1;
        for (TypedDependency td2 : possibles) {
            if (td2.reln() == CONJUNCT) {
                IndexedWord td2Dep = td2.dep();
                String td2DepPOS = td2Dep.tag();
                if (td2DepPOS.equals("IN") || td2DepPOS.equals("TO")) {
                    samePrepositionInEachConjunct = samePrepositionInEachConjunct && td2Dep.value().equals(td1Dep.value());
                    Set<TypedDependency> possibles2 = map.get(td2Dep);
                    // default of collapsing preposition is prep_
                    boolean pobj = true;
                    TypedDependency prepOtherDep = null;
                    if (possibles2 != null) {
                        for (TypedDependency td3 : possibles2) {
                            IndexedWord td3Dep = td3.dep();
                            String td3DepPOS = td3Dep.tag();
                            // problem with multiple prep (mcdm)
                            // only the first pobj/pcomp whose dependent is not itself IN/TO is taken
                            if ((td3.reln() == PREPOSITIONAL_OBJECT || td3.reln() == PREPOSITIONAL_COMPLEMENT) && (!(td3DepPOS.equals("IN") || td3DepPOS.equals("TO"))) && prepOtherDep == null) {
                                prepOtherDep = td3;
                                if (td3.reln() == PREPOSITIONAL_COMPLEMENT) {
                                    pobj = false;
                                }
                            } else {
                                otherDtrs.add(td3);
                            }
                        }
                    }
                    if (conjIndex < td2Dep.index()) {
                        conjIndex = td2Dep.index();
                    }
                    conjs.add(new Triple<>(td2, prepOtherDep, pobj));
                }
            }
        }
        if (conjs.isEmpty()) {
            // no conjoined preposition: left for the basic case in phase 2
            continue;
        }
        // if we have a conj under a preposition dependency, we look for the other
        // parts
        String td1DepPOS = td1Dep.tag();
        for (TypedDependency td2 : possibles) {
            // the cc dep must have an index smaller than the dep of conjDep
            if (td2.reln() == COORDINATION && td2.dep().index() < conjIndex) {
                ccDep = td2;
            } else {
                IndexedWord td2Dep = td2.dep();
                String td2DepPOS = td2Dep.tag();
                // the first dep/pobj/pcomp under an IN/TO/VBG preposition whose
                // dependent is not RB/IN/TO is taken as the head preposition's object
                if ((td2.reln() == DEPENDENT || td2.reln() == PREPOSITIONAL_OBJECT || td2.reln() == PREPOSITIONAL_COMPLEMENT) && (td1DepPOS.equals("IN") || td1DepPOS.equals("TO") || td1DepPOS.equals("VBG")) && prepDep == null && (!(td2DepPOS.equals("RB") || td2DepPOS.equals("IN") || td2DepPOS.equals("TO")))) {
                    // same index trick, in case we have multiple deps
                    // I deleted this to see if it helped [cdm Jan 2010] &&
                    // td2.dep().index() < index)
                    prepDep = new Pair<>(td2, td2.reln() != PREPOSITIONAL_COMPLEMENT);
                } else if (!inConjDeps(td2, conjs)) {
                    // don't want to add the conjDep
                    // again!
                    otherDtrs.add(td2);
                }
            }
        }
        if (prepDep == null || ccDep == null) {
            // we can't deal with it in the hairy prep/conj interaction case!
            continue;
        }
        if (DEBUG) {
            // ccDep must be non-null given test above
            log.info("!! Conj and prep case:");
            log.info("  td1 (prep): " + td1);
            log.info("  Kids of td1 are: " + possibles);
            log.info("  prepDep: " + prepDep);
            log.info("  ccDep: " + ccDep);
            log.info("  conjs: " + conjs);
            log.info("  samePrepositionInEachConjunct: " + samePrepositionInEachConjunct);
            log.info("  otherDtrs: " + otherDtrs);
        }
        // check if we have the same prepositions in the conjunction
        if (samePrepositionInEachConjunct) {
            // (a former extra condition here was: conjDep != null && prepOtherDep != null)
            // OK, we have a conjunction over parallel PPs: Fred flew to Greece and
            // to Serbia.
            GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, prepDep.second());
            TypedDependency tdNew = new TypedDependency(reln, td1.gov(), prepDep.first().dep());
            newTypedDeps.add(tdNew);
            if (DEBUG) {
                log.info("PrepPoss Conj branch (two parallel PPs) adding: " + tdNew);
                log.info("  removing: " + td1 + "  " + prepDep + "  " + ccDep);
            }
            // remember these are "used up"
            td1.setReln(KILL);
            prepDep.first().setReln(KILL);
            ccDep.setReln(KILL);
            for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
                TypedDependency conjDep = trip.first();
                TypedDependency prepOtherDep = trip.second();
                if (prepOtherDep == null) {
                    // No object was found under this conjoined preposition even though the
                    // prepositions are the same (an apparent misparse, see the debug message).
                    // We just clean up the mess.
                    if (DEBUG) {
                        log.info("  apparent misparse: same P twice with only one NP object (prepOtherDep is null)");
                        log.info("  removing: " + conjDep);
                    }
                    // NOTE(review): ccDep was already set to KILL above, so this is a no-op;
                    // conjDep itself is killed after the if/else.
                    ccDep.setReln(KILL);
                } else {
                    // conjoin the two objects directly, named after the coordinating word
                    TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), prepDep.first().dep(), prepOtherDep.dep());
                    newTypedDeps.add(tdNew2);
                    if (DEBUG) {
                        log.info("  adding: " + tdNew2);
                        log.info("  removing: " + conjDep + "  " + prepOtherDep);
                    }
                    prepOtherDep.setReln(KILL);
                }
                conjDep.setReln(KILL);
            }
            // promote dtrs that would be orphaned
            for (TypedDependency otd : otherDtrs) {
                if (DEBUG) {
                    log.info("Changed " + otd);
                }
                otd.setGov(td1.gov());
                if (DEBUG) {
                    log.info(" to " + otd);
                }
            }
            // Some things in possibles may have already been changed, so check gov
            if (DEBUG) {
                log.info("td1: " + td1 + "; possibles: " + possibles);
            }
            for (TypedDependency td2 : possibles) {
                // re-attach only survivors that still hang off the collapsed preposition
                if (td2.reln() != KILL && td2.gov().equals(td1.dep())) {
                    // (CONJUNCT relations are no longer excluded here)
                    if (DEBUG) {
                        log.info("Changing " + td2 + " to have governor of " + td1 + " [a]");
                    }
                    td2.setGov(td1.gov());
                }
            }
            // This one has been dealt with successfully
            continue;
        }
        // Different prepositions in the conjunction. A conjoined preposition without
        // an object of its own is given the head preposition's object,
        // that is, the prepOtherDep should be the same as prepDep !
        for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
            if (trip.first() != null && trip.second() == null) {
                trip.setSecond(new TypedDependency(prepDep.first().reln(), trip.first().dep(), prepDep.first().dep()));
                trip.setThird(prepDep.second());
            }
        }
        // we have two different prepositions in the conjunction
        // in this case we need to add a node
        // "Bill jumped over the fence and through the hoop"
        // prep_over(jumped, fence)
        // conj_and(jumped, jumped)
        // prep_through(jumped, hoop)
        // Extra complication:
        // If "jumped" is already part of a conjunction, we should add the new one off that rather than chaining
        // NOTE(review): conjHead is computed here but never read afterwards; its only
        // intended use is the commented-out tdNew2 construction further down.
        IndexedWord conjHead = td1.gov();
        for (TypedDependency td3 : list) {
            if (td3.dep().equals(td1.gov()) && td3.reln().equals(CONJUNCT)) {
                conjHead = td3.gov();
            }
        }
        GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, prepDep.second());
        TypedDependency tdNew = new TypedDependency(reln, td1.gov(), prepDep.first().dep());
        newTypedDeps.add(tdNew);
        if (DEBUG) {
            log.info("ConjPP (different preps) adding: " + tdNew);
            log.info("  deleting: " + td1 + "  " + prepDep.first() + "  " + ccDep);
        }
        // remember these are "used up"
        td1.setReln(KILL);
        prepDep.first().setReln(KILL);
        ccDep.setReln(KILL);
        // so far we added the first prep grammatical relation
        int copyNumber = 1;
        for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
            TypedDependency conjDep = trip.first();
            TypedDependency prepOtherDep = trip.second();
            boolean pobj = trip.third();
            // OK, we have a conjunction over different PPs
            // we create a new node;
            // in order to make a distinction between the original node and its copy
            // we set the "copyCount" variable in the IndexedWord
            // existence of copyCount > 0 is checked at printing (toString method of
            // TypedDependency)
            IndexedWord label = td1.gov().makeSoftCopy(copyNumber);
            copyNumber++;
            // now we add the conjunction relation between conjHead (either td1.gov
            // or what it is itself conjoined with) and the copy
            // the copy has the same label as td1.gov() but is another TreeGraphNode
            // todo: Or that's the plan; there are a couple of knock on changes to fix before we can do this!
            // TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), conjHead, label);
            TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), td1.gov(), label);
            newTypedDeps.add(tdNew2);
            // now we still need to add the second prep grammatical relation
            // between the copy and the dependent of the prepOtherDep node
            TypedDependency tdNew3;
            GrammaticalRelation reln2 = determinePrepRelation(map, vmod, conjDep, td1, pobj);
            tdNew3 = new TypedDependency(reln2, label, prepOtherDep.dep());
            newTypedDeps.add(tdNew3);
            if (DEBUG) {
                log.info("  adding: " + tdNew2 + "  " + tdNew3);
                log.info("  deleting: " + conjDep + "  " + prepOtherDep);
            }
            conjDep.setReln(KILL);
            prepOtherDep.setReln(KILL);
            // promote dtrs that would be orphaned
            for (TypedDependency otd : otherDtrs) {
                // likely to be a "dep" and we want this to be a "prep"
                if (otd.dep().tag().equals("IN")) {
                    otd.setReln(PREPOSITIONAL_MODIFIER);
                }
                otd.setGov(td1.gov());
            }
        }
        // Re-attach every surviving dependent of the collapsed preposition to td1's
        // governor; this is needed since they're not automatically deleted
        for (TypedDependency td2 : possibles) {
            if (td2.reln() != KILL) {
                // (CONJUNCT relations are no longer excluded here)
                if (DEBUG) {
                    log.info("Changing " + td2 + " to have governor of " + td1 + " [b]");
                }
                td2.setGov(td1.gov());
            }
        }
    // end for different prepositions
    }
    // below here is the single preposition/possessor basic case!!
    for (TypedDependency td1 : list) {
        if (td1.reln() == KILL) {
            // already consumed by the conjunction handling above
            continue;
        }
        IndexedWord td1Dep = td1.dep();
        String td1DepPOS = td1Dep.tag();
        // find all other typedDeps having our dep as gov
        Set<TypedDependency> possibles = map.get(td1Dep);
        if (possibles != null && (td1.reln() == PREPOSITIONAL_MODIFIER || td1.reln() == POSSESSION_MODIFIER || td1.reln() == CONJUNCT)) {
            // look for the "second half"
            // default for prep relation is prep_
            boolean pobj = true;
            for (TypedDependency td2 : possibles) {
                if (td2.reln() != COORDINATION && td2.reln() != CONJUNCT) {
                    IndexedWord td2Dep = td2.dep();
                    String td2DepPOS = td2Dep.tag();
                    if ((td1.reln() == POSSESSION_MODIFIER || td1.reln() == CONJUNCT)) {
                        // possessor (or conjunct) case: drop the possessive marker dependency
                        if (td2.reln() == POSSESSIVE_MODIFIER) {
                            if (!map.containsKey(td2Dep)) {
                                // if 's has no kids of its own (it shouldn't!)
                                td2.setReln(KILL);
                            }
                        }
                    } else if ((td2.reln() == PREPOSITIONAL_OBJECT || td2.reln() == PREPOSITIONAL_COMPLEMENT) && (td1DepPOS.equals("IN") || td1DepPOS.equals("TO") || td1DepPOS.equals("VBG")) && (!(td2DepPOS.equals("RB") || td2DepPOS.equals("IN") || td2DepPOS.equals("TO"))) && !isConjWithNoPrep(td2.gov(), possibles)) {
                        // OK, we have a pair td1, td2 to collapse to td3
                        if (DEBUG) {
                            log.info("(Single prep/poss base case collapsing " + td1 + " and " + td2);
                        }
                        // check whether we are in a pcomp case:
                        if (td2.reln() == PREPOSITIONAL_COMPLEMENT) {
                            pobj = false;
                        }
                        GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, pobj);
                        TypedDependency td3 = new TypedDependency(reln, td1.gov(), td2.dep());
                        if (DEBUG) {
                            log.info("PP adding: " + td3 + " deleting: " + td1 + ' ' + td2);
                        }
                        // add it to map to deal with recursive cases like "achieved this (PP (PP in part) with talent)"
                        map.get(td3.gov()).add(td3);
                        newTypedDeps.add(td3);
                        // remember these are "used up"
                        td1.setReln(KILL);
                        // remember these are "used up"
                        td2.setReln(KILL);
                    }
                }
            }
        // for TypedDependency td2
        }
        // If td1 was collapsed, re-attach the surviving dependents of its (now removed)
        // dependent to td1's governor; this is needed since they're not automatically deleted
        if (possibles != null && td1.reln() == KILL) {
            for (TypedDependency td2 : possibles) {
                if (td2.reln() != KILL) {
                    // (CONJUNCT relations are no longer excluded here)
                    if (DEBUG) {
                        log.info("Changing " + td2 + " to have governor of " + td1 + " [c]");
                    }
                    td2.setGov(td1.gov());
                }
            }
        }
    }
    // now remove typed dependencies with reln "kill" and add new ones.
    for (Iterator<TypedDependency> iter = list.iterator(); iter.hasNext(); ) {
        TypedDependency td = iter.next();
        if (td.reln() == KILL) {
            if (DEBUG) {
                log.info("Removing dep killed in poss/prep (conj) collapse: " + td);
            }
            iter.remove();
        }
    }
    list.addAll(newTypedDeps);
}
Also used : GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 18 with GrammaticalRelation

use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalRelations method getNmod.

/**
   * The "nmod" grammatical relation. Used to add case marker information
   *  to nominal modifier relations.<p>
   * They will be turned into nmod:word, where "word" is a preposition.
   * Relations are created on first request and stored in the {@code nmods}
   * map, so later calls with the same string return the stored instance.
   *
   * @param prepositionString The preposition to make a GrammaticalRelation out of
   * @return A grammatical relation for this preposition
   */
public static GrammaticalRelation getNmod(String prepositionString) {
    /* A few nmod subtypes have dedicated constants and are not stored in the `nmods` map. */
    switch (prepositionString) {
        case "npmod":
            return NP_ADVERBIAL_MODIFIER;
        case "tmod":
            return TEMPORAL_MODIFIER;
        case "poss":
            return POSSESSION_MODIFIER;
        default:
            break;
    }
    GrammaticalRelation cached = nmods.get(prepositionString);
    if (cached != null) {
        return cached;
    }
    synchronized (nmods) {
        /* Look again while holding the lock: another thread may have registered it meanwhile. */
        GrammaticalRelation existing = nmods.get(prepositionString);
        if (existing != null) {
            return existing;
        }
        GrammaticalRelation created = new GrammaticalRelation(Language.UniversalEnglish, "nmod", "nmod_preposition", NOMINAL_MODIFIER, prepositionString);
        nmods.put(prepositionString, created);
        threadSafeAddRelation(created);
        return created;
    }
}
Also used : GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation)

Example 19 with GrammaticalRelation

use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalRelations method getAdvcl.

/**
   * The "advcl" grammatical relation. Used to add case marker information
   *  to adverbial clause relations.<p>
   * They will be turned into advcl:word, where "word" is a preposition.
   * Relations are created on first request and stored in the {@code advcls}
   * map, so later calls with the same string return the stored instance.
   *
   * @param advclString The preposition to make a GrammaticalRelation out of
   * @return A grammatical relation for this preposition
   */
public static GrammaticalRelation getAdvcl(String advclString) {
    GrammaticalRelation cached = advcls.get(advclString);
    if (cached != null) {
        return cached;
    }
    synchronized (advcls) {
        /* Look again while holding the lock: another thread may have registered it meanwhile. */
        GrammaticalRelation existing = advcls.get(advclString);
        if (existing != null) {
            return existing;
        }
        GrammaticalRelation created = new GrammaticalRelation(Language.UniversalEnglish, "advcl", "advcl_preposition", ADV_CLAUSE_MODIFIER, advclString);
        advcls.put(advclString, created);
        threadSafeAddRelation(created);
        return created;
    }
}
Also used : GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation)

Example 20 with GrammaticalRelation

use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method addCaseMarkersToReln.

/**
   * Appends case marker information to nmod/acl/advcl relations.
   * <p>
   * E.g. if there is a relation {@code nmod(gov, dep)} and {@code case(dep, prep)}, then
   * the {@code nmod} relation is renamed to {@code nmod:prep}.
   * <p>
   * The values of adjacent case markers (consecutive word indices) are joined
   * with underscores and lower-cased to form the suffix. If a case marker is not
   * adjacent to the previous one, the suffix collected so far is emitted as an
   * additional edge between {@code gov} and {@code mod}, and a new suffix is
   * started from that marker.
   *
   * @param sg semantic graph
   * @param gov governor of the nmod/acl/advcl relation
   * @param mod modifier of the nmod/acl/advcl relation
   * @param caseMarkers {@code List<IndexedWord>} of all the case markers that depend on mod
   */
private static void addCaseMarkersToReln(SemanticGraph sg, IndexedWord gov, IndexedWord mod, List<IndexedWord> caseMarkers) {
    SemanticGraphEdge edge = sg.getEdge(gov, mod);
    // index of the previously seen case marker; 0 means "none seen yet"
    int lastCaseMarkerIndex = 0;
    StringBuilder sb = new StringBuilder();
    // true while sb holds no word yet, i.e. no '_' separator is needed before the next one
    boolean firstWord = true;
    for (IndexedWord cm : caseMarkers) {
        /* check for adjacency */
        if (lastCaseMarkerIndex == 0 || cm.index() == (lastCaseMarkerIndex + 1)) {
            if (!firstWord) {
                sb.append('_');
            }
            sb.append(cm.value());
            firstWord = false;
        } else {
            /* Should never happen as there should be never two non-adjacent case markers.
             * If it does happen nevertheless create an additional relation.
             */
            // Locale.ROOT keeps relation names independent of the JVM default locale.
            GrammaticalRelation reln = getCaseMarkedRelation(edge.getRelation(), sb.toString().toLowerCase(java.util.Locale.ROOT));
            sg.addEdge(gov, mod, reln, Double.NEGATIVE_INFINITY, true);
            sb = new StringBuilder(cm.value());
            // sb already holds this marker, so a following adjacent marker must be
            // joined with '_' (previously the flag was reset to true and the
            // separator was dropped, yielding e.g. "cd" instead of "c_d").
            firstWord = false;
        }
        lastCaseMarkerIndex = cm.index();
    }
    // the (last) collected suffix renames the original edge itself
    GrammaticalRelation reln = getCaseMarkedRelation(edge.getRelation(), sb.toString().toLowerCase(java.util.Locale.ROOT));
    edge.setRelation(reln);
}
Also used : GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Aggregations

GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation): 49; IndexedWord (edu.stanford.nlp.ling.IndexedWord): 38; SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 13; SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge): 13; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 11; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 11; SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 9; ArrayList (java.util.ArrayList): 5; SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher): 4; IntTuple (edu.stanford.nlp.util.IntTuple): 4; Tree (edu.stanford.nlp.trees.Tree): 3; Word (edu.stanford.nlp.ling.Word): 2; ClassicCounter (edu.stanford.nlp.stats.ClassicCounter): 2; TypedDependency (edu.stanford.nlp.trees.TypedDependency): 2; CoreMap (edu.stanford.nlp.util.CoreMap): 2; CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations): 1; CorefChain (edu.stanford.nlp.coref.data.CorefChain): 1; Dictionaries (edu.stanford.nlp.coref.data.Dictionaries): 1; Mention (edu.stanford.nlp.coref.data.Mention): 1; SpeakerInfo (edu.stanford.nlp.coref.data.SpeakerInfo): 1