Search in sources :

Example 1 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class ChineseGrammaticalStructure method collapsePrepAndPoss.

/**
 * Collapses two-step generic dependencies that pass through a preposition
 * into a single dependency, rewriting {@code list} in place.
 * <p>
 * For a pair {@code DEPENDENT(g, p)} and {@code DEPENDENT(p, d)} in which the
 * tag of {@code p} is {@code "P"}, a new dependency {@code rel(g, d)} is
 * created, where {@code rel} is looked up from the value of {@code p}. Both
 * members of the pair are marked {@code GrammaticalRelation.KILL} and are
 * left out of the result. Any dependency still governed by {@code p} is
 * re-attached to {@code g}.
 * <p>
 * On return {@code list} holds the newly created dependencies first, followed
 * by the surviving original dependencies in their original iteration order.
 *
 * @param list the typed dependencies to collapse; cleared and refilled
 */
private static void collapsePrepAndPoss(Collection<TypedDependency> list) {
    // Index the dependencies by governor, so that the dependencies leaving
    // a given word can be found directly.
    Map<IndexedWord, Set<TypedDependency>> depsByGov = Generics.newHashMap();
    for (TypedDependency typedDep : list) {
        IndexedWord governor = typedDep.gov();
        Set<TypedDependency> outgoing = depsByGov.get(governor);
        if (outgoing == null) {
            outgoing = Generics.<TypedDependency>newHashSet();
            depsByGov.put(governor, outgoing);
        }
        outgoing.add(typedDep);
    }

    Collection<TypedDependency> collapsed = new ArrayList<>();
    for (TypedDependency first : list) {
        if (first.reln() == GrammaticalRelation.KILL) {
            // already consumed by an earlier collapse
            continue;
        }
        IndexedWord middle = first.dep();
        String middleTag = middle.tag();
        // all dependencies that have our dependent as their governor
        Set<TypedDependency> continuations = depsByGov.get(middle);
        if (continuations == null) {
            continue;
        }

        // Look for the "second half" of a collapsible pair.
        for (TypedDependency second : continuations) {
            if (first.reln() != DEPENDENT || second.reln() != DEPENDENT || !middleTag.equals("P")) {
                continue;
            }
            // The new relation is named after the word in the middle.
            GrammaticalRelation collapsedReln = ChineseGrammaticalRelations.valueOf(middle.value());
            if (collapsedReln == null) {
                collapsedReln = GrammaticalRelation.valueOf(Language.Chinese, middle.value());
            }
            collapsed.add(new TypedDependency(collapsedReln, first.gov(), second.dep()));
            // Both halves are now "used up".
            first.setReln(GrammaticalRelation.KILL);
            second.setReln(GrammaticalRelation.KILL);
        }

        if (!first.reln().equals(GrammaticalRelation.KILL)) {
            continue;
        }
        // 'first' was collapsed away, so every dependency that is still
        // alive under its dependent is moved up to the governor of 'first'.
        for (TypedDependency second : continuations) {
            if (!second.reln().equals(GrammaticalRelation.KILL)) {
                second.setGov(first.gov());
            }
        }
    }

    // Append the dependencies that were not consumed.
    for (TypedDependency survivor : list) {
        if (!survivor.reln().equals(GrammaticalRelation.KILL)) {
            collapsed.add(survivor);
        }
    }

    // Replace the contents of the caller's collection, dropping killed entries.
    list.clear();
    list.addAll(collapsed);
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 2 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method collapseReferent.

/**
 * Collapses referent relations by re-pointing the edges that lead to a
 * relative word so that they lead to its antecedent instead. E.g., for
 * "The man that I love ... ":
 * {@code ref(man, that) dobj(love, that)} becomes
 * {@code ref(man, that) dobj(love, man)}.
 * <p>
 * The {@code ref} edge itself is kept. Each replacement edge is added with
 * weight {@code Double.NEGATIVE_INFINITY} and flagged as an extra edge.
 *
 * @param sg the graph to modify in place
 */
private static void collapseReferent(SemanticGraph sg) {
    // Collect every ref(gov, dep) edge up front, because sg is edited below.
    List<SemanticGraphEdge> referentEdges = new ArrayList<>(sg.findAllRelns(REFERENT));
    // Incoming edges are enumerated on a copy while sg is modified
    // (presumably so the iteration is not disturbed by the edits).
    SemanticGraph snapshot = sg.makeSoftCopy();

    for (SemanticGraphEdge referentEdge : referentEdges) {
        IndexedWord relativeWord = referentEdge.getDependent();
        IndexedWord antecedent = referentEdge.getGovernor();

        Iterator<SemanticGraphEdge> incoming = snapshot.incomingEdgeIterator(relativeWord);
        while (incoming.hasNext()) {
            SemanticGraphEdge edge = incoming.next();
            // Leave the ref edge itself alone, and skip edges that already
            // start at the antecedent: re-pointing those would yield an edge
            // from the antecedent to itself.
            if (edge.getRelation() == REFERENT || edge.getGovernor().equals(antecedent)) {
                continue;
            }
            sg.removeEdge(edge);
            sg.addEdge(edge.getGovernor(), antecedent, edge.getRelation(), Double.NEGATIVE_INFINITY, true);
        }
    }
}
Also used : SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 3 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method process3WP.

/**
 * Processes all the three-word prepositions in THREE_WORD_PREPS.
 * <p>
 * For each start index recorded in {@code trigrams}, the three consecutive
 * words are matched against THREE_WORD_PREPS_PATTERN. On a match, the node
 * bound to {@code gov2} takes the place of the middle word {@code w2}: it
 * becomes a root (if {@code w2} was one) or is attached to the governor of
 * {@code w2}, and it inherits the outgoing edges of {@code w2}. Finally the
 * three words are handed to {@code createMultiWordExpression}.
 *
 * @param sg the graph to modify in place
 * @param trigrams maps a three-word preposition to the indices at which its
 *     first word occurs
 */
private static void process3WP(SemanticGraph sg, HashMap<String, HashSet<Integer>> trigrams) {
    for (String trigram : THREE_WORD_PREPS) {
        HashSet<Integer> startIndices = trigrams.get(trigram);
        if (startIndices == null) {
            continue;
        }

        for (Integer start : startIndices) {
            IndexedWord w1 = sg.getNodeByIndexSafe(start);
            IndexedWord w2 = sg.getNodeByIndexSafe(start + 1);
            IndexedWord w3 = sg.getNodeByIndexSafe(start + 2);
            if (w1 == null || w2 == null || w3 == null) {
                continue;
            }

            // Find the first pattern match that is anchored at exactly these three words.
            IndexedWord gov = null;
            IndexedWord gov2 = null;
            SemgrexMatcher matcher = THREE_WORD_PREPS_PATTERN.matcher(sg);
            while (matcher.find()) {
                boolean sameWords = w1.equals(matcher.getNode("w1"))
                        && w2.equals(matcher.getNode("w2"))
                        && w3.equals(matcher.getNode("w3"));
                if (!sameWords) {
                    continue;
                }
                gov = matcher.getNode("gov");
                gov2 = matcher.getNode("gov2");
                break;
            }
            if (gov2 == null) {
                continue;
            }

            // Both cases below start by detaching gov2 from w2; the root test
            // is evaluated first, before the graph is touched.
            boolean w2IsRoot = sg.getRoots().contains(w2);
            SemanticGraphEdge edge = sg.getEdge(w2, gov2);
            if (edge == null) {
                continue;
            }
            sg.removeEdge(edge);

            GrammaticalRelation markerReln = CASE_MARKER;
            if (w2IsRoot) {
                // gov2 replaces w2 as a root.
                sg.getRoots().remove(w2);
                sg.addRoot(gov2);
            } else {
                // Fall back to the parent of w2 when the pattern bound no governor.
                if (gov == null) {
                    gov = sg.getParent(w2);
                }
                if (gov == null) {
                    continue;
                }
                GrammaticalRelation reln = sg.getEdge(gov, w2).getRelation();
                if (reln == NOMINAL_MODIFIER) {
                    // A clausal relation on the removed edge overrides nmod,
                    // and the expression is then attached as a marker.
                    GrammaticalRelation removedReln = edge.getRelation();
                    if (removedReln == CLAUSAL_MODIFIER || removedReln == ADV_CLAUSE_MODIFIER) {
                        reln = removedReln;
                        markerReln = MARKER;
                    }
                }
                sg.addEdge(gov, gov2, reln, Double.NEGATIVE_INFINITY, false);
            }

            /* Make children of w2 dependents of gov2. */
            for (SemanticGraphEdge childEdge : sg.getOutEdgesSorted(w2)) {
                sg.removeEdge(childEdge);
                sg.addEdge(gov2, childEdge.getDependent(), childEdge.getRelation(), childEdge.getWeight(), childEdge.isExtra());
            }

            createMultiWordExpression(sg, gov2, markerReln, w1, w2, w3);
        }
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 4 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method addConjInformation.

/**
 * Adds the type of conjunction to all conjunct relations.
 * <p>
 * {@code cc(Marie, and)}, {@code conj(Marie, Chris)} and {@code conj(Marie, John)}
 * become {@code cc(Marie, and)}, {@code conj:and(Marie, Chris)} and {@code conj:and(Marie, John)}.
 * <p>
 * In case multiple coordination markers depend on the same governor,
 * the one that precedes the conjunct is appended to the conjunction relation, or the
 * first one if no preceding marker exists.
 * <p>
 * Some multi-word coordination markers are collapsed to {@code conj:and} or {@code conj:negcc}.
 * See {@link #conjValue(IndexedWord, SemanticGraph)}.
 * <p>
 * Implementation note: consecutive matches that share both governor and
 * coordination marker are collected into one group, and each group is passed
 * to {@code addConjToReln} as a whole.
 *
 * @param sg A SemanticGraph from a sentence
 */
private static void addConjInformation(SemanticGraph sg) {
    /* Semgrexes require a graph with a root. */
    if (sg.getRoots().isEmpty()) {
        return;
    }

    // Matching runs on a copy; the relations are rewritten on sg itself.
    SemanticGraph matchGraph = sg.makeSoftCopy();
    SemgrexMatcher matcher = CONJUNCTION_PATTERN.matcher(matchGraph);

    IndexedWord groupGov = null;
    IndexedWord groupCc = null;
    List<IndexedWord> groupConjuncts = Generics.newLinkedList();

    while (matcher.find()) {
        IndexedWord conjDep = matcher.getNode("conj");
        IndexedWord gov = matcher.getNode("gov");
        IndexedWord ccDep = matcher.getNode("cc");

        // A different governor or marker closes the group collected so far.
        if (groupGov != null && !(gov.equals(groupGov) && ccDep.equals(groupCc))) {
            addConjToReln(sg, groupGov, groupConjuncts, groupCc);
            groupConjuncts = Generics.newLinkedList();
        }

        groupGov = gov;
        groupCc = ccDep;
        groupConjuncts.add(conjDep);
    }

    // Flush the last group, if there was any match at all.
    if (groupGov != null) {
        addConjToReln(sg, groupGov, groupConjuncts, groupCc);
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 5 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method expandPrepConjunction.

/**
 * Used by expandPrepConjunctions.
 * <p>
 * Expands one group of conjoined prepositions. {@code gov} is the first
 * preposition, its parent is the word it marks, and that word's parent is
 * the head of the prepositional phrase. For every conjunct in
 * {@code conjDeps} a soft copy of that head is created and attached to the
 * head (or to the head's original node, if the head is itself a copy) as a
 * conjunct; the copy governs the marked word via the same relation as the
 * head, with the conjunct passed to {@code addCaseMarkersToReln} as its case
 * marker.
 * <p>
 * Nothing happens if {@code gov} has no parent or no grandparent.
 *
 * @param sg the graph to modify in place
 * @param gov the first preposition of the coordination
 * @param conjDeps the prepositions conjoined with {@code gov}
 * @param ccDep the coordinating conjunction
 */
private static void expandPrepConjunction(SemanticGraph sg, IndexedWord gov, List<IndexedWord> conjDeps, IndexedWord ccDep) {
    IndexedWord caseGov = sg.getParent(gov);
    if (caseGov == null) {
        return;
    }
    IndexedWord caseGovGov = sg.getParent(caseGov);
    if (caseGovGov == null) {
        return;
    }

    // New conjuncts hang off the original node when caseGovGov is itself a copy.
    IndexedWord conjGov = caseGovGov.getOriginal();
    if (conjGov == null) {
        conjGov = caseGovGov;
    }
    GrammaticalRelation rel = sg.reln(caseGovGov, caseGov);

    List<IndexedWord> copies = Generics.newLinkedList();
    for (IndexedWord conjDep : conjDeps) {
        IndexedWord headCopy = caseGovGov.makeSoftCopy();

        /* Attach the copy as a conjunct of the head. */
        sg.addEdge(conjGov, headCopy, CONJUNCT, Double.NEGATIVE_INFINITY, false);
        copies.add(headCopy);

        /* Add relation from the copy node to the marked word, as an extra edge. */
        sg.addEdge(headCopy, caseGov, rel, Double.NEGATIVE_INFINITY, true);

        /* This conjunct is the only case marker for the new relation. */
        List<IndexedWord> caseMarkers = Generics.newArrayList();
        caseMarkers.add(conjDep);
        addCaseMarkersToReln(sg, headCopy, caseGov, caseMarkers);
    }

    /* Add conjunction information for these relations already at this point.
     * It could be that we add several coordinating conjunctions while collapsing
     * and we might not know which conjunction belongs to which conjunct at a later
     * point.
     */
    addConjToReln(sg, conjGov, copies, ccDep);
}
Also used : GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Aggregations

IndexedWord (edu.stanford.nlp.ling.IndexedWord)204 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)55 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)53 GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation)41 CoreLabel (edu.stanford.nlp.ling.CoreLabel)38 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)36 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)24 SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher)21 ArrayList (java.util.ArrayList)16 SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern)10 Tree (edu.stanford.nlp.trees.Tree)10 Pair (edu.stanford.nlp.util.Pair)10 CoreMap (edu.stanford.nlp.util.CoreMap)8 IntPair (edu.stanford.nlp.util.IntPair)8 java.util (java.util)8 Collectors (java.util.stream.Collectors)8 Span (edu.stanford.nlp.ie.machinereading.structure.Span)7 Annotation (edu.stanford.nlp.pipeline.Annotation)6 edu.stanford.nlp.util (edu.stanford.nlp.util)6 Mention (edu.stanford.nlp.coref.data.Mention)5