Search in sources:

Example 26 with SemanticGraphEdge

use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

In the class ClauseSplitterSearchProblem, the method addSubtree.

/**
 * A helper to add an entire subtree to a given dependency tree.
 *
 * @param toModify The tree to add the subtree to.
 * @param root The root of the tree where we should be adding the subtree.
 * @param rel The relation to add the subtree with.
 * @param originalTree The original tree (i.e., {@link ClauseSplitterSearchProblem#tree}).
 * @param subject The root of the clause to add.
 * @param ignoredEdges The edges to ignore adding when adding this subtree.
 */
private static void addSubtree(SemanticGraph toModify, IndexedWord root, String rel, SemanticGraph originalTree, IndexedWord subject, Collection<SemanticGraphEdge> ignoredEdges) {
    if (toModify.containsVertex(subject)) {
        // Nothing to do -- the subtree is already present in the target tree.
        return;
    }
    Queue<IndexedWord> frontier = new LinkedList<>();
    Collection<IndexedWord> verticesToCopy = new ArrayList<>();
    Collection<SemanticGraphEdge> edgesToCopy = new ArrayList<>();
    // Seed the search with the subject's immediate children in the original tree.
    for (SemanticGraphEdge outEdge : originalTree.outgoingEdgeIterable(subject)) {
        if (ignoredEdges.contains(outEdge)) {
            continue;
        }
        if (toModify.containsVertex(outEdge.getDependent())) {
            // The subtree is not disjoint from toModify; adding it would corrupt the tree.
            return;
        }
        edgesToCopy.add(outEdge);
        frontier.add(outEdge.getDependent());
    }
    // Breadth-first traversal collecting the rest of the subtree.
    while (!frontier.isEmpty()) {
        IndexedWord current = frontier.poll();
        verticesToCopy.add(current);
        for (SemanticGraphEdge outEdge : originalTree.outgoingEdgeIterable(current)) {
            if (ignoredEdges.contains(outEdge)) {
                continue;
            }
            if (toModify.containsVertex(outEdge.getDependent())) {
                // The subtree is not disjoint from toModify; adding it would corrupt the tree.
                return;
            }
            edgesToCopy.add(outEdge);
            frontier.add(outEdge.getDependent());
        }
    }
    // Attach the subject under the given root with the requested relation.
    toModify.addVertex(subject);
    toModify.addEdge(root, subject, GrammaticalRelation.valueOf(Language.English, rel), Double.NEGATIVE_INFINITY, false);
    // Copy over the collected vertices...
    verticesToCopy.forEach(toModify::addVertex);
    // ...and then the collected edges.
    for (SemanticGraphEdge outEdge : edgesToCopy) {
        assert !toModify.incomingEdgeIterator(outEdge.getDependent()).hasNext();
        toModify.addEdge(outEdge.getGovernor(), outEdge.getDependent(), outEdge.getRelation(), outEdge.getWeight(), outEdge.isExtra());
    }
}
Also used : SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 27 with SemanticGraphEdge

use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

In the class NaturalLogicAnnotator, the method annotateOperators.

/**
   * Find the operators in this sentence, annotating the head word (only!) of each operator with the
   * {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.OperatorAnnotation}.
   *
   * @param sentence As in {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotator#doOneSentence(edu.stanford.nlp.pipeline.Annotation, edu.stanford.nlp.util.CoreMap)}
   */
private void annotateOperators(CoreMap sentence) {
    // Prefer the basic dependency graph; fall back to the enhanced one when basic is absent.
    SemanticGraph tree = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    if (tree == null) {
        tree = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    }
    // Try each operator-detecting Semgrex pattern against the graph.
    for (SemgrexPattern pattern : PATTERNS) {
        SemgrexMatcher matcher = pattern.matcher(tree);
        while (matcher.find()) {
            // Get terms
            IndexedWord properSubject = matcher.getNode("Subject");
            IndexedWord quantifier, subject;
            boolean namedEntityQuantifier = false;
            if (properSubject != null) {
                // A proper-noun subject acts as its own (implicit) quantifier.
                quantifier = subject = properSubject;
                namedEntityQuantifier = true;
            } else {
                quantifier = matcher.getNode("quantifier");
                subject = matcher.getNode("subject");
            }
            // Validate quantifier
            // At the end of this, quantifierInfo holds (operator, quantifier begin, quantifier end), if valid.
            Optional<Triple<Operator, Integer, Integer>> quantifierInfo;
            if (namedEntityQuantifier) {
                // named entities have the "all" semantics by default.
                if (!neQuantifiers) {
                    continue;
                }
                // note: empty quantifier span given
                quantifierInfo = Optional.of(Triple.makeTriple(Operator.IMPLICIT_NAMED_ENTITY, quantifier.index(), quantifier.index()));
            } else {
                // find the quantifier, and return some info about it.
                quantifierInfo = validateQuantifierByHead(sentence, quantifier);
            }
            // (fix up 'there are'): if the matched subject is the copula, re-point it at the real nsubj dependent.
            if ("be".equals(subject == null ? null : subject.lemma())) {
                boolean hasExpl = false;
                IndexedWord newSubject = null;
                for (SemanticGraphEdge outgoingEdge : tree.outgoingEdgeIterable(subject)) {
                    if ("nsubj".equals(outgoingEdge.getRelation().toString())) {
                        newSubject = outgoingEdge.getDependent();
                    } else if ("expl".equals(outgoingEdge.getRelation().toString())) {
                        hasExpl = true;
                    }
                }
                if (hasExpl) {
                    // NOTE(review): if an expl edge exists but no nsubj edge does, subject becomes null here;
                    // the null-guards below suggest downstream tolerates this -- confirm.
                    subject = newSubject;
                }
            }
            // (fix up '$n$ of'): for a numeric subject, descend into its nmod dependent (e.g., "three of the dogs").
            if ("CD".equals(subject == null ? null : subject.tag())) {
                for (SemanticGraphEdge outgoingEdge : tree.outgoingEdgeIterable(subject)) {
                    String rel = outgoingEdge.getRelation().toString();
                    if (rel.startsWith("nmod")) {
                        subject = outgoingEdge.getDependent();
                    }
                }
            }
            // Set tokens
            if (quantifierInfo.isPresent()) {
                // Compute span
                OperatorSpec scope = computeScope(tree, quantifierInfo.get().first, matcher.getNode("pivot"), Pair.makePair(quantifierInfo.get().second, quantifierInfo.get().third), subject, namedEntityQuantifier, matcher.getNode("object"), tokens.size());
                // Set annotation
                // Annotate only the quantifier's head token (the -1 converts index() to a 0-based list position).
                CoreLabel token = sentence.get(CoreAnnotations.TokensAnnotation.class).get(quantifier.index() - 1);
                OperatorSpec oldScope = token.get(OperatorAnnotation.class);
                if (oldScope == null || oldScope.quantifierLength() < scope.quantifierLength() || oldScope.instance != scope.instance) {
                    // Prefer the longer quantifier match, or a different operator instance, outright.
                    token.set(OperatorAnnotation.class, scope);
                } else {
                    // Same instance with a shorter-or-equal span: merge the two specs.
                    token.set(OperatorAnnotation.class, OperatorSpec.merge(oldScope, scope));
                }
            }
        }
    }
    // Ensure we didn't select overlapping quantifiers. For example, "a" and "a few" can often overlap.
    // In these cases, take the longer quantifier match.
    List<OperatorSpec> quantifiers = new ArrayList<>();
    sentence.get(CoreAnnotations.TokensAnnotation.class).stream().filter(token -> token.containsKey(OperatorAnnotation.class)).forEach(token -> quantifiers.add(token.get(OperatorAnnotation.class)));
    quantifiers.sort((x, y) -> y.quantifierLength() - x.quantifierLength());
    for (OperatorSpec quantifier : quantifiers) {
        // Strip the annotation from every token in the span except the head word.
        for (int i = quantifier.quantifierBegin; i < quantifier.quantifierEnd; ++i) {
            if (i != quantifier.quantifierHead) {
                tokens.get(i).remove(OperatorAnnotation.class);
            }
        }
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) CoreLabel(edu.stanford.nlp.ling.CoreLabel) java.util(java.util) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) Redwood(edu.stanford.nlp.util.logging.Redwood) edu.stanford.nlp.util(edu.stanford.nlp.util) SentenceAnnotator(edu.stanford.nlp.pipeline.SentenceAnnotator) SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) NaturalLogicAnnotations(edu.stanford.nlp.naturalli.NaturalLogicAnnotations) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) Span(edu.stanford.nlp.ie.machinereading.structure.Span) CoreAnnotation(edu.stanford.nlp.ling.CoreAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TokenSequenceMatcher(edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) IndexedWord(edu.stanford.nlp.ling.IndexedWord) TokenSequencePattern(edu.stanford.nlp.ling.tokensregex.TokenSequencePattern) SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 28 with SemanticGraphEdge

use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

In the class NaturalLogicWeights, the method objDeletionProbability.

/**
 * Estimates the probability that the object attached by the given edge can be deleted,
 * conditioned on the other edges hanging off the same governor.
 *
 * @param edge The edge whose dependent (the object) we are considering deleting.
 * @param neighbors All edges sharing a governor with {@code edge}, including {@code edge} itself.
 * @return A deletion probability in [0, 1].
 */
public double objDeletionProbability(SemanticGraphEdge edge, Iterable<SemanticGraphEdge> neighbors) {
    // Scan the sibling edges for a subject, a preposition, and any second object.
    // (in a totally not-creepy-stalker sort of way)
    Optional<String> subject = Optional.empty();
    Optional<String> preposition = Optional.empty();
    for (SemanticGraphEdge sibling : neighbors) {
        if (sibling == edge) {
            continue;
        }
        String relation = sibling.getRelation().toString();
        if (relation.contains("subj")) {
            subject = Optional.of(sibling.getDependent().originalText().toLowerCase());
        }
        if (relation.contains("prep")) {
            preposition = Optional.of(relation);
        }
        if (relation.contains("obj")) {
            // A second object is always deletable.
            return 1.0;
        }
    }
    String object = edge.getDependent().originalText().toLowerCase();
    String verb = edge.getGovernor().originalText().toLowerCase();
    // Use the most informative affinity statistic available, from most to least specific.
    Double rawScore = null;
    if (subject.isPresent() && preposition.isPresent()) {
        // Case: verb + subject + preposition + object
        rawScore = verbSubjPPObjAffinity.get(Quadruple.makeQuadruple(verb, subject.get(), preposition.get(), object));
    }
    if (rawScore == null) {
        rawScore = verbObjAffinity.get(verb);
    }
    if (rawScore == null) {
        // No statistics at all: fall back to the relation-level prior.
        return deletionProbability(edge.getRelation().toString());
    }
    return 1.0 - Math.min(1.0, rawScore / upperProbabilityCap);
}
Also used : SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 29 with SemanticGraphEdge

use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

In the class RelationTripleSegmenter, the method segment.

/**
 * <p>
 * Try to segment this sentence as a relation triple.
 * The sentence must already match one of a few strict patterns for a valid OpenIE extraction;
 * if it does not, no relation triple is created.
 * That is, this is <b>not</b> a relation extractor; it is just a utility to segment what is already a
 * (subject, relation, object) triple into these three parts.
 * </p>
 *
 * <p>
 *   Both the verb-centric patterns and the ACL-centric patterns are attempted, in that order.
 * </p>
 *
 * @param parse The sentence to process, as a dependency tree.
 * @param confidence An optional confidence to pass on to the relation triple.
 * @param consumeAll if true, force the entire parse to be consumed by the pattern.
 * @return A relation triple, if this sentence matches one of the patterns of a valid relation triple.
 */
public Optional<RelationTriple> segment(SemanticGraph parse, Optional<Double> confidence, boolean consumeAll) {
    // Work on a defensive copy so the caller's graph is never mutated.
    parse = new SemanticGraph(parse);
    IndexedWord root = parse.getFirstRoot();
    // Special case "there is <something>". Arguably this is a job for the clause splitter, but the
    // <something> is sometimes not _really_ its own clause.
    boolean rootIsBe = (root.lemma() != null && root.lemma().equalsIgnoreCase("be")) || (root.lemma() == null && ("is".equalsIgnoreCase(root.word()) || "are".equalsIgnoreCase(root.word()) || "were".equalsIgnoreCase(root.word()) || "be".equalsIgnoreCase(root.word())));
    if (rootIsBe) {
        // Look for the expletive "there", a single nsubj, and nothing else hanging off the root.
        boolean sawExpletiveThere = false;
        boolean hasExtraArcs = false;
        Optional<SemanticGraphEdge> subjectEdge = Optional.empty();
        for (SemanticGraphEdge arc : parse.outgoingEdgeIterable(root)) {
            String relation = arc.getRelation().toString();
            if (relation.equals("expl") && arc.getDependent().word().equalsIgnoreCase("there")) {
                sawExpletiveThere = true;
            } else if (relation.equals("nsubj")) {
                subjectEdge = Optional.of(arc);
            } else {
                hasExtraArcs = true;
            }
        }
        // Split off "there is", promoting the subject to be the new root.
        if (sawExpletiveThere && subjectEdge.isPresent() && !hasExtraArcs) {
            ClauseSplitterSearchProblem.splitToChildOfEdge(parse, subjectEdge.get());
        }
    }
    // Run the patterns: verb-centric first, then ACL-centric as a fallback.
    Optional<RelationTriple> extraction = segmentVerb(parse, confidence, consumeAll);
    if (!extraction.isPresent()) {
        extraction = segmentACL(parse, confidence, consumeAll);
    }
    // Suppress extractions in which every single token carries downward polarity.
    if (extraction.isPresent()) {
        boolean allDownwards = true;
        for (CoreLabel token : extraction.get()) {
            if (token.get(NaturalLogicAnnotations.PolarityAnnotation.class) == null || !token.get(NaturalLogicAnnotations.PolarityAnnotation.class).isDownwards()) {
                allDownwards = false;
            }
        }
        if (allDownwards) {
            return Optional.empty();
        }
    }
    // Return
    return extraction;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 30 with SemanticGraphEdge

use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

In the class Util, the method cleanTree.

/**
 * Fix some bizarre peculiarities with certain trees.
 * So far, these include:
 * <ul>
 * <li>Sometimes there's a node from a word to itself. This seems wrong.</li>
 * </ul>
 *
 * @param tree The tree to clean (in place!).
 * @return A list of extra edges, which are valid but were removed.
 */
public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) {
    //    assert !isCyclic(tree);
    // Clean nodes
    // Pass 1: delete childless punctuation vertices (tokens tagged '.', ',', '(', ')', ':').
    List<IndexedWord> toDelete = new ArrayList<>();
    for (IndexedWord vertex : tree.vertexSet()) {
        // Clean punctuation
        if (vertex.tag() == null) {
            continue;
        }
        char tag = vertex.backingLabel().tag().charAt(0);
        if (tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':') {
            if (!tree.outgoingEdgeIterator(vertex).hasNext()) {
                // This should really never happen, but it does.
                toDelete.add(vertex);
            }
        }
    }
    toDelete.forEach(tree::removeVertex);
    // Clean edges
    // Pass 2: remove self-edges (governor index == dependent index). For copy nodes,
    // re-attach the copy's children to the other endpoint before deleting the copy vertex.
    Iterator<SemanticGraphEdge> iter = tree.edgeIterable().iterator();
    List<Triple<IndexedWord, IndexedWord, SemanticGraphEdge>> toAdd = new ArrayList<>();
    toDelete.clear();
    while (iter.hasNext()) {
        SemanticGraphEdge edge = iter.next();
        if (edge.getDependent().index() == edge.getGovernor().index()) {
            // Clean up copy-edges
            if (edge.getDependent().isCopy(edge.getGovernor())) {
                for (SemanticGraphEdge toCopy : tree.outgoingEdgeIterable(edge.getDependent())) {
                    toAdd.add(Triple.makeTriple(edge.getGovernor(), toCopy.getDependent(), toCopy));
                }
                toDelete.add(edge.getDependent());
            }
            if (edge.getGovernor().isCopy(edge.getDependent())) {
                for (SemanticGraphEdge toCopy : tree.outgoingEdgeIterable(edge.getGovernor())) {
                    toAdd.add(Triple.makeTriple(edge.getDependent(), toCopy.getDependent(), toCopy));
                }
                toDelete.add(edge.getGovernor());
            }
            // Clean self-edges
            iter.remove();
        } else if (edge.getRelation().toString().equals("punct")) {
            // Clean punctuation (again)
            if (!tree.outgoingEdgeIterator(edge.getDependent()).hasNext()) {
                // This should really never happen, but it does.
                iter.remove();
            }
        }
    }
    // (add edges we wanted to add)
    toDelete.forEach(tree::removeVertex);
    for (Triple<IndexedWord, IndexedWord, SemanticGraphEdge> edge : toAdd) {
        tree.addEdge(edge.first, edge.second, edge.third.getRelation(), edge.third.getWeight(), edge.third.isExtra());
    }
    // Handle extra edges.
    // Two cases:
    // (1) the extra edge is a subj/obj edge and the main edge is a conj:.*
    //     in this case, keep the extra
    // (2) otherwise, delete the extra
    List<SemanticGraphEdge> extraEdges = new ArrayList<>();
    for (SemanticGraphEdge edge : tree.edgeIterable()) {
        if (edge.isExtra()) {
            // For each dependent with multiple parents, pick exactly one incoming edge to keep.
            List<SemanticGraphEdge> incomingEdges = tree.incomingEdgeList(edge.getDependent());
            SemanticGraphEdge toKeep = null;
            for (SemanticGraphEdge candidate : incomingEdges) {
                if (toKeep == null) {
                    toKeep = candidate;
                } else if (toKeep.getRelation().toString().startsWith("conj") && candidate.getRelation().toString().matches(".subj.*|.obj.*")) {
                    // Prefer a subj/obj edge over a conj edge (case (1) above).
                    toKeep = candidate;
                } else if (!candidate.isExtra() && !(candidate.getRelation().toString().startsWith("conj") && toKeep.getRelation().toString().matches(".subj.*|.obj.*"))) {
                    // Otherwise prefer a non-extra edge, unless that would evict a subj/obj for a conj.
                    toKeep = candidate;
                }
            }
            // Everything except the keeper is removed and reported back to the caller.
            for (SemanticGraphEdge candidate : incomingEdges) {
                if (candidate != toKeep) {
                    extraEdges.add(candidate);
                }
            }
        }
    }
    extraEdges.forEach(tree::removeEdge);
    // Add apposition edges (simple coref)
    // These synthesized edges are only appended to the returned list, not inserted into the tree.
    for (SemanticGraphEdge extraEdge : new ArrayList<>(extraEdges)) {
        // note[gabor] prevent concurrent modification exception
        for (SemanticGraphEdge candidateAppos : tree.incomingEdgeIterable(extraEdge.getDependent())) {
            if (candidateAppos.getRelation().toString().equals("appos")) {
                extraEdges.add(new SemanticGraphEdge(extraEdge.getGovernor(), candidateAppos.getGovernor(), extraEdge.getRelation(), extraEdge.getWeight(), extraEdge.isExtra()));
            }
        }
        for (SemanticGraphEdge candidateAppos : tree.outgoingEdgeIterable(extraEdge.getDependent())) {
            if (candidateAppos.getRelation().toString().equals("appos")) {
                extraEdges.add(new SemanticGraphEdge(extraEdge.getGovernor(), candidateAppos.getDependent(), extraEdge.getRelation(), extraEdge.getWeight(), extraEdge.isExtra()));
            }
        }
    }
    // Brute force ensure tree
    // Remove incoming edges from roots
    List<SemanticGraphEdge> rootIncomingEdges = new ArrayList<>();
    for (IndexedWord root : tree.getRoots()) {
        for (SemanticGraphEdge incomingEdge : tree.incomingEdgeIterable(root)) {
            rootIncomingEdges.add(incomingEdge);
        }
    }
    rootIncomingEdges.forEach(tree::removeEdge);
    // Loop until it becomes a tree.
    boolean changed = true;
    while (changed) {
        // I just want trees to be trees; is that so much to ask!?
        changed = false;
        List<IndexedWord> danglingNodes = new ArrayList<>();
        List<SemanticGraphEdge> invalidEdges = new ArrayList<>();
        for (IndexedWord vertex : tree.vertexSet()) {
            // Collect statistics
            Iterator<SemanticGraphEdge> incomingIter = tree.incomingEdgeIterator(vertex);
            boolean hasIncoming = incomingIter.hasNext();
            boolean hasMultipleIncoming = false;
            if (hasIncoming) {
                incomingIter.next();
                hasMultipleIncoming = incomingIter.hasNext();
            }
            // Register actions
            if (!hasIncoming && !tree.getRoots().contains(vertex)) {
                // Non-root vertex with no parent: orphaned; schedule it for deletion.
                danglingNodes.add(vertex);
            } else {
                if (hasMultipleIncoming) {
                    // Vertex with more than one parent: drop every incoming edge after the first.
                    for (SemanticGraphEdge edge : new IterableIterator<>(incomingIter)) {
                        invalidEdges.add(edge);
                    }
                }
            }
        }
        // Perform actions
        for (IndexedWord vertex : danglingNodes) {
            tree.removeVertex(vertex);
            changed = true;
        }
        for (SemanticGraphEdge edge : invalidEdges) {
            tree.removeEdge(edge);
            changed = true;
        }
    }
    // Pass 3: fix the duplicate-dobj parse error. If a vertex has more than one dobj child
    // and also an outgoing edge to the word "that", relabel that edge as "mark".
    // This is a common parse error.
    for (IndexedWord vertex : tree.vertexSet()) {
        SemanticGraphEdge thatEdge = null;
        int dobjCount = 0;
        for (SemanticGraphEdge edge : tree.outgoingEdgeIterable(vertex)) {
            if ("that".equalsIgnoreCase(edge.getDependent().word())) {
                thatEdge = edge;
            }
            if ("dobj".equals(edge.getRelation().toString())) {
                dobjCount += 1;
            }
        }
        if (dobjCount > 1 && thatEdge != null) {
            // Case: there are two dobj edges, one of which goes to the word "that"
            // Action: rewrite the dobj edge to "that" to be a "mark" edge.
            tree.removeEdge(thatEdge);
            tree.addEdge(thatEdge.getGovernor(), thatEdge.getDependent(), GrammaticalRelation.valueOf(thatEdge.getRelation().getLanguage(), "mark"), thatEdge.getWeight(), thatEdge.isExtra());
        }
    }
    // Return
    assert isTree(tree);
    return extraEdges;
}
Also used : SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Aggregations

SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)65 IndexedWord (edu.stanford.nlp.ling.IndexedWord)52 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)21 CoreLabel (edu.stanford.nlp.ling.CoreLabel)15 GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation)15 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)11 SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher)10 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)8 Pair (edu.stanford.nlp.util.Pair)6 Mention (edu.stanford.nlp.coref.data.Mention)5 Span (edu.stanford.nlp.ie.machinereading.structure.Span)5 Annotation (edu.stanford.nlp.pipeline.Annotation)5 Tree (edu.stanford.nlp.trees.Tree)5 CoreMap (edu.stanford.nlp.util.CoreMap)5 HashMap (java.util.HashMap)5 Collectors (java.util.stream.Collectors)5 RelationTriple (edu.stanford.nlp.ie.util.RelationTriple)4 SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern)4 IntPair (edu.stanford.nlp.util.IntPair)4 java.util (java.util)4