Search in sources :

Example 1 with SemgrexMatcher

use of edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method process3WP.

/**
 * Collapses every occurrence of a three-word preposition listed in
 * THREE_WORD_PREPS.
 * <p>
 * For each start index recorded in {@code trigrams}, the three consecutive
 * words are looked up in the graph and compared against the matches of
 * THREE_WORD_PREPS_PATTERN. When a match binds exactly these words, the edge
 * from the middle word to the matched {@code gov2} node is removed,
 * {@code gov2} takes over the middle word's place (as a root, or as a
 * dependent of {@code gov}), receives the middle word's remaining outgoing
 * edges, and the three words are passed to {@code createMultiWordExpression}.
 *
 * @param sg the graph to modify in place
 * @param trigrams maps a trigram to the indices of its first word; trigrams
 *                 without an entry are skipped
 */
private static void process3WP(SemanticGraph sg, HashMap<String, HashSet<Integer>> trigrams) {
    for (String prep : THREE_WORD_PREPS) {
        HashSet<Integer> startIndices = trigrams.get(prep);
        if (startIndices == null) {
            continue;
        }
        for (Integer start : startIndices) {
            IndexedWord first = sg.getNodeByIndexSafe(start);
            IndexedWord middle = sg.getNodeByIndexSafe(start + 1);
            IndexedWord last = sg.getNodeByIndexSafe(start + 2);
            boolean allPresent = first != null && middle != null && last != null;
            if (!allPresent) {
                continue;
            }

            /* Look for the pattern match that binds exactly these three words. */
            SemgrexMatcher matcher = THREE_WORD_PREPS_PATTERN.matcher(sg);
            IndexedWord outerGov = null;
            IndexedWord head = null;
            while (matcher.find()) {
                boolean sameWords = first.equals(matcher.getNode("w1"))
                        && middle.equals(matcher.getNode("w2"))
                        && last.equals(matcher.getNode("w3"));
                if (!sameWords) {
                    continue;
                }
                outerGov = matcher.getNode("gov");
                head = matcher.getNode("gov2");
                break;
            }
            if (head == null) {
                continue;
            }

            GrammaticalRelation markerReln = CASE_MARKER;

            /* Root membership is read before the edge is touched, then the
             * edge from the middle word to the new head is dropped. */
            boolean middleIsRoot = sg.getRoots().contains(middle);
            SemanticGraphEdge headEdge = sg.getEdge(middle, head);
            if (headEdge == null) {
                continue;
            }
            sg.removeEdge(headEdge);

            if (middleIsRoot) {
                /* The new head replaces the middle word as a root. */
                sg.getRoots().remove(middle);
                sg.addRoot(head);
            } else {
                /* Fall back to the middle word's parent if the pattern bound no governor. */
                if (outerGov == null) {
                    outerGov = sg.getParent(middle);
                }
                if (outerGov == null) {
                    continue;
                }
                GrammaticalRelation attachReln = sg.getEdge(outerGov, middle).getRelation();
                if (attachReln == NOMINAL_MODIFIER) {
                    GrammaticalRelation removedReln = headEdge.getRelation();
                    if (removedReln == CLAUSAL_MODIFIER || removedReln == ADV_CLAUSE_MODIFIER) {
                        /* Keep the clausal relation of the removed edge and mark instead of case. */
                        attachReln = removedReln;
                        markerReln = MARKER;
                    }
                }
                sg.addEdge(outerGov, head, attachReln, Double.NEGATIVE_INFINITY, false);
            }

            /* Move the remaining dependents of the middle word to the new head. */
            for (SemanticGraphEdge childEdge : sg.getOutEdgesSorted(middle)) {
                sg.removeEdge(childEdge);
                sg.addEdge(head, childEdge.getDependent(), childEdge.getRelation(), childEdge.getWeight(), childEdge.isExtra());
            }
            createMultiWordExpression(sg, head, markerReln, first, middle, last);
        }
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 2 with SemgrexMatcher

use of edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method addConjInformation.

/**
 * Appends the conjunction type to every conjunct relation.
 * <p>
 * {@code cc(Marie, and)}, {@code conj(Marie, Chris)} and {@code conj(Marie, John)}
 * become {@code cc(Marie, and)}, {@code conj:and(Marie, Chris)} and {@code conj:and(Marie, John)}.
 * <p>
 * If several coordination markers depend on the same governor, the marker
 * that precedes the conjunct is appended to the conjunction relation, or the
 * first marker if none precedes it.
 * <p>
 * Some multi-word coordination markers are collapsed to {@code conj:and} or {@code conj:negcc}.
 * See {@link #conjValue(IndexedWord, SemanticGraph)}.
 * <p>
 * Matching runs on a soft copy of the graph while the relabelling is applied
 * to {@code sg} itself. Consecutive matches that share governor and marker
 * are collected and handed to {@code addConjToReln} as one group.
 *
 * @param sg A SemanticGraph from a sentence
 */
private static void addConjInformation(SemanticGraph sg) {
    /* Semgrexes require a graph with a root. */
    if (sg.getRoots().isEmpty()) {
        return;
    }

    SemanticGraph snapshot = sg.makeSoftCopy();
    SemgrexMatcher matcher = CONJUNCTION_PATTERN.matcher(snapshot);

    IndexedWord groupGov = null;
    IndexedWord groupCc = null;
    List<IndexedWord> groupConjuncts = Generics.newLinkedList();

    while (matcher.find()) {
        IndexedWord conjunct = matcher.getNode("conj");
        IndexedWord currentGov = matcher.getNode("gov");
        IndexedWord currentCc = matcher.getNode("cc");

        /* A change of governor or marker closes the group collected so far. */
        boolean sameGroup = groupGov == null
                || (currentGov.equals(groupGov) && currentCc.equals(groupCc));
        if (!sameGroup) {
            addConjToReln(sg, groupGov, groupConjuncts, groupCc);
            groupConjuncts = Generics.newLinkedList();
        }

        groupGov = currentGov;
        groupCc = currentCc;
        groupConjuncts.add(conjunct);
    }

    /* Flush the last group, if there was any match at all. */
    if (groupGov != null) {
        addConjToReln(sg, groupGov, groupConjuncts, groupCc);
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 3 with SemgrexMatcher

use of edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method expandPPConjunctions.

/**
 * Expands PPs with conjunctions such as in the sentence
 * "Bill flies to France and from Serbia." by copying the verb
 * that governs the prepositional phrase, resulting in the following
 * relations:
 * <p>
 * {@code conj:and(flies, flies')}<br>
 * {@code case(France, to)}<br>
 * {@code cc(flies, and)}<br>
 * {@code case(Serbia, from)}<br>
 * {@code nmod(flies, France)}<br>
 * {@code nmod(flies', Serbia)}<br>
 * <p>
 * The label of the conjunct relation includes the conjunction type
 * because if the verb has multiple cc relations then it can be impossible
 * to infer which coordination marker belongs to which conjuncts.
 * <p>
 * Matching runs on a soft copy of the graph while the expansion is applied
 * to {@code sg} itself. Consecutive matches that share governor and marker
 * are collected and handed to {@code expandPPConjunction} as one group.
 *
 * @param sg SemanticGraph to operate on.
 */
private static void expandPPConjunctions(SemanticGraph sg) {
    /* Semgrexes require a graph with a root. */
    if (sg.getRoots().isEmpty()) {
        return;
    }

    SemanticGraph snapshot = sg.makeSoftCopy();
    SemgrexMatcher matcher = PP_CONJP_PATTERN.matcher(snapshot);

    IndexedWord groupGov = null;
    IndexedWord groupCc = null;
    List<IndexedWord> groupConjuncts = Generics.newLinkedList();

    while (matcher.find()) {
        IndexedWord conjunct = matcher.getNode("conj");
        IndexedWord currentGov = matcher.getNode("gov");
        IndexedWord currentCc = matcher.getNode("cc");

        /* A change of governor or marker closes the group collected so far. */
        boolean sameGroup = groupGov == null
                || (currentGov.equals(groupGov) && currentCc.equals(groupCc));
        if (!sameGroup) {
            expandPPConjunction(sg, groupGov, groupConjuncts, groupCc);
            groupConjuncts = Generics.newLinkedList();
        }

        groupCc = currentCc;
        groupGov = currentGov;
        groupConjuncts.add(conjunct);
    }

    /* Flush the last group, if there was any match at all. */
    if (groupGov != null) {
        expandPPConjunction(sg, groupGov, groupConjuncts, groupCc);
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 4 with SemgrexMatcher

use of edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method correctWHAttachment.

/**
 * Tries to correct complicated cases of WH-movement in
 * sentences such as "What does Mary seem to have?" in
 * which "What" should attach to "have" instead of the
 * control verb.
 * <p>
 * Matching runs on a soft copy of the graph; edges are rewritten on
 * {@code sg} itself. A matched WH-word is moved when the control verb
 * already has an object, or when the control verb's lemma matches
 * {@code EnglishPatterns.NP_V_S_INF_VERBS_REGEX}.
 *
 * @param sg The Semantic graph to operate on.
 */
private static void correctWHAttachment(SemanticGraph sg) {
    /* Semgrexes require a graph with a root. */
    if (sg.getRoots().isEmpty()) {
        return;
    }

    SemanticGraph snapshot = sg.makeSoftCopy();
    SemgrexMatcher matcher = XCOMP_PATTERN.matcher(snapshot);

    while (matcher.findNextMatchingNode()) {
        IndexedWord controlVerb = matcher.getNode("root");
        IndexedWord embeddedVerb = matcher.getNode("embedded");
        IndexedWord whWord = matcher.getNode("wh");
        IndexedWord existingObj = matcher.getNode("obj");

        /* Only WH-words are candidates for reattachment. */
        if (!whWord.tag().startsWith("W")) {
            continue;
        }

        /* Reattach if the control verb already has an object, or if it
         * is a verb that cannot take one. The lemma is only computed
         * when no object was matched. */
        boolean reattach = existingObj != null
                || Morphology.lemmaStatic(controlVerb.value(), controlVerb.tag())
                        .matches(EnglishPatterns.NP_V_S_INF_VERBS_REGEX);
        if (!reattach) {
            continue;
        }

        /* Move the WH-word from the control verb to the embedded verb. */
        SemanticGraphEdge oldEdge = sg.getEdge(controlVerb, whWord);
        if (oldEdge == null) {
            continue;
        }
        sg.removeEdge(oldEdge);
        sg.addEdge(embeddedVerb, whWord, DIRECT_OBJECT, Double.NEGATIVE_INFINITY, false);
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 5 with SemgrexMatcher

use of edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method processComplex2WP.

/**
 * Collapses every occurrence of a two-word preposition listed in
 * TWO_WORD_PREPS_COMPLEX.
 * <p>
 * For each start index recorded in {@code bigrams}, the two consecutive
 * words are looked up in the graph and compared against the matches of
 * TWO_WORD_PREPS_COMPLEX_PATTERN. When a match binds exactly these words,
 * the edge from the first word to the matched {@code gov2} node is removed,
 * {@code gov2} takes over the first word's place (as a root, or as a
 * dependent of {@code gov}), receives the first word's remaining outgoing
 * edges, and both words are passed to {@code createMultiWordExpression}.
 *
 * @param sg the graph to modify in place
 * @param bigrams maps a bigram to the indices of its first word; bigrams
 *                without an entry are skipped
 */
private static void processComplex2WP(SemanticGraph sg, HashMap<String, HashSet<Integer>> bigrams) {
    for (String prep : TWO_WORD_PREPS_COMPLEX) {
        HashSet<Integer> startIndices = bigrams.get(prep);
        if (startIndices == null) {
            continue;
        }
        for (Integer start : startIndices) {
            IndexedWord first = sg.getNodeByIndexSafe(start);
            IndexedWord second = sg.getNodeByIndexSafe(start + 1);
            boolean bothPresent = first != null && second != null;
            if (!bothPresent) {
                continue;
            }

            /* Look for the pattern match that binds exactly these two words. */
            SemgrexMatcher matcher = TWO_WORD_PREPS_COMPLEX_PATTERN.matcher(sg);
            IndexedWord outerGov = null;
            IndexedWord head = null;
            while (matcher.find()) {
                boolean sameWords = first.equals(matcher.getNode("w1"))
                        && second.equals(matcher.getNode("w2"));
                if (!sameWords) {
                    continue;
                }
                outerGov = matcher.getNode("gov");
                head = matcher.getNode("gov2");
                break;
            }
            if (head == null) {
                continue;
            }

            /* Attach the head of the prepositional phrase to the head of
             * the first word. Root membership is read before the edge is
             * touched, then the edge from the first word to the new head
             * is dropped. */
            boolean firstIsRoot = sg.getRoots().contains(first);
            SemanticGraphEdge headEdge = sg.getEdge(first, head);
            if (headEdge == null) {
                continue;
            }
            sg.removeEdge(headEdge);

            if (firstIsRoot) {
                /* The new head replaces the first word as a root. */
                sg.getRoots().remove(first);
                sg.addRoot(head);
            } else {
                /* Fall back to the first word's parent if the pattern bound no governor. */
                if (outerGov == null) {
                    outerGov = sg.getParent(first);
                }
                if (outerGov == null) {
                    continue;
                }
                /* Determine the relation to use. If it is a relation that can
                 * join two clauses and the first word is the head of a copular
                 * construction, then use the relation of the first word and its
                 * parent. Otherwise use the relation of the removed edge. */
                GrammaticalRelation attachReln = headEdge.getRelation();
                if (sg.hasChildWithReln(first, COPULA)) {
                    GrammaticalRelation parentReln = sg.getEdge(outerGov, first).getRelation();
                    if (clauseRelations.contains(parentReln)) {
                        attachReln = parentReln;
                    }
                }
                sg.addEdge(outerGov, head, attachReln, Double.NEGATIVE_INFINITY, false);
            }

            /* Move the remaining dependents of the first word to the new head. */
            for (SemanticGraphEdge childEdge : sg.getOutEdgesSorted(first)) {
                sg.removeEdge(childEdge);
                sg.addEdge(head, childEdge.getDependent(), childEdge.getRelation(), childEdge.getWeight(), childEdge.isExtra());
            }
            createMultiWordExpression(sg, head, CASE_MARKER, first, second);
        }
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Aggregations

SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) 23, IndexedWord (edu.stanford.nlp.ling.IndexedWord) 19, SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph) 13, SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) 10, SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge) 9, CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations) 4, CoreLabel (edu.stanford.nlp.ling.CoreLabel) 4, Span (edu.stanford.nlp.ie.machinereading.structure.Span) 3, TokenSequenceMatcher (edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher) 3, SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) 3, GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation) 3, TokenSequencePattern (edu.stanford.nlp.ling.tokensregex.TokenSequencePattern) 2, RelationTriple (edu.stanford.nlp.ie.util.RelationTriple) 1, RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException) 1, CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation) 1, NaturalLogicAnnotations (edu.stanford.nlp.naturalli.NaturalLogicAnnotations) 1, EnglishTreebankParserParams (edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams) 1, TreebankLangParserParams (edu.stanford.nlp.parser.lexparser.TreebankLangParserParams) 1, Annotation (edu.stanford.nlp.pipeline.Annotation) 1, SentenceAnnotator (edu.stanford.nlp.pipeline.SentenceAnnotator) 1