Search in sources :

Example 51 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method processComplex2WP.

/**
 * Processes all the two-word prepositions in TWO_WORD_PREPS_COMPLEX.
 *
 * <p>For every recorded occurrence of such a bigram (w1 at index {@code i},
 * w2 at index {@code i + 1}) that TWO_WORD_PREPS_COMPLEX_PATTERN matches, the
 * node bound to {@code gov2} takes over w1's position in the graph: it either
 * becomes a root in place of w1 or is attached to w1's governor, it inherits
 * all of w1's outgoing edges, and w1/w2 are handed to
 * {@code createMultiWordExpression} with the CASE_MARKER relation.
 *
 * <p>An occurrence is skipped without modifying the graph when a node is
 * missing, the pattern does not match it, there is no edge from w1 to gov2,
 * or (for a non-root w1) no governor can be determined.
 *
 * @param sg the semantic graph, modified in place
 * @param bigrams maps a bigram to the node indices of its first word; bigrams
 *        without an entry are ignored
 */
private static void processComplex2WP(SemanticGraph sg, HashMap<String, HashSet<Integer>> bigrams) {
    for (String bigram : TWO_WORD_PREPS_COMPLEX) {
        HashSet<Integer> positions = bigrams.get(bigram);
        if (positions == null) {
            continue;
        }
        for (Integer i : positions) {
            IndexedWord w1 = sg.getNodeByIndexSafe(i);
            IndexedWord w2 = sg.getNodeByIndexSafe(i + 1);
            if (w1 == null || w2 == null) {
                continue;
            }
            /* Find the pattern match that is anchored on exactly this w1/w2 pair. */
            SemgrexMatcher matcher = TWO_WORD_PREPS_COMPLEX_PATTERN.matcher(sg);
            IndexedWord gov = null;
            IndexedWord gov2 = null;
            while (matcher.find()) {
                if (w1.equals(matcher.getNode("w1")) && w2.equals(matcher.getNode("w2"))) {
                    gov = matcher.getNode("gov");
                    gov2 = matcher.getNode("gov2");
                    break;
                }
            }
            if (gov2 == null) {
                continue;
            }
            /* Both the root and the non-root case need the w1 -> gov2 edge. */
            SemanticGraphEdge edge = sg.getEdge(w1, gov2);
            if (edge == null) {
                continue;
            }
            /* Attach the head of the prepositional phrase to
             * the head of w1. */
            if (sg.getRoots().contains(w1)) {
                sg.removeEdge(edge);
                sg.getRoots().remove(w1);
                sg.addRoot(gov2);
            } else {
                /* Resolve the governor BEFORE touching the graph, so that bailing
                 * out here does not leave gov2 detached from w1 with no new parent. */
                if (gov == null) {
                    gov = sg.getParent(w1);
                }
                if (gov == null) {
                    continue;
                }
                sg.removeEdge(edge);
                /* Determine the relation to use. If it is a relation that can
                 * join two clauses and w1 is the head of a copular construction, then
                 * use the relation of w1 and its parent. Otherwise use the relation of edge. */
                GrammaticalRelation reln = edge.getRelation();
                if (sg.hasChildWithReln(w1, COPULA)) {
                    /* The gov -> w1 edge may be absent (e.g. if the matched gov is not
                     * w1's direct governor); in that case keep the relation of edge. */
                    SemanticGraphEdge govEdge = sg.getEdge(gov, w1);
                    if (govEdge != null && clauseRelations.contains(govEdge.getRelation())) {
                        reln = govEdge.getRelation();
                    }
                }
                sg.addEdge(gov, gov2, reln, Double.NEGATIVE_INFINITY, false);
            }
            /* Make children of w1 dependents of gov2. */
            for (SemanticGraphEdge edge2 : sg.getOutEdgesSorted(w1)) {
                sg.removeEdge(edge2);
                sg.addEdge(gov2, edge2.getDependent(), edge2.getRelation(), edge2.getWeight(), edge2.isExtra());
            }
            createMultiWordExpression(sg, gov2, CASE_MARKER, w1, w2);
        }
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 52 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method processNames.

/**
 * Looks for NPs that should have the {@code name} relation and
 * a) changes the structure such that the leftmost token becomes the head
 * b) changes the relation from {@code compound} to {@code name}.
 *
 * <p>Requires NER tags: the method does nothing when {@code USE_NAME} is off
 * or when the first root of the graph carries no
 * {@code NamedEntityTagAnnotation}.
 *
 * <p>Each pattern in {@code NAME_PATTERNS} is matched against a soft copy of
 * the graph while the actual rewriting is delegated to
 * {@code processNamesHelper} on {@code sg} itself (presumably so that the
 * rewriting does not disturb the running matcher). Consecutive matches that
 * share the same {@code w1} node are grouped; only {@code w2} nodes whose NER
 * tag equals that of {@code w1} are collected as name parts.
 *
 * @param sg A semantic graph.
 */
private static void processNames(SemanticGraph sg) {
    if (!USE_NAME) {
        return;
    }

    // NER tags are considered available iff the first root carries one.
    IndexedWord firstRoot = sg.getFirstRoot();
    boolean nerAvailable = firstRoot != null
            && firstRoot.containsKey(CoreAnnotations.NamedEntityTagAnnotation.class);
    if (!nerAvailable) {
        return;
    }

    SemanticGraph snapshot = sg.makeSoftCopy();
    for (SemgrexPattern namePattern : NAME_PATTERNS) {
        SemgrexMatcher nameMatcher = namePattern.matcher(snapshot);
        IndexedWord currentHead = null;
        List<IndexedWord> collectedParts = new ArrayList<>();

        while (nameMatcher.find()) {
            IndexedWord candidateHead = nameMatcher.getNode("w1");
            IndexedWord part = nameMatcher.getNode("w2");

            // A different w1 node (identity comparison) starts a new name group.
            boolean startsNewGroup = currentHead != candidateHead;
            if (startsNewGroup && currentHead != null) {
                // Flush the group collected so far before starting the next one.
                processNamesHelper(sg, currentHead, collectedParts);
                collectedParts = new ArrayList<>();
            }
            if (startsNewGroup) {
                currentHead = candidateHead;
            }

            boolean sameEntityType = part.ner().equals(candidateHead.ner());
            if (sameEntityType) {
                collectedParts.add(part);
            }
        }

        if (currentHead == null) {
            // This pattern matched nothing; the snapshot is still current.
            continue;
        }
        // Flush the last group and re-snapshot the graph for the next pattern.
        processNamesHelper(sg, currentHead, collectedParts);
        snapshot = sg.makeSoftCopy();
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 53 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class NaturalLogicWeightsITest method mkSegment.

/**
 * Builds a star-shaped set of edges that all share one governor.
 *
 * <p>Each entry of {@code outEdges} describes one edge: {@code first} is the
 * relation name handed to {@code GrammaticalRelation.valueOf} for English,
 * {@code second} is the word handed to {@code mockWord} for the dependent.
 * Every edge gets weight {@code Double.NEGATIVE_INFINITY} and is not extra.
 *
 * @param root the word handed to {@code mockWord} for the shared governor
 * @param outEdges (relation name, dependent word) pairs; must not be empty,
 *        because the first resulting edge is returned separately
 * @return a pair of (first edge, all edges in argument order)
 */
private Pair<SemanticGraphEdge, List<SemanticGraphEdge>> mkSegment(String root, Pair<String, String>... outEdges) {
    final IndexedWord governor = new IndexedWord(mockWord(root));
    List<SemanticGraphEdge> segment = Arrays.stream(outEdges)
            .map(spec -> {
                IndexedWord dependent = new IndexedWord(mockWord(spec.second));
                GrammaticalRelation relation = GrammaticalRelation.valueOf(Language.English, spec.first);
                return new SemanticGraphEdge(governor, dependent, relation, Double.NEGATIVE_INFINITY, false);
            })
            .collect(Collectors.toList());
    SemanticGraphEdge firstEdge = segment.get(0);
    return Pair.makePair(firstEdge, segment);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Arrays(java.util.Arrays) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) Assert.assertNotNull(org.junit.Assert.assertNotNull) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) Supplier(java.util.function.Supplier) Collectors(java.util.stream.Collectors) List(java.util.List) Ignore(org.junit.Ignore) Language(edu.stanford.nlp.international.Language) Pair(edu.stanford.nlp.util.Pair) IndexedWord(edu.stanford.nlp.ling.IndexedWord) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 54 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class DependencyIndexITest method checkTree.

/**
 * Verifies that token indices survive conversion from a tree to typed
 * dependencies.
 *
 * <p>First checks the {@code IndexAnnotation} of every leaf whose label is a
 * {@code CoreLabel}; then builds CC-processed typed dependencies (with
 * {@code Extras.MAXIMAL}) via the Penn Treebank language pack and checks the
 * index of every governor and dependent node that occurs in them. The
 * expected indices are those of the sentence "Mary had a little lamb ."
 *
 * @param tree the tree to check
 */
private static void checkTree(Tree tree) {
    List<Tree> leaves = tree.getLeaves();
    for (Tree leaf : leaves) {
        if (!(leaf.label() instanceof CoreLabel)) {
            // Leaves without a CoreLabel carry no index annotation to check.
            continue;
        }
        CoreLabel l = (CoreLabel) leaf.label();
        int index = l.get(CoreAnnotations.IndexAnnotation.class);
        String text = l.get(CoreAnnotations.TextAnnotation.class);
        assertExpectedIndex(text, index);
    }
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
    Collection<TypedDependency> deps = gs.typedDependenciesCCprocessed(GrammaticalStructure.Extras.MAXIMAL);
    // collect all nodes in deps
    Set<IndexedWord> nodes = Generics.newHashSet();
    for (TypedDependency dep : deps) {
        nodes.add(dep.gov());
        nodes.add(dep.dep());
    }
    // check the indices for all nodes
    for (IndexedWord n : nodes) {
        String text = n.value();
        int index = n.get(CoreAnnotations.IndexAnnotation.class);
        assertExpectedIndex(text, index);
    }
}

/**
 * Asserts that a known word of the test sentence carries its expected
 * 1-based index; words outside the sentence are not checked.
 */
private static void assertExpectedIndex(String text, int index) {
    switch (text) {
        case "Mary":
            assertEquals(1, index);
            break;
        case "had":
            assertEquals(2, index);
            break;
        case "a":
            assertEquals(3, index);
            break;
        case "little":
            assertEquals(4, index);
            break;
        case "lamb":
            assertEquals(5, index);
            break;
        case ".":
            assertEquals(6, index);
            break;
        default:
            // Any other token (e.g. an artificial root node) has no expected index.
            break;
    }
}
Also used : TypedDependency(edu.stanford.nlp.trees.TypedDependency) PennTreebankLanguagePack(edu.stanford.nlp.trees.PennTreebankLanguagePack) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) GrammaticalStructureFactory(edu.stanford.nlp.trees.GrammaticalStructureFactory) GrammaticalStructure(edu.stanford.nlp.trees.GrammaticalStructure) Tree(edu.stanford.nlp.trees.Tree) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack) PennTreebankLanguagePack(edu.stanford.nlp.trees.PennTreebankLanguagePack) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 55 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class SemanticGraph method toReadableString.

/**
 * Renders the graph as a three-column text table (dep, reln, gov).
 *
 * <p>After a header and a separator row, one row is emitted per root (with
 * "root" in both the relation and governor columns), followed by one row per
 * edge in the order given by {@code edgeListSorted()}. Each column is
 * left-aligned and padded to 20 characters; words are printed in
 * {@code VALUE_TAG_INDEX} format.
 *
 * @return the formatted table, each row terminated by the platform line separator
 */
private String toReadableString() {
    final String rowFormat = "%-20s%-20s%-20s%n";
    final CoreLabel.OutputFormat wordFormat = CoreLabel.OutputFormat.VALUE_TAG_INDEX;

    StringBuilder table = new StringBuilder();
    table.append(String.format(rowFormat, "dep", "reln", "gov"));
    table.append(String.format(rowFormat, "---", "----", "---"));

    for (IndexedWord root : getRoots()) {
        String rootCell = root.toString(wordFormat);
        table.append(String.format(rowFormat, rootCell, "root", "root"));
    }

    for (SemanticGraphEdge edge : this.edgeListSorted()) {
        String depCell = edge.getTarget().toString(wordFormat);
        String relnCell = edge.getRelation().toString();
        String govCell = edge.getSource().toString(wordFormat);
        table.append(String.format(rowFormat, depCell, relnCell, govCell));
    }

    return table.toString();
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Aggregations

IndexedWord (edu.stanford.nlp.ling.IndexedWord): 204; SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 55; SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge): 53; GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation): 41; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 38; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 36; SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 24; SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher): 21; ArrayList (java.util.ArrayList): 16; SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern): 10; Tree (edu.stanford.nlp.trees.Tree): 10; Pair (edu.stanford.nlp.util.Pair): 10; CoreMap (edu.stanford.nlp.util.CoreMap): 8; IntPair (edu.stanford.nlp.util.IntPair): 8; java.util (java.util): 8; Collectors (java.util.stream.Collectors): 8; Span (edu.stanford.nlp.ie.machinereading.structure.Span): 7; Annotation (edu.stanford.nlp.pipeline.Annotation): 6; edu.stanford.nlp.util (edu.stanford.nlp.util): 6; Mention (edu.stanford.nlp.coref.data.Mention): 5