Search in sources :

Example 61 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class SemanticGraphFactory method makeFromVertices.

/**
 * Builds a new graph that links the given vertices together, using the shortest directed
 * path the source graph reports for every ordered pair of distinct vertices.  This is a
 * deliberately simple stand-in for a minimum spanning graph, not an exact one.
 *
 * NOTE: the vertices are hoped to be contiguous already; when they are not, any node that
 * appears as governor or dependent on a connecting path is added to the result too.
 *
 * @param sg the source graph the vertices are drawn from
 * @param nodes the vertices to connect
 * @return a new graph holding the given vertices, every node found on a connecting path,
 *         and the edges of those paths; resetRoots() is called on it before it is returned
 */
public static SemanticGraph makeFromVertices(SemanticGraph sg, Collection<IndexedWord> nodes) {
    List<IndexedWord> vertices = new ArrayList<>(nodes);
    List<SemanticGraphEdge> pathEdges = new ArrayList<>();
    for (IndexedWord source : nodes) {
        for (IndexedWord target : nodes) {
            // Identity comparison: a node is never connected to itself.
            if (source == target) {
                continue;
            }
            List<SemanticGraphEdge> path = sg.getShortestDirectedPathEdges(source, target);
            if (path == null) {
                // No path reported for this ordered pair.
                continue;
            }
            for (SemanticGraphEdge step : path) {
                pathEdges.add(step);
                // Pull in both endpoints of the edge unless they are already scheduled.
                IndexedWord[] endpoints = { step.getGovernor(), step.getDependent() };
                for (IndexedWord endpoint : endpoints) {
                    if (endpoint != null && !vertices.contains(endpoint)) {
                        vertices.add(endpoint);
                    }
                }
            }
        }
    }
    // Vertices are added before edges so that each edge's endpoints are already present.
    SemanticGraph result = new SemanticGraph();
    for (IndexedWord vertex : vertices) {
        result.addVertex(vertex);
    }
    for (SemanticGraphEdge step : pathEdges) {
        result.addEdge(step.getGovernor(), step.getDependent(), step.getRelation(), step.getWeight(), step.isExtra());
    }
    result.resetRoots();
    return result;
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 62 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class SemanticGraphFormatter method formatSGNodeMultiline.

/**
 * Appends to this.out a multi-line rendering of the given node of the semantic graph,
 * using the given number of spaces as the starting indentation.
 *
 * The node's label is written first; each outgoing edge then starts a new, indented
 * line.  The dependent of each edge is rendered by formatSGNode, which presumably picks
 * a one-line or multi-line form depending on available space (not visible from here).
 *
 * @param sg the graph being formatted
 * @param node the node whose label and children are written
 * @param spaces the indentation, in spaces, in effect for this node
 */
private void formatSGNodeMultiline(SemanticGraph sg, IndexedWord node, int spaces) {
    out.append(LPAREN);
    out.append(formatLabel(node));
    // Children are indented one column under smart indentation, else by the configured step.
    final int childIndent = spaces + (smartIndent ? 1 : indent);
    for (SemanticGraphEdge edge : sg.getOutEdgesSorted(node)) {
        out.append("\n");
        out.append(StringUtils.repeat(SPACE, childIndent));
        int nestedIndent = childIndent;
        if (showRelns) {
            String relnName = edge.getRelation().toString();
            out.append(relnName);
            out.append(COLON);
            if (smartIndent) {
                // Account for the relation name and the colon just written.
                nestedIndent += relnName.length() + 1;
            }
        }
        IndexedWord child = edge.getDependent();
        if (used.contains(child)) {
            // Already visited: skip it to avoid an infinite loop.
            continue;
        }
        formatSGNode(sg, child, nestedIndent);
    }
    out.append(RPAREN);
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 63 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class SemanticGraphUtils method mapTreeToSg.

// -----------------------------------------------------------------------------------------------
// Tree matching code
// -----------------------------------------------------------------------------------------------
/**
 * Given a CFG Tree parse and the SemanticGraph derived from that Tree, produces a mapping
 * from tree nodes to their best-guess SemanticGraph node.
 * Leaves are paired by lexical matching: the nth tree proxy with a given lex string is
 * paired with the nth graph proxy carrying the same lex string.  Each non-leaf tree node
 * is then mapped to the already-mapped node beneath it with the smallest depth.
 * NOTE: not every tree node will receive a mapping, e.g. tokens removed in a collapsed
 * graph, such as prepositions.
 *
 * @param tree the parse tree
 * @param sg the semantic graph derived from the tree
 * @return a map from positioned tree nodes to graph nodes
 */
public static Map<PositionedTree, IndexedWord> mapTreeToSg(Tree tree, SemanticGraph sg) {
    // Positions are tracked by keeping, per lex string, the proxies in the order encountered,
    // e.g. treeProxiesByLex.get("the", 2) should denote the same word as semProxiesByLex.get("the", 2).
    // IndexedWords may be collapsed together ("A B" -> "A_B"); producing one proxy per word is
    // left to the proxy factories.
    MapList<String, TreeNodeProxy> treeProxiesByLex = new MapList<>();
    for (Tree leaf : tree.getLeaves()) {
        for (TreeNodeProxy leafProxy : TreeNodeProxy.create(leaf, tree)) {
            treeProxiesByLex.add(leafProxy.lex, leafProxy);
        }
    }
    MapList<String, IndexedWordProxy> semProxiesByLex = new MapList<>();
    Map<IndexedWord, Integer> depthOf = Generics.newHashMap();
    for (IndexedWord vertex : sg.vertexSet()) {
        List<IndexedWord> rootPath = sg.getPathToRoot(vertex);
        // A vertex with no path to the root gets an arbitrarily large depth so it is never preferred.
        depthOf.put(vertex, rootPath == null ? 99999 : rootPath.size());
        for (IndexedWordProxy vertexProxy : IndexedWordProxy.create(vertex)) {
            semProxiesByLex.add(vertexProxy.lex, vertexProxy);
        }
    }
    // Both map-lists are populated; pair up entries position by position.
    // NOTE: matching is keyed on proxies rather than on Tree, because hash codes for Tree
    // nodes do not consider the position of the node within the tree: two subtrees with the
    // same layout and child labels are equal.
    Map<PositionedTree, IndexedWord> treeToSem = Generics.newHashMap();
    for (String lex : treeProxiesByLex.keySet()) {
        for (int nth = 0; nth < treeProxiesByLex.size(lex) && nth < semProxiesByLex.size(lex); nth++) {
            IndexedWord semNode = semProxiesByLex.get(lex, nth).node;
            treeToSem.put(new PositionedTree(treeProxiesByLex.get(lex, nth).treeNode, tree), semNode);
        }
    }
    // With the leaf mapping in place, handle the tree non-terminals: each takes the
    // shallowest mapped node found while iterating over it.
    for (Tree subtree : tree) {
        if (subtree.isLeaf()) {
            continue;
        }
        IndexedWord shallowest = null;
        int shallowestDepth = 99999;
        for (Tree inner : subtree) {
            IndexedWord candidate = treeToSem.get(new PositionedTree(inner, tree));
            if (candidate != null && depthOf.containsKey(candidate)) {
                int depth = depthOf.get(candidate);
                if (depth < shallowestDepth) {
                    shallowestDepth = depth;
                    shallowest = candidate;
                }
            }
        }
        if (shallowest != null) {
            treeToSem.put(new PositionedTree(subtree, tree), shallowest);
        }
    }
    return treeToSem;
}
Also used : MapList(edu.stanford.nlp.util.MapList) Tree(edu.stanford.nlp.trees.Tree) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 64 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class SemanticGraphUtils method setSentIndex.

/**
 * Given a graph, returns a new graph with the new sentence index enforced on every vertex.
 * NOTE: the input graph is first duplicated through the SemanticGraph(SemanticGraph)
 * constructor, and each vertex of the duplicate is replaced by a freshly constructed
 * IndexedWord carrying the new index.
 * TODO: is this ok?  rewrite this?
 *
 * @param sg the graph to re-index
 * @param newSentIndex the sentence index to set on every vertex
 * @return the new graph, with its roots set to the replacements of the previous roots
 */
public static SemanticGraph setSentIndex(SemanticGraph sg, int newSentIndex) {
    SemanticGraph reindexed = new SemanticGraph(sg);
    List<IndexedWord> oldRoots = new ArrayList<>(reindexed.getRoots());
    List<IndexedWord> updatedRoots = new ArrayList<>();
    // Iterate over the list returned by vertexListSorted() because nodes are swapped out
    // of the graph inside the loop.  Perhaps there is a better way to do it.
    for (IndexedWord original : reindexed.vertexListSorted()) {
        IndexedWord replacement = new IndexedWord(original);
        replacement.setSentIndex(newSentIndex);
        SemanticGraphUtils.replaceNode(replacement, original, reindexed);
        // Remember which replacements stand in for a former root.
        if (oldRoots.contains(original)) {
            updatedRoots.add(replacement);
        }
    }
    reindexed.setRoots(updatedRoots);
    return reindexed;
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 65 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class SemanticGraphUtils method killNonRooted.

/**
 * Removes from the graph every vertex that is neither one of its roots nor a descendant
 * of one (such as vertices left dangling after a series of edges have been chopped).
 *
 * @param sg the graph to prune; it is modified in place
 */
public static void killNonRooted(SemanticGraph sg) {
    // Snapshot the vertices up front, since vertices are removed from the graph below.
    List<IndexedWord> snapshot = new ArrayList<>(sg.vertexSet());
    // Hack: collect everything known to hang off a root.
    Set<IndexedWord> rooted = Generics.newHashSet();
    for (IndexedWord root : sg.getRoots()) {
        rooted.add(root);
        rooted.addAll(sg.descendants(root));
    }
    for (IndexedWord candidate : snapshot) {
        if (rooted.contains(candidate)) {
            continue;
        }
        sg.removeVertex(candidate);
    }
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Aggregations

IndexedWord (edu.stanford.nlp.ling.IndexedWord) 204, SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph) 55, SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge) 53, GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation) 41, CoreLabel (edu.stanford.nlp.ling.CoreLabel) 38, CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations) 36, SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) 24, SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) 21, ArrayList (java.util.ArrayList) 16, SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) 10, Tree (edu.stanford.nlp.trees.Tree) 10, Pair (edu.stanford.nlp.util.Pair) 10, CoreMap (edu.stanford.nlp.util.CoreMap) 8, IntPair (edu.stanford.nlp.util.IntPair) 8, java.util (java.util) 8, Collectors (java.util.stream.Collectors) 8, Span (edu.stanford.nlp.ie.machinereading.structure.Span) 7, Annotation (edu.stanford.nlp.pipeline.Annotation) 6, edu.stanford.nlp.util (edu.stanford.nlp.util) 6, Mention (edu.stanford.nlp.coref.data.Mention) 5