Search in sources :

Example 16 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class SemanticGraph method yieldSpan.

/**
   * Returns the span of the subtree yield of this node. That is, the span of all the nodes under it.
   * In the case of projective graphs, the words in this span are also the yield of the constituent rooted
   * at this node.
   * <br>
   * Only non-extra edges are followed. Each node is expanded at most once, so the traversal
   * terminates even if the non-extra edges reachable from {@code word} form a cycle.
   *
   * @param word The word acting as the root of the constituent we are finding.
   * @return A span, represented as a pair of integers. The span is zero indexed. The begin is inclusive and the end is exclusive.
   *         The begin is the smallest {@code index() - 1} and the end the largest {@code index()} among the visited nodes.
   */
public Pair<Integer, Integer> yieldSpan(IndexedWord word) {
    int min = Integer.MAX_VALUE;
    int max = Integer.MIN_VALUE;
    // java.util types are fully qualified so this method does not depend on which imports the file declares.
    java.util.Set<IndexedWord> visited = new java.util.HashSet<>();
    java.util.Deque<IndexedWord> fringe = new java.util.ArrayDeque<>();
    fringe.push(word);
    while (!fringe.isEmpty()) {
        IndexedWord parent = fringe.pop();
        if (!visited.add(parent)) {
            // Already expanded: skipping it cannot change min/max and prevents looping forever on cycles.
            continue;
        }
        // index() is treated as one-based here: index() - 1 is the zero-based begin, index() the exclusive end.
        min = Math.min(min, parent.index() - 1);
        max = Math.max(max, parent.index());
        for (SemanticGraphEdge edge : outgoingEdgeIterable(parent)) {
            if (!edge.isExtra()) {
                fringe.push(edge.getDependent());
            }
        }
    }
    return Pair.makePair(min, max);
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 17 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class SemanticGraphFactory method makeFromGraphs.

/**
   * Merges a collection of graphs into a single new "mega graph",
   * similar to the graphs used in the RTE problem.
   * <br>
   * The vertex objects of the input graphs are added to the result as-is
   * (they are not copied), while every edge is re-created in the result
   * from the governor, dependent, relation, weight and extra flag of the
   * original edge.  The roots of the result are the union of the roots of
   * all input graphs.
   * <br>
   * This method only works if the indexed words have different
   * sentence ids, as otherwise the maps used will confuse several of
   * the IndexedWords.
   *
   * @param sgList the graphs to combine
   * @return a new graph holding all vertices, edges and roots of the inputs
   */
public static SemanticGraph makeFromGraphs(Collection<SemanticGraph> sgList) {
    SemanticGraph merged = new SemanticGraph();
    Collection<IndexedWord> mergedRoots = Generics.newHashSet();
    for (SemanticGraph source : sgList) {
        mergedRoots.addAll(source.getRoots());
        for (IndexedWord vertex : source.vertexSet()) {
            merged.addVertex(vertex);
        }
        for (SemanticGraphEdge edge : source.edgeIterable()) {
            IndexedWord governor = edge.getGovernor();
            IndexedWord dependent = edge.getDependent();
            merged.addEdge(governor, dependent, edge.getRelation(), edge.getWeight(), edge.isExtra());
        }
    }
    merged.setRoots(mergedRoots);
    return merged;
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 18 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class SemanticGraphFactory method makeFromVertices.

/**
   * Builds a new graph from a set of vertices of a source graph by joining them with the
   * shortest directed paths found between them.  This is a simple approximation of a minimum
   * spanning graph, not an exact one.
   * <br>
   * For every ordered pair of distinct nodes (compared by reference), the edges returned by
   * {@code sg.getShortestDirectedPathEdges} are collected, and any governor or dependent on such
   * an edge that is not yet part of the node list is appended to it.  An edge that lies on
   * several collected paths is passed to {@code addEdge} once per occurrence.
   * <br>
   * NOTE: the hope is the vertices will already be contiguous; intermediate nodes are only
   * pulled in when a path requires them.
   *
   * @param sg the source graph the vertices are drawn from
   * @param nodes the vertices to connect
   * @return a new graph with the collected nodes and edges, with its roots reset
   */
public static SemanticGraph makeFromVertices(SemanticGraph sg, Collection<IndexedWord> nodes) {
    List<SemanticGraphEdge> pathEdges = new ArrayList<>();
    List<IndexedWord> collectedNodes = new ArrayList<>(nodes);
    for (IndexedWord from : nodes) {
        for (IndexedWord to : nodes) {
            if (from == to) {
                continue;
            }
            List<SemanticGraphEdge> path = sg.getShortestDirectedPathEdges(from, to);
            if (path == null) {
                // No path was reported for this pair.
                continue;
            }
            for (SemanticGraphEdge step : path) {
                pathEdges.add(step);
                IndexedWord governor = step.getGovernor();
                if (governor != null && !collectedNodes.contains(governor)) {
                    collectedNodes.add(governor);
                }
                IndexedWord dependent = step.getDependent();
                if (dependent != null && !collectedNodes.contains(dependent)) {
                    collectedNodes.add(dependent);
                }
            }
        }
    }
    SemanticGraph result = new SemanticGraph();
    for (IndexedWord vertex : collectedNodes) {
        result.addVertex(vertex);
    }
    for (SemanticGraphEdge step : pathEdges) {
        result.addEdge(step.getGovernor(), step.getDependent(), step.getRelation(), step.getWeight(), step.isExtra());
    }
    result.resetRoots();
    return result;
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 19 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class SemanticGraphFormatter method formatSGNodeMultiline.

/**
   * Appends to this.out a multi-line string representation of the given
   * node of the semantic graph, using the given number of spaces for indentation.
   * <br>
   * The node's label and each of its children appear on separate
   * lines.  Each child is written on its own line, optionally prefixed with
   * the relation name and a colon, and is then formatted via
   * {@code formatSGNode}, which chooses between the one-line and multi-line
   * representation.
   *
   * @param sg the graph the node belongs to
   * @param node the node to format
   * @param spaces the indentation of this node; children are indented one
   *               column further when smart indentation is on, and by
   *               {@code indent} columns otherwise
   */
private void formatSGNodeMultiline(SemanticGraph sg, IndexedWord node, int spaces) {
    out.append(LPAREN);
    out.append(formatLabel(node));
    final int childIndent = spaces + (smartIndent ? 1 : indent);
    for (SemanticGraphEdge edge : sg.getOutEdgesSorted(node)) {
        out.append("\n");
        out.append(StringUtils.repeat(SPACE, childIndent));
        int nestedIndent = childIndent;
        if (showRelns) {
            String relationName = edge.getRelation().toString();
            out.append(relationName);
            out.append(COLON);
            if (smartIndent) {
                // Account for the relation name and the separator just written on this line.
                nestedIndent += relationName.length() + 1;
            }
        }
        IndexedWord child = edge.getDependent();
        if (used.contains(child)) {
            // Already printed: skip it to avoid an infinite loop.
            continue;
        }
        formatSGNode(sg, child, nestedIndent);
    }
    out.append(RPAREN);
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 20 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class SemanticGraphUtils method mapTreeToSg.

// -----------------------------------------------------------------------------------------------
// Tree matching code
// -----------------------------------------------------------------------------------------------
/**
   * Given a CFG Tree parse, and the equivalent SemanticGraph derived from that Tree, generates a mapping
   * from tree nodes to the best-guess SemanticGraph node.
   * Terminals are matched lexically: the nth occurrence of a lex term among the tree leaves is paired
   * with the nth occurrence of the same term among the graph nodes.
   * Each non-terminal is then mapped to the already-mapped node in its subtree with the smallest depth,
   * where depth is the length of the node's path to the root.
   * NOTE: not all tree nodes may match a Semgraph node, esp. for tokens removed in a collapsed Semgraph,
   * such as prepositions.
   *
   * @param tree the constituency parse
   * @param sg the semantic graph derived from {@code tree}
   * @return a map from positioned tree nodes to their matched graph node; unmatched tree nodes are absent
   */
public static Map<PositionedTree, IndexedWord> mapTreeToSg(Tree tree, SemanticGraph sg) {
    // Sentinel depth for nodes without a path to the root; also the initial "worse than anything" score,
    // so a node carrying it is never selected as the best match for a non-terminal.
    final int unreachableDepth = 99999;

    // Lex terms are stored in lists, in the order encountered, so that positions can be compared:
    // e.g. treeLeavesByLex.get("the", 2) should be the same word as semNodesByLex.get("the", 2).
    // The proxy factories may yield several proxies per node (e.g. for a collapsed "A_B" word).
    MapList<String, TreeNodeProxy> treeLeavesByLex = new MapList<>();
    MapList<String, IndexedWordProxy> semNodesByLex = new MapList<>();
    for (Tree leaf : tree.getLeaves()) {
        for (TreeNodeProxy leafProxy : TreeNodeProxy.create(leaf, tree)) {
            treeLeavesByLex.add(leafProxy.lex, leafProxy);
        }
    }

    // NOTE(review): positional matching relies on vertexSet() yielding nodes in sentence order;
    // that ordering is not visible from this method -- confirm.
    Map<IndexedWord, Integer> depthOf = Generics.newHashMap();
    for (IndexedWord vertex : sg.vertexSet()) {
        List<IndexedWord> toRoot = sg.getPathToRoot(vertex);
        depthOf.put(vertex, (toRoot == null) ? unreachableDepth : toRoot.size());
        for (IndexedWordProxy vertexProxy : IndexedWordProxy.create(vertex)) {
            semNodesByLex.add(vertexProxy.lex, vertexProxy);
        }
    }

    // Pair up the nth tree leaf and the nth graph node for every lex term.
    // PositionedTree is used as the key rather than Tree, as hash codes for Tree nodes do not
    // consider the position of the tree within a tree: two subtrees with the same layout and
    // child labels will be equal.
    Map<PositionedTree, IndexedWord> map = Generics.newHashMap();
    for (String lex : treeLeavesByLex.keySet()) {
        int pairCount = Math.min(treeLeavesByLex.size(lex), semNodesByLex.size(lex));
        for (int i = 0; i < pairCount; i++) {
            PositionedTree leafKey = new PositionedTree(treeLeavesByLex.get(lex, i).treeNode, tree);
            map.put(leafKey, semNodesByLex.get(lex, i).node);
        }
    }

    // Tree non-terminals: pick the shallowest mapped graph node found anywhere in the subtree.
    for (Tree subtree : tree) {
        if (subtree.isLeaf()) {
            continue;
        }
        IndexedWord shallowest = null;
        int shallowestDepth = unreachableDepth;
        for (Tree descendant : subtree) {
            IndexedWord candidate = map.get(new PositionedTree(descendant, tree));
            if (candidate != null && depthOf.containsKey(candidate)) {
                int depth = depthOf.get(candidate);
                if (depth < shallowestDepth) {
                    shallowestDepth = depth;
                    shallowest = candidate;
                }
            }
        }
        if (shallowest != null) {
            map.put(new PositionedTree(subtree, tree), shallowest);
        }
    }
    return map;
}
Also used : MapList(edu.stanford.nlp.util.MapList) Tree(edu.stanford.nlp.trees.Tree) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Aggregations

IndexedWord (edu.stanford.nlp.ling.IndexedWord): 204, SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 55, SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge): 53, GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation): 41, CoreLabel (edu.stanford.nlp.ling.CoreLabel): 38, CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 36, SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 24, SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher): 21, ArrayList (java.util.ArrayList): 16, SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern): 10, Tree (edu.stanford.nlp.trees.Tree): 10, Pair (edu.stanford.nlp.util.Pair): 10, CoreMap (edu.stanford.nlp.util.CoreMap): 8, IntPair (edu.stanford.nlp.util.IntPair): 8, java.util (java.util): 8, Collectors (java.util.stream.Collectors): 8, Span (edu.stanford.nlp.ie.machinereading.structure.Span): 7, Annotation (edu.stanford.nlp.pipeline.Annotation): 6, edu.stanford.nlp.util (edu.stanford.nlp.util): 6, Mention (edu.stanford.nlp.coref.data.Mention): 5