Use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp: the class SemanticGraph, method yieldSpan.
/**
* Returns the span of the subtree yield of this node. That is, the span of all the nodes under it.
* In the case of projective graphs, the words in this span are also the yield of the constituent rooted
* at this node.
*
* @param word The word acting as the root of the constituent we are finding.
* @return A span, represented as a pair of integers. The span is zero indexed. The begin is inclusive and the end is exclusive.
*/
public Pair<Integer, Integer> yieldSpan(IndexedWord word) {
  int min = Integer.MAX_VALUE;
  int max = Integer.MIN_VALUE;
  Stack<IndexedWord> fringe = new Stack<>();
  fringe.push(word);
  while (!fringe.isEmpty()) {
    IndexedWord parent = fringe.pop();
    min = Math.min(min, parent.index() - 1);
    max = Math.max(max, parent.index());
    for (SemanticGraphEdge edge : outgoingEdgeIterable(parent)) {
      if (!edge.isExtra()) {
        fringe.push(edge.getDependent());
      }
    }
  }
  return Pair.makePair(min, max);
}
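A minimal sketch of calling yieldSpan. The sentence, parse string, and expected output are illustrative; it assumes a graph built from a hand-written constituency parse via SemanticGraphFactory.generateUncollapsedDependencies:

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Pair;

public class YieldSpanDemo {
  public static void main(String[] args) {
    // Hand-written parse of "Bill ate blueberry muffins" (tokens 1..4).
    Tree tree = Tree.valueOf(
        "(ROOT (S (NP (NNP Bill)) (VP (VBD ate) (NP (JJ blueberry) (NNS muffins)))))");
    SemanticGraph sg = SemanticGraphFactory.generateUncollapsedDependencies(tree);
    // "muffins" (token 4) heads the NP "blueberry muffins".
    IndexedWord muffins = sg.getNodeByIndex(4);
    Pair<Integer, Integer> span = sg.yieldSpan(muffins);
    // Zero-indexed, end-exclusive: expected (2, 4), covering tokens 3-4.
    System.out.println(span);
  }
}

Since the traversal skips "extra" edges, the span reflects the basic tree structure even when the graph carries additional (enhanced) edges.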
Use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp: the class SemanticGraphFactory, method makeFromGraphs.
/**
 * Given a collection of graphs, constructs a single new graph combining them.
 * Original vertices are reused and edges are copied. Graphs are ordered by the
 * sentence index and the index of the original vertices. The intent is to create
 * a "mega graph" similar to the graphs used in the RTE problem.
 * <br>
 * This method only works if the indexed words have distinct sentence ids;
 * otherwise the maps used internally will conflate several of the IndexedWords.
 */
public static SemanticGraph makeFromGraphs(Collection<SemanticGraph> sgList) {
  SemanticGraph sg = new SemanticGraph();
  Collection<IndexedWord> newRoots = Generics.newHashSet();
  for (SemanticGraph currSg : sgList) {
    newRoots.addAll(currSg.getRoots());
    for (IndexedWord currVertex : currSg.vertexSet()) {
      sg.addVertex(currVertex);
    }
    for (SemanticGraphEdge currEdge : currSg.edgeIterable()) {
      sg.addEdge(currEdge.getGovernor(), currEdge.getDependent(),
                 currEdge.getRelation(), currEdge.getWeight(), currEdge.isExtra());
    }
  }
  sg.setRoots(newRoots);
  return sg;
}
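A sketch of merging two single-sentence graphs. As the javadoc requires, the vertices are given distinct sentence indices first; mutating the vertices is safe here only because the source graphs are merely iterated afterwards, never queried by hash lookup (the sentences and setSentIndex usage are illustrative assumptions):

import java.util.Arrays;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.trees.Tree;

public class MakeFromGraphsDemo {
  public static void main(String[] args) {
    SemanticGraph sg1 = SemanticGraphFactory.generateUncollapsedDependencies(
        Tree.valueOf("(ROOT (S (NP (NNP Bill)) (VP (VBD ate))))"));
    SemanticGraph sg2 = SemanticGraphFactory.generateUncollapsedDependencies(
        Tree.valueOf("(ROOT (S (NP (NNP Sue)) (VP (VBD slept))))"));
    // Distinct sentence indices keep the merged graph's vertices distinct.
    for (IndexedWord w : sg1.vertexSet()) w.setSentIndex(0);
    for (IndexedWord w : sg2.vertexSet()) w.setSentIndex(1);
    SemanticGraph mega = SemanticGraphFactory.makeFromGraphs(Arrays.asList(sg1, sg2));
    System.out.println(mega.getRoots().size()); // 2 — one root per original graph
  }
}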
Use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp: the class SemanticGraphFactory, method makeFromVertices.
/**
 * Given a set of vertices and the source graph they are drawn from, creates a graph
 * composed of the shortest directed paths between those vertices, i.e., a simple,
 * brain-dead attempt at something approximating a minimum spanning graph.
 *
 * NOTE: the hope is that the vertices are already contiguous, but facilities are
 * provided just in case, for adding the extra connecting nodes.
 */
public static SemanticGraph makeFromVertices(SemanticGraph sg, Collection<IndexedWord> nodes) {
  List<SemanticGraphEdge> edgesToAdd = new ArrayList<>();
  List<IndexedWord> nodesToAdd = new ArrayList<>(nodes);
  for (IndexedWord nodeA : nodes) {
    for (IndexedWord nodeB : nodes) {
      if (nodeA != nodeB) {
        List<SemanticGraphEdge> edges = sg.getShortestDirectedPathEdges(nodeA, nodeB);
        if (edges != null) {
          edgesToAdd.addAll(edges);
          for (SemanticGraphEdge edge : edges) {
            IndexedWord gov = edge.getGovernor();
            IndexedWord dep = edge.getDependent();
            if (gov != null && !nodesToAdd.contains(gov)) {
              nodesToAdd.add(gov);
            }
            if (dep != null && !nodesToAdd.contains(dep)) {
              nodesToAdd.add(dep);
            }
          }
        }
      }
    }
  }
  SemanticGraph retSg = new SemanticGraph();
  for (IndexedWord node : nodesToAdd) {
    retSg.addVertex(node);
  }
  for (SemanticGraphEdge edge : edgesToAdd) {
    retSg.addEdge(edge.getGovernor(), edge.getDependent(),
                  edge.getRelation(), edge.getWeight(), edge.isExtra());
  }
  retSg.resetRoots();
  return retSg;
}
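A sketch of extracting a connecting subgraph (the sentence and node choice are illustrative). Picking "ate" and "blueberry" pulls in "muffins" as well, since the only directed path between them runs through it:

import java.util.Arrays;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.trees.Tree;

public class MakeFromVerticesDemo {
  public static void main(String[] args) {
    SemanticGraph sg = SemanticGraphFactory.generateUncollapsedDependencies(
        Tree.valueOf("(ROOT (S (NP (NNP Bill)) (VP (VBD ate) (NP (JJ blueberry) (NNS muffins)))))"));
    // Pick "ate" (token 2) and "blueberry" (token 3); the directed path
    // ate -> muffins -> blueberry adds "muffins" (token 4) to the subgraph.
    SemanticGraph sub = SemanticGraphFactory.makeFromVertices(
        sg, Arrays.asList(sg.getNodeByIndex(2), sg.getNodeByIndex(3)));
    System.out.println(sub.vertexSet().size()); // expected: 3
  }
}

Note that nodes with no directed path between them in either direction stay disconnected; only edges found on shortest directed paths are copied.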
Use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp: the class SemanticGraphFormatter, method formatSGNodeMultiline.
/**
* Appends to this.out a multi-line string representation of the given
* semantic graph, using the given number of spaces for indentation.
* The semantic graph's label and each of its children appear on separate
* lines. A child may appear with a one-line or multi-line representation,
* depending upon available space.
*/
private void formatSGNodeMultiline(SemanticGraph sg, IndexedWord node, int spaces) {
  out.append(LPAREN);
  out.append(formatLabel(node));
  if (smartIndent) {
    spaces += 1;
  } else {
    spaces += indent;
  }
  for (SemanticGraphEdge depcy : sg.getOutEdgesSorted(node)) {
    IndexedWord dep = depcy.getDependent();
    out.append("\n");
    out.append(StringUtils.repeat(SPACE, spaces));
    int sp = spaces;
    if (showRelns) {
      String reln = depcy.getRelation().toString();
      out.append(reln);
      out.append(COLON);
      if (smartIndent) {
        sp += (reln.length() + 1);
      }
    }
    if (!used.contains(dep)) {
      // avoid infinite loop
      formatSGNode(sg, dep, sp);
    }
  }
  out.append(RPAREN);
}
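formatSGNodeMultiline is private; the public entry point is formatSemanticGraph. A sketch assuming the default SemanticGraphFormatter constructor (the printed shape shown in the comment is indicative only):

import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.semgraph.SemanticGraphFormatter;
import edu.stanford.nlp.trees.Tree;

public class FormatterDemo {
  public static void main(String[] args) {
    SemanticGraph sg = SemanticGraphFactory.generateUncollapsedDependencies(
        Tree.valueOf("(ROOT (S (NP (NNP Bill)) (VP (VBD ate) (NP (JJ blueberry) (NNS muffins)))))"));
    SemanticGraphFormatter formatter = new SemanticGraphFormatter();
    // Nodes fall back from a one-line to this multi-line rendering when the
    // one-line form exceeds the configured width; expect something like:
    //   (ate nsubj:Bill obj:(muffins amod:blueberry))
    System.out.println(formatter.formatSemanticGraph(sg));
  }
}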
Use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp: the class SemanticGraphUtils, method mapTreeToSg.
// -----------------------------------------------------------------------------------------------
// Tree matching code
// -----------------------------------------------------------------------------------------------
/**
 * Given a CFG Tree parse and the equivalent SemanticGraph derived from that Tree,
 * generates a mapping from each of the tree terminals to the best-guess
 * SemanticGraph node(s). This is performed using lexical matching, finding the
 * nth match. NOTE: not all tree nodes may match a SemanticGraph node, especially
 * for tokens removed in a collapsed graph, such as prepositions.
 */
public static Map<PositionedTree, IndexedWord> mapTreeToSg(Tree tree, SemanticGraph sg) {
  // In order to keep track of positions, we store lists, in order encountered, of lex terms.
  // e.g. lexToTreeNode.get("the").get(2) should point to the same word as lexToSemNode.get("the").get(2).
  // Because IndexedWords may be collapsed together ("A B" -> "A_B"), we check the value of
  // current(), and split on whitespace if present.
  MapList<String, TreeNodeProxy> lexToTreeNode = new MapList<>();
  MapList<String, IndexedWordProxy> lexToSemNode = new MapList<>();
  for (Tree child : tree.getLeaves()) {
    List<TreeNodeProxy> leafProxies = TreeNodeProxy.create(child, tree);
    for (TreeNodeProxy proxy : leafProxies) {
      lexToTreeNode.add(proxy.lex, proxy);
    }
  }
  Map<IndexedWord, Integer> depthMap = Generics.newHashMap();
  for (IndexedWord node : sg.vertexSet()) {
    List<IndexedWord> path = sg.getPathToRoot(node);
    if (path != null) {
      depthMap.put(node, path.size());
    } else {
      // Use an arbitrarily large depth value so this node never wins the
      // shallowest-node comparison below.
      depthMap.put(node, 99999);
    }
    List<IndexedWordProxy> nodeProxies = IndexedWordProxy.create(node);
    for (IndexedWordProxy proxy : nodeProxies) {
      lexToSemNode.add(proxy.lex, proxy);
    }
  }
  // Now that the map-lists (string -> position-encountered indices) are populated,
  // simply go through and find matches.
  // NOTE: we use TreeNodeProxy instead of keying off of Tree, as hash codes for
  // Tree nodes do not consider the position of the tree within a tree: two
  // subtrees with the same layout and child labels will be equal.
  Map<PositionedTree, IndexedWord> map = Generics.newHashMap();
  for (String lex : lexToTreeNode.keySet()) {
    for (int i = 0; i < lexToTreeNode.size(lex) && i < lexToSemNode.size(lex); i++) {
      map.put(new PositionedTree(lexToTreeNode.get(lex, i).treeNode, tree),
              lexToSemNode.get(lex, i).node);
    }
  }
  // Map each tree non-terminal to the shallowest SemanticGraph node found among
  // the terminals it dominates.
  for (Tree nonTerm : tree) {
    if (!nonTerm.isLeaf()) {
      IndexedWord bestNode = null;
      int bestScore = 99999;
      for (Tree curr : nonTerm) {
        IndexedWord equivNode = map.get(new PositionedTree(curr, tree));
        if ((equivNode == null) || !depthMap.containsKey(equivNode)) {
          continue;
        }
        int currScore = depthMap.get(equivNode);
        if (currScore < bestScore) {
          bestScore = currScore;
          bestNode = equivNode;
        }
      }
      if (bestNode != null) {
        map.put(new PositionedTree(nonTerm, tree), bestNode);
      }
    }
  }
  return map;
}
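A sketch of calling mapTreeToSg, assuming PositionedTree is the public nested class of SemanticGraphUtils appearing in the return type (the sentence is illustrative):

import java.util.Map;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.semgraph.SemanticGraphUtils;
import edu.stanford.nlp.trees.Tree;

public class MapTreeToSgDemo {
  public static void main(String[] args) {
    Tree tree = Tree.valueOf(
        "(ROOT (S (NP (NNP Bill)) (VP (VBD ate) (NP (JJ blueberry) (NNS muffins)))))");
    SemanticGraph sg = SemanticGraphFactory.generateUncollapsedDependencies(tree);
    Map<SemanticGraphUtils.PositionedTree, IndexedWord> map =
        SemanticGraphUtils.mapTreeToSg(tree, sg);
    // Each leaf maps to its lexically matching graph node; each non-terminal
    // maps to the shallowest graph node among its terminals (e.g. the VP
    // should map to "ate").
    map.forEach((pt, node) -> System.out.println(pt + " -> " + node));
  }
}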