Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project by stanfordnlp: class SemanticGraphFactory, method makeFromVertices.
/**
 * Given a set of vertices, and the source graph they are drawn from, create a path composed
 * of the minimum paths between the vertices. i.e. this is a simple brain-dead attempt at getting
 * something approximating a minimum spanning graph.
 *
 * NOTE: the hope is the vertices will already be contiguous, but facilities are added just in case for
 * adding additional nodes.
 *
 * @param sg the source graph the vertices are drawn from
 * @param nodes the vertices to connect
 * @return a new graph covering {@code nodes} plus any intermediate vertices found on the
 *         shortest directed paths between each ordered pair of the given nodes
 */
public static SemanticGraph makeFromVertices(SemanticGraph sg, Collection<IndexedWord> nodes) {
List<SemanticGraphEdge> edgesToAdd = new ArrayList<>();
// Use an insertion-ordered set so membership checks are O(1) instead of the
// original O(n) List.contains scan inside the nested path loops; iteration
// order and de-duplication semantics are unchanged.  Declared as Collection
// (already in scope from the signature) with a fully-qualified constructor
// to avoid requiring a new import.
Collection<IndexedWord> nodesToAdd = new java.util.LinkedHashSet<>(nodes);
for (IndexedWord nodeA : nodes) {
for (IndexedWord nodeB : nodes) {
if (nodeA != nodeB) {
// null means no directed path exists from nodeA to nodeB.
List<SemanticGraphEdge> edges = sg.getShortestDirectedPathEdges(nodeA, nodeB);
if (edges != null) {
// NOTE(review): edges shared by several paths are appended more than once
// here, as in the original code; addEdge below receives duplicates — confirm
// whether SemanticGraph tolerates/ignores parallel identical edges.
edgesToAdd.addAll(edges);
// Pull in any intermediate vertices on the path that were not requested.
for (SemanticGraphEdge edge : edges) {
IndexedWord gov = edge.getGovernor();
IndexedWord dep = edge.getDependent();
if (gov != null) {
nodesToAdd.add(gov);
}
if (dep != null) {
nodesToAdd.add(dep);
}
}
}
}
}
}
SemanticGraph retSg = new SemanticGraph();
for (IndexedWord node : nodesToAdd) {
retSg.addVertex(node);
}
for (SemanticGraphEdge edge : edgesToAdd) {
retSg.addEdge(edge.getGovernor(), edge.getDependent(), edge.getRelation(), edge.getWeight(), edge.isExtra());
}
retSg.resetRoots();
return retSg;
}
Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project by stanfordnlp: class SemanticGraphFormatter, method formatSGNodeMultiline.
/**
 * Appends to this.out a multi-line string representation of the given
 * semantic graph, using the given number of spaces for indentation.
 * The semantic graph's label and each of its children appear on separate
 * lines. A child may appear with a one-line or multi-line representation,
 * depending upon available space.
 */
private void formatSGNodeMultiline(SemanticGraph sg, IndexedWord node, int spaces) {
out.append(LPAREN);
out.append(formatLabel(node));
// Smart indent tucks children one column past the open paren; otherwise
// use the configured fixed indent width.
int childIndent = smartIndent ? spaces + 1 : spaces + indent;
for (SemanticGraphEdge edge : sg.getOutEdgesSorted(node)) {
IndexedWord child = edge.getDependent();
out.append("\n");
out.append(StringUtils.repeat(SPACE, childIndent));
int childSpaces = childIndent;
if (showRelns) {
String relnName = edge.getRelation().toString();
out.append(relnName);
out.append(COLON);
if (smartIndent) {
// Shift the child past "reln:" so its representation lines up after the label.
childSpaces += relnName.length() + 1;
}
}
// Skip nodes already emitted, so a cyclic graph cannot loop forever.
if (!used.contains(child)) {
formatSGNode(sg, child, childSpaces);
}
}
out.append(RPAREN);
}
Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project by stanfordnlp: class SemanticGraphUtils, method mapTreeToSg.
// -----------------------------------------------------------------------------------------------
// Tree matching code
// -----------------------------------------------------------------------------------------------
/**
 * Given a CFG Tree parse, and the equivalent SemanticGraph derived from that Tree, generates a mapping
 * from each of the tree terminals to the best-guess SemanticGraph node(s).
 * This is performed using lexical matching, finding the nth match.
 * NOTE: not all tree nodes may match a Semgraph node, esp. for tokens removed in a collapsed Semgraph,
 * such as prepositions.
 *
 * @param tree the constituency parse whose terminals and non-terminals should be mapped
 * @param sg the dependency graph derived from {@code tree}
 * @return a map from positioned tree nodes to their best-guess graph nodes; tree nodes with no
 *         lexical match are simply absent from the map
 */
public static Map<PositionedTree, IndexedWord> mapTreeToSg(Tree tree, SemanticGraph sg) {
// In order to keep track of positions, we store lists, in order encountered, of lex terms.
// e.g. lexToTreeNode.get("the").get(2) should point to the same word as lexToSemNode.get("the").get(2)
// Because IndexedWords may be collapsed together "A B" -> "A_B", we check the value of current(), and
// split on whitespace if present.
MapList<String, TreeNodeProxy> lexToTreeNode = new MapList<>();
MapList<String, IndexedWordProxy> lexToSemNode = new MapList<>();
// Record every tree leaf under its lexical form, in left-to-right encounter order.
// The nth-occurrence matching below depends on this order, so don't reorder.
for (Tree child : tree.getLeaves()) {
List<TreeNodeProxy> leafProxies = TreeNodeProxy.create(child, tree);
for (TreeNodeProxy proxy : leafProxies) lexToTreeNode.add(proxy.lex, proxy);
}
// Depth of each graph node from the root, used later to prefer the shallowest
// (closest-to-root) descendant when mapping a non-terminal.
Map<IndexedWord, Integer> depthMap = Generics.newHashMap();
for (IndexedWord node : sg.vertexSet()) {
List<IndexedWord> path = sg.getPathToRoot(node);
if (path != null)
depthMap.put(node, path.size());
else
// Use an arbitrarily deep depth value, to trick it into never being used.
depthMap.put(node, 99999);
List<IndexedWordProxy> nodeProxies = IndexedWordProxy.create(node);
for (IndexedWordProxy proxy : nodeProxies) lexToSemNode.add(proxy.lex, proxy);
}
// Now the map-lists (string->position encountered indices) are populated,
// simply go through, finding matches.
// NOTE: we use TreeNodeProxy instead of keying off of Tree, as
// hash codes for Tree nodes do not consider position of the tree
// within a tree: two subtrees with the same layout and child
// labels will be equal.
Map<PositionedTree, IndexedWord> map = Generics.newHashMap();
for (String lex : lexToTreeNode.keySet()) {
// Pair the nth tree occurrence of a lexical form with the nth graph occurrence;
// leftovers on either side (e.g. collapsed prepositions) are dropped.
for (int i = 0; i < lexToTreeNode.size(lex) && i < lexToSemNode.size(lex); i++) {
map.put(new PositionedTree(lexToTreeNode.get(lex, i).treeNode, tree), lexToSemNode.get(lex, i).node);
}
}
// tree non-terminals.
// Map each non-terminal to the shallowest graph node among its already-mapped
// descendants (smallest depth value wins; unrooted nodes were given depth 99999
// above so they are never preferred).
for (Tree nonTerm : tree) {
if (!nonTerm.isLeaf()) {
IndexedWord bestNode = null;
int bestScore = 99999;
for (Tree curr : nonTerm) {
IndexedWord equivNode = map.get(new PositionedTree(curr, tree));
if ((equivNode == null) || !depthMap.containsKey(equivNode))
continue;
int currScore = depthMap.get(equivNode);
if (currScore < bestScore) {
bestScore = currScore;
bestNode = equivNode;
}
}
if (bestNode != null) {
map.put(new PositionedTree(nonTerm, tree), bestNode);
}
}
}
return map;
}
Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project by stanfordnlp: class SemanticGraphUtils, method setSentIndex.
/**
 * Given a graph, returns a new graph with the new sentence index enforced.
 * NOTE: new vertices are inserted.
 * TODO: is this ok? rewrite this?
 */
public static SemanticGraph setSentIndex(SemanticGraph sg, int newSentIndex) {
SemanticGraph result = new SemanticGraph(sg);
List<IndexedWord> originalRoots = new ArrayList<>(result.getRoots());
List<IndexedWord> updatedRoots = new ArrayList<>();
// NOTE(review): we replace vertices while iterating — this assumes
// vertexListSorted() returns a snapshot rather than a live view; confirm.
for (IndexedWord oldWord : result.vertexListSorted()) {
IndexedWord reindexed = new IndexedWord(oldWord);
reindexed.setSentIndex(newSentIndex);
SemanticGraphUtils.replaceNode(reindexed, oldWord, result);
// Keep the root set in sync with the replacement vertices.
if (originalRoots.contains(oldWord)) {
updatedRoots.add(reindexed);
}
}
result.setRoots(updatedRoots);
return result;
}
Usage of edu.stanford.nlp.ling.IndexedWord in the CoreNLP project by stanfordnlp: class SemanticGraphUtils, method killNonRooted.
/**
 * Deletes all nodes that are not rooted (such as dangling vertices after a series of
 * edges have been chopped).
 */
public static void killNonRooted(SemanticGraph sg) {
// Collect every vertex reachable from some root; everything else is dead.
Set<IndexedWord> reachable = Generics.newHashSet();
for (IndexedWord root : sg.getRoots()) {
reachable.add(root);
reachable.addAll(sg.descendants(root));
}
// Snapshot the vertex set first, since we mutate the graph while pruning.
for (IndexedWord vertex : new ArrayList<>(sg.vertexSet())) {
if (!reachable.contains(vertex)) {
sg.removeVertex(vertex);
}
}
}
Aggregations