Use of edu.stanford.nlp.util.MapList in the CoreNLP project by stanfordnlp.
In the class SemanticGraphUtils, method mapTreeToSg.
// -----------------------------------------------------------------------------------------------
// Tree matching code
// -----------------------------------------------------------------------------------------------
/**
 * Aligns the nodes of a CFG parse with the vertices of the SemanticGraph built from that parse.
 * <p>
 * Terminals are paired with graph vertices by lexical matching: the nth occurrence of a lexical
 * form among the tree leaves is paired with the nth occurrence of the same form among the graph
 * vertices. Each non-terminal is then mapped to whichever already-mapped node in its subtree has
 * the shortest path to the graph root.
 * <p>
 * NOTE: some tree nodes may have no counterpart (e.g. prepositions dropped from a collapsed
 * graph); those nodes are simply absent from the returned map.
 *
 * @param tree the CFG parse tree
 * @param sg the SemanticGraph derived from {@code tree}
 * @return a map from positioned tree nodes to their best-guess SemanticGraph node
 */
public static Map<PositionedTree, IndexedWord> mapTreeToSg(Tree tree, SemanticGraph sg) {
  // Depth assigned to vertices with no path to the root. It equals the initial "best" depth used
  // below, and the comparison there is strict, so such vertices are never selected.
  final int unreachableDepth = 99999;

  // Occurrence lists keyed by lexical form, kept in encounter order, so that
  // treeProxiesByLex.get("the", 2) and semProxiesByLex.get("the", 2) refer to the same word.
  // A collapsed IndexedWord ("A B" -> "A_B") may yield several proxies, one per piece.
  MapList<String, TreeNodeProxy> treeProxiesByLex = new MapList<>();
  for (Tree leaf : tree.getLeaves()) {
    for (TreeNodeProxy treeProxy : TreeNodeProxy.create(leaf, tree)) {
      treeProxiesByLex.add(treeProxy.lex, treeProxy);
    }
  }

  MapList<String, IndexedWordProxy> semProxiesByLex = new MapList<>();
  Map<IndexedWord, Integer> depthOf = Generics.newHashMap();
  for (IndexedWord vertex : sg.vertexSet()) {
    List<IndexedWord> pathToRoot = sg.getPathToRoot(vertex);
    depthOf.put(vertex, (pathToRoot == null) ? unreachableDepth : pathToRoot.size());
    for (IndexedWordProxy semProxy : IndexedWordProxy.create(vertex)) {
      semProxiesByLex.add(semProxy.lex, semProxy);
    }
  }

  // Pair up the nth occurrences of every lexical form seen in the tree.
  // Keys are PositionedTree rather than Tree because Tree hashing ignores where a subtree sits
  // inside the full tree: two subtrees with the same layout and labels compare equal.
  Map<PositionedTree, IndexedWord> alignment = Generics.newHashMap();
  for (String lex : treeProxiesByLex.keySet()) {
    int pairCount = Math.min(treeProxiesByLex.size(lex), semProxiesByLex.size(lex));
    for (int nth = 0; nth < pairCount; nth++) {
      PositionedTree key = new PositionedTree(treeProxiesByLex.get(lex, nth).treeNode, tree);
      alignment.put(key, semProxiesByLex.get(lex, nth).node);
    }
  }

  // Tree non-terminals: take the shallowest mapped graph node found anywhere in the subtree.
  for (Tree subtree : tree) {
    if (subtree.isLeaf()) {
      continue;
    }
    IndexedWord shallowest = null;
    int shallowestDepth = unreachableDepth;
    for (Tree descendant : subtree) {
      IndexedWord candidate = alignment.get(new PositionedTree(descendant, tree));
      if (candidate != null && depthOf.containsKey(candidate)) {
        int candidateDepth = depthOf.get(candidate);
        if (candidateDepth < shallowestDepth) {
          shallowestDepth = candidateDepth;
          shallowest = candidate;
        }
      }
    }
    if (shallowest != null) {
      alignment.put(new PositionedTree(subtree, tree), shallowest);
    }
  }
  return alignment;
}
Aggregations