Use of edu.stanford.nlp.fsm.TransducerGraph in project CoreNLP by stanfordnlp.
The class GrammarCompactor, method addOneUnaryRule.
protected boolean addOneUnaryRule(UnaryRule rule, Map<String, TransducerGraph> graphs) {
  String parentString = stateIndex.get(rule.parent);
  String childString = stateIndex.get(rule.child);
  if (isSyntheticState(parentString)) {
    String topcat = getTopCategoryOfSyntheticState(parentString);
    TransducerGraph graph = getGraphFromMap(graphs, topcat);
    Double output = new Double(smartNegate(rule.score()));
    graph.addArc(graph.getStartNode(), parentString, childString, output);
    return true;
  } else if (isSyntheticState(childString)) {
    // need to add an Arc from the synthetic state to the end state
    TransducerGraph graph = getGraphFromMap(graphs, parentString);
    Double output = new Double(smartNegate(rule.score()));
    // parentString should be the same as endState
    graph.addArc(childString, parentString, END, output);
    graph.setEndNode(parentString);
    return true;
  } else {
    return false;
  }
}
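For context, this method is meant to be called once per unary rule; any rule it cannot fold into a per-category graph has to remain an ordinary grammar rule. The helper below is a minimal, hypothetical sketch of such a driver loop: its name is invented, and it is assumed to sit inside GrammarCompactor (or a subclass) so that it can call the protected addOneUnaryRule shown above.

// Hypothetical helper (not part of CoreNLP); assumed to live alongside addOneUnaryRule.
// Partitions rules into "folded into a graph" versus "must stay as a plain unary rule".
protected List<UnaryRule> collectUnhandledUnaryRules(Iterable<UnaryRule> rules,
                                                     Map<String, TransducerGraph> graphs) {
  List<UnaryRule> leftover = new ArrayList<>();
  for (UnaryRule rule : rules) {
    // addOneUnaryRule returns false when neither the parent nor the child is a
    // synthetic state, i.e. the rule cannot be represented as an arc in any graph
    if (!addOneUnaryRule(rule, graphs)) {
      leftover.add(rule);
    }
  }
  return leftover;
}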
Use of edu.stanford.nlp.fsm.TransducerGraph in project CoreNLP by stanfordnlp.
The class GrammarCompactor, method convertGraphsToGrammar.
/**
 * @param graphs a Map from String categories to TransducerGraph objects
 * @param unaryRules a Set of UnaryRule objects that we need to add
 * @param binaryRules a Set of BinaryRule objects that we need to add
 * @return a new Pair of UnaryGrammar, BinaryGrammar
 */
protected Pair<UnaryGrammar, BinaryGrammar> convertGraphsToGrammar(Set<TransducerGraph> graphs, Set<UnaryRule> unaryRules, Set<BinaryRule> binaryRules) {
  // first go through all the existing rules and renumber them with the new state index
  newStateIndex = new HashIndex<>();
  for (UnaryRule rule : unaryRules) {
    String parent = stateIndex.get(rule.parent);
    rule.parent = newStateIndex.addToIndex(parent);
    String child = stateIndex.get(rule.child);
    rule.child = newStateIndex.addToIndex(child);
  }
  for (BinaryRule rule : binaryRules) {
    String parent = stateIndex.get(rule.parent);
    rule.parent = newStateIndex.addToIndex(parent);
    String leftChild = stateIndex.get(rule.leftChild);
    rule.leftChild = newStateIndex.addToIndex(leftChild);
    String rightChild = stateIndex.get(rule.rightChild);
    rule.rightChild = newStateIndex.addToIndex(rightChild);
  }
  // now go through the graphs and add the rules
  for (TransducerGraph graph : graphs) {
    Object startNode = graph.getStartNode();
    for (Arc arc : graph.getArcs()) {
      // TODO: make sure these are the strings we're looking for
      String source = arc.getSourceNode().toString();
      String target = arc.getTargetNode().toString();
      Object input = arc.getInput();
      String inputString = input.toString();
      double output = ((Double) arc.getOutput()).doubleValue();
      if (source.equals(startNode)) {
        // make a UnaryRule
        UnaryRule ur = new UnaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(inputString), smartNegate(output));
        unaryRules.add(ur);
      } else if (inputString.equals(END) || inputString.equals(EPSILON)) {
        // make a UnaryRule
        UnaryRule ur = new UnaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(source), smartNegate(output));
        unaryRules.add(ur);
      } else {
        // make a BinaryRule
        // figure out whether the input was generated on the left or right
        int length = inputString.length();
        char leftOrRight = inputString.charAt(length - 1);
        inputString = inputString.substring(0, length - 1);
        BinaryRule br;
        if (leftOrRight == '<' || leftOrRight == '[') {
          br = new BinaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(inputString), newStateIndex.addToIndex(source), smartNegate(output));
        } else if (leftOrRight == '>' || leftOrRight == ']') {
          br = new BinaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(source), newStateIndex.addToIndex(inputString), smartNegate(output));
        } else {
          throw new RuntimeException("Arc input is in unexpected format: " + arc);
        }
        binaryRules.add(br);
      }
    }
  }
  // by now, the unaryRules and binaryRules Sets contain both the old, untouched rules and the new rules with scores
  ClassicCounter<String> symbolCounter = new ClassicCounter<>();
  if (outputType == RAW_COUNTS) {
    // so we count parent symbol occurrences
    for (UnaryRule rule : unaryRules) {
      symbolCounter.incrementCount(newStateIndex.get(rule.parent), rule.score);
    }
    for (BinaryRule rule : binaryRules) {
      symbolCounter.incrementCount(newStateIndex.get(rule.parent), rule.score);
    }
  }
  // now we put the rules in the grammars
  // this should be smaller than the last one
  int numStates = newStateIndex.size();
  int numRules = 0;
  UnaryGrammar ug = new UnaryGrammar(newStateIndex);
  BinaryGrammar bg = new BinaryGrammar(newStateIndex);
  for (UnaryRule rule : unaryRules) {
    if (outputType == RAW_COUNTS) {
      double count = symbolCounter.getCount(newStateIndex.get(rule.parent));
      rule.score = (float) Math.log(rule.score / count);
    }
    ug.addRule(rule);
    numRules++;
  }
  for (BinaryRule rule : binaryRules) {
    if (outputType == RAW_COUNTS) {
      double count = symbolCounter.getCount(newStateIndex.get(rule.parent));
      rule.score = (float) Math.log((rule.score - op.trainOptions.ruleDiscount) / count);
    }
    bg.addRule(rule);
    numRules++;
  }
  if (verbose) {
    System.out.println("Number of minimized rules: " + numRules);
    System.out.println("Number of minimized states: " + newStateIndex.size());
  }
  ug.purgeRules();
  bg.splitRules();
  return new Pair<>(ug, bg);
}
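The else branch above depends on a naming convention for arc inputs: the final character records on which side the non-synthetic child was generated ('<' or '[' for the left, '>' or ']' for the right), and the rest of the string is that child's category; addOneBinaryRule, shown further below, is what writes this encoding. The sketch that follows is a self-contained illustration of the decoding step only, with invented state and category names.

// Self-contained illustration of the arc-input decoding convention used above.
// The state and category names are invented; only the '<'/'>' suffix handling mirrors the method.
public class ArcInputDecodeDemo {
  public static void main(String[] args) {
    String source = "@NP_DT";       // synthetic state (arc source)
    String target = "@NP_DT_JJ";    // synthetic parent state (arc target)
    String input = "JJ>";           // category generated on the right

    char side = input.charAt(input.length() - 1);
    String category = input.substring(0, input.length() - 1);
    if (side == '>' || side == ']') {
      // right-generated: BinaryRule(parent = target, leftChild = source, rightChild = category)
      System.out.println(target + " -> " + source + " " + category);
    } else if (side == '<' || side == '[') {
      // left-generated: BinaryRule(parent = target, leftChild = category, rightChild = source)
      System.out.println(target + " -> " + category + " " + source);
    }
  }
}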
Use of edu.stanford.nlp.fsm.TransducerGraph in project CoreNLP by stanfordnlp.
The class GrammarCompactor, method addOneBinaryRule.
protected boolean addOneBinaryRule(BinaryRule rule, Map<String, TransducerGraph> graphs) {
  // parent has to be synthetic in BinaryRule
  String parentString = stateIndex.get(rule.parent);
  String leftString = stateIndex.get(rule.leftChild);
  String rightString = stateIndex.get(rule.rightChild);
  String source, target, input;
  String bracket = null;
  if (op.trainOptions.markFinalStates) {
    bracket = parentString.substring(parentString.length() - 1, parentString.length());
  }
  // the below test is not necessary with left to right grammars
  if (isSyntheticState(leftString)) {
    source = leftString;
    input = rightString + (bracket == null ? ">" : bracket);
  } else if (isSyntheticState(rightString)) {
    source = rightString;
    input = leftString + (bracket == null ? "<" : bracket);
  } else {
    // we don't know what to do with this rule
    return false;
  }
  target = parentString;
  // makes it a real 0 <= k <= infty
  Double output = new Double(smartNegate(rule.score()));
  String topcat = getTopCategoryOfSyntheticState(source);
  if (topcat == null) {
    throw new RuntimeException("can't have null topcat");
  }
  TransducerGraph graph = getGraphFromMap(graphs, topcat);
  graph.addArc(source, target, input, output);
  return true;
}
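To show the encoding direction concretely, here is a minimal sketch that adds the same kind of arc directly to a TransducerGraph. It assumes TransducerGraph has a public no-argument constructor; the states, the category, and the weight are invented for illustration.

// Minimal sketch of the arc encoding performed by addOneBinaryRule.
// Assumes TransducerGraph's no-argument constructor; names and weight are invented.
import edu.stanford.nlp.fsm.TransducerGraph;

public class BinaryRuleArcDemo {
  public static void main(String[] args) {
    TransducerGraph graph = new TransducerGraph();
    String source = "@NP_DT";        // synthetic left child: becomes the arc's source node
    String target = "@NP_DT_JJ";     // synthetic parent: becomes the arc's target node
    String input = "JJ" + ">";       // non-synthetic right child, suffixed with '>' (generated on the right)
    Double output = -1.5;            // stands in for smartNegate(rule.score()) in the real method

    graph.addArc(source, target, input, output);
    System.out.println("arcs: " + graph.getArcs());
  }
}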
Use of edu.stanford.nlp.fsm.TransducerGraph in project CoreNLP by stanfordnlp.
The class ExactGrammarCompactor, method doCompaction.
@Override
protected TransducerGraph doCompaction(TransducerGraph graph, List l1, List l3) {
  TransducerGraph result = graph;
  if (saveGraphs) {
    writeFile(result, "unminimized", (String) result.getEndNodes().iterator().next());
  }
  result = quasiDeterminizer.processGraph(result);
  // combine outputs into inputs
  result = new TransducerGraph(result, ocp);
  // minimize the thing
  result = minimizer.minimizeFA(result);
  //result = new TransducerGraph(graph, otsp); // for debugging
  // pull out strings from sets returned by minimizer
  result = new TransducerGraph(result, ntsp);
  // split outputs from inputs
  result = new TransducerGraph(result, isp);
  if (saveGraphs) {
    writeFile(result, "exactminimized", (String) result.getEndNodes().iterator().next());
  }
  //log.info(TransducerGraph.testGraphPaths(graph, result, 100));
  return result;
}
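The intermediate TransducerGraph copies exist because the minimizer operates on a finite automaton rather than a transducer: arc outputs are first folded into the inputs, the automaton is minimized, and the outputs are then split back out. The sketch below illustrates that pack-and-unpack idea with plain strings only; the '^' separator and helper names are invented, and the actual processors behind the ocp and isp fields may encode the pairs differently.

// Conceptual, self-contained sketch of "combine outputs into inputs" and
// "split outputs from inputs". The packing format here is invented.
public class PackUnpackDemo {
  // fold the arc's output weight into its input label so a plain FA minimizer can be used
  static String combine(String input, double output) {
    return input + "^" + output;
  }

  // recover the (input, output) pair after minimization
  static String[] split(String packed) {
    int caret = packed.lastIndexOf('^');
    return new String[]{ packed.substring(0, caret), packed.substring(caret + 1) };
  }

  public static void main(String[] args) {
    String packed = combine("JJ>", -1.5);
    String[] unpacked = split(packed);
    System.out.println(packed + " -> input=" + unpacked[0] + ", output=" + unpacked[1]);
  }
}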
Use of edu.stanford.nlp.fsm.TransducerGraph in project CoreNLP by stanfordnlp.
The class GrammarCompactor, method compactGrammar.
/**
 * Compacts the grammar specified by the Pair.
 *
 * @param grammar a Pair of grammars, ordered UnaryGrammar BinaryGrammar.
 * @param allTrainPaths a Map from String passive constituents to Lists of paths
 * @param allTestPaths a Map from String passive constituents to Lists of paths
 * @param originalStateIndex the Index of grammar states before compaction
 * @return a Triple of the new state Index, UnaryGrammar, and BinaryGrammar
 */
public Triple<Index<String>, UnaryGrammar, BinaryGrammar> compactGrammar(Pair<UnaryGrammar, BinaryGrammar> grammar, Map<String, List<List<String>>> allTrainPaths, Map<String, List<List<String>>> allTestPaths, Index<String> originalStateIndex) {
  // computed once for the whole grammar
  inputPrior = computeInputPrior(allTrainPaths);
  // BinaryGrammar bg = grammar.second;
  this.stateIndex = originalStateIndex;
  List<List<String>> trainPaths, testPaths;
  Set<UnaryRule> unaryRules = Generics.newHashSet();
  Set<BinaryRule> binaryRules = Generics.newHashSet();
  Map<String, TransducerGraph> graphs = convertGrammarToGraphs(grammar, unaryRules, binaryRules);
  compactedGraphs = Generics.newHashSet();
  if (verbose) {
    System.out.println("There are " + graphs.size() + " categories to compact.");
  }
  int i = 0;
  for (Iterator<Entry<String, TransducerGraph>> graphIter = graphs.entrySet().iterator(); graphIter.hasNext(); ) {
    Map.Entry<String, TransducerGraph> entry = graphIter.next();
    String cat = entry.getKey();
    TransducerGraph graph = entry.getValue();
    if (verbose) {
      System.out.println("About to compact grammar for " + cat + " with numNodes=" + graph.getNodes().size());
    }
    // to save memory
    trainPaths = allTrainPaths.remove(cat);
    if (trainPaths == null) {
      trainPaths = new ArrayList<>();
    }
    // to save memory
    testPaths = allTestPaths.remove(cat);
    if (testPaths == null) {
      testPaths = new ArrayList<>();
    }
    TransducerGraph compactedGraph = doCompaction(graph, trainPaths, testPaths);
    i++;
    if (verbose) {
      System.out.println(i + ". Compacted grammar for " + cat + " from " + graph.getArcs().size() + " arcs to " + compactedGraph.getArcs().size() + " arcs.");
    }
    // to save memory, remove the last thing
    graphIter.remove();
    compactedGraphs.add(compactedGraph);
  }
  Pair<UnaryGrammar, BinaryGrammar> ugbg = convertGraphsToGrammar(compactedGraphs, unaryRules, binaryRules);
  return new Triple<>(newStateIndex, ugbg.first(), ugbg.second());
}
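Finally, a hypothetical call site. The grammars, path maps, Options, and state index are taken as parameters because in CoreNLP they come out of the lexicalized-parser training code; the ExactGrammarCompactor constructor arguments used here are an assumption, not confirmed by the snippets above.

import java.util.List;
import java.util.Map;

import edu.stanford.nlp.parser.lexparser.BinaryGrammar;
import edu.stanford.nlp.parser.lexparser.ExactGrammarCompactor;
import edu.stanford.nlp.parser.lexparser.GrammarCompactor;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.UnaryGrammar;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Triple;

public class CompactGrammarSketch {
  // Returns the compacted (state index, unary grammar, binary grammar) triple.
  // The ExactGrammarCompactor constructor signature is an assumption; saveGraphs=false
  // and verbose=true are arbitrary choices for this sketch.
  static Triple<Index<String>, UnaryGrammar, BinaryGrammar> compact(
      Options op, UnaryGrammar ug, BinaryGrammar bg,
      Map<String, List<List<String>>> allTrainPaths,
      Map<String, List<List<String>>> allTestPaths,
      Index<String> stateIndex) {
    GrammarCompactor compactor = new ExactGrammarCompactor(op, false, true);
    return compactor.compactGrammar(new Pair<>(ug, bg), allTrainPaths, allTestPaths, stateIndex);
  }
}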