Use of edu.stanford.nlp.fsm.TransducerGraph.Arc in the CoreNLP project (stanfordnlp): class GrammarCompactor, method convertGraphsToGrammar.
/**
 * Converts the transducer graphs back into grammar rules, re-numbering every
 * state with a fresh index ({@code newStateIndex}) along the way.
 *
 * Side effects: mutates the state numbers of the rules already present in
 * {@code unaryRules} and {@code binaryRules}, and adds the new rules decoded
 * from the graph arcs into those same sets.
 *
 * @param graphs the TransducerGraph objects whose arcs encode the compacted rules
 * @param unaryRules a Set of existing UnaryRule objects to renumber; newly decoded unary rules are added to it
 * @param binaryRules a Set of existing BinaryRule objects to renumber; newly decoded binary rules are added to it
 * @return a new Pair of UnaryGrammar, BinaryGrammar
 */
protected Pair<UnaryGrammar, BinaryGrammar> convertGraphsToGrammar(Set<TransducerGraph> graphs, Set<UnaryRule> unaryRules, Set<BinaryRule> binaryRules) {
// first go through all the existing rules and number them with new numberer
newStateIndex = new HashIndex<>();
for (UnaryRule rule : unaryRules) {
// translate each old state number to its name, then to a fresh number
String parent = stateIndex.get(rule.parent);
rule.parent = newStateIndex.addToIndex(parent);
String child = stateIndex.get(rule.child);
rule.child = newStateIndex.addToIndex(child);
}
for (BinaryRule rule : binaryRules) {
String parent = stateIndex.get(rule.parent);
rule.parent = newStateIndex.addToIndex(parent);
String leftChild = stateIndex.get(rule.leftChild);
rule.leftChild = newStateIndex.addToIndex(leftChild);
String rightChild = stateIndex.get(rule.rightChild);
rule.rightChild = newStateIndex.addToIndex(rightChild);
}
// now go through the graphs and add the rules
for (TransducerGraph graph : graphs) {
Object startNode = graph.getStartNode();
for (Arc arc : graph.getArcs()) {
// TODO: make sure these are the strings we're looking for
String source = arc.getSourceNode().toString();
String target = arc.getTargetNode().toString();
Object input = arc.getInput();
String inputString = input.toString();
// arc outputs carry the rule score; assumed to be Double here
double output = ((Double) arc.getOutput()).doubleValue();
// NOTE(review): String.equals(Object) only matches if startNode is itself a
// String equal to source — confirm getStartNode() returns a String here.
if (source.equals(startNode)) {
// make a UnaryRule
UnaryRule ur = new UnaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(inputString), smartNegate(output));
unaryRules.add(ur);
} else if (inputString.equals(END) || inputString.equals(EPSILON)) {
// make a UnaryRule
UnaryRule ur = new UnaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(source), smartNegate(output));
unaryRules.add(ur);
} else {
// make a BinaryRule
// figure out whether the input was generated on the left or right
// the last char of the input string is a direction marker (see below)
int length = inputString.length();
char leftOrRight = inputString.charAt(length - 1);
inputString = inputString.substring(0, length - 1);
BinaryRule br;
// '<' / '[' mark the input as the left child; '>' / ']' as the right child
if (leftOrRight == '<' || leftOrRight == '[') {
br = new BinaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(inputString), newStateIndex.addToIndex(source), smartNegate(output));
} else if (leftOrRight == '>' || leftOrRight == ']') {
br = new BinaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(source), newStateIndex.addToIndex(inputString), smartNegate(output));
} else {
throw new RuntimeException("Arc input is in unexpected format: " + arc);
}
binaryRules.add(br);
}
}
}
// by now, the unaryRules and binaryRules Sets have old untouched and new rules with scores
ClassicCounter<String> symbolCounter = new ClassicCounter<>();
if (outputType == RAW_COUNTS) {
// so we count parent symbol occurrences
// (used below to normalize raw counts into log-probabilities)
for (UnaryRule rule : unaryRules) {
symbolCounter.incrementCount(newStateIndex.get(rule.parent), rule.score);
}
for (BinaryRule rule : binaryRules) {
symbolCounter.incrementCount(newStateIndex.get(rule.parent), rule.score);
}
}
// now we put the rules in the grammars
// this should be smaller than last one
int numStates = newStateIndex.size();
int numRules = 0;
UnaryGrammar ug = new UnaryGrammar(newStateIndex);
BinaryGrammar bg = new BinaryGrammar(newStateIndex);
for (UnaryRule rule : unaryRules) {
if (outputType == RAW_COUNTS) {
// normalize: log(count / parent total)
double count = symbolCounter.getCount(newStateIndex.get(rule.parent));
rule.score = (float) Math.log(rule.score / count);
}
ug.addRule(rule);
numRules++;
}
for (BinaryRule rule : binaryRules) {
if (outputType == RAW_COUNTS) {
// binary rules are additionally discounted before normalization
double count = symbolCounter.getCount(newStateIndex.get(rule.parent));
rule.score = (float) Math.log((rule.score - op.trainOptions.ruleDiscount) / count);
}
bg.addRule(rule);
numRules++;
}
if (verbose) {
System.out.println("Number of minimized rules: " + numRules);
System.out.println("Number of minimized states: " + newStateIndex.size());
}
ug.purgeRules();
bg.splitRules();
return new Pair<>(ug, bg);
}
Use of edu.stanford.nlp.fsm.TransducerGraph.Arc in the CoreNLP project (stanfordnlp): class ExactAutomatonMinimizer, method buildMinimizedFA.
/**
 * Builds the minimized automaton by projecting each node of the unminimized
 * FA onto its equivalence class (a Set of arcs) and copying every arc over
 * to run between the projected nodes.
 *
 * @return a new TransducerGraph whose nodes are the projected (merged) states
 */
protected TransducerGraph buildMinimizedFA() {
  TransducerGraph minimizedFA = new TransducerGraph();
  TransducerGraph unminimizedFA = getUnminimizedFA();
  for (TransducerGraph.Arc arc : unminimizedFA.getArcs()) {
    Set<Arc> source = projectNode(arc.getSourceNode());
    Set<Arc> target = projectNode(arc.getTargetNode());
    try {
      if (minimizedFA.canAddArc(source, target, arc.getInput(), arc.getOutput())) {
        minimizedFA.addArc(source, target, arc.getInput(), arc.getOutput());
      }
    } catch (Exception ignored) {
      // Best-effort: arcs that cannot be added after projection are dropped.
      // NOTE(review): consider catching the specific exception type thrown by
      // canAddArc/addArc rather than Exception.
    }
  }
  minimizedFA.setStartNode(projectNode(unminimizedFA.getStartNode()));
  for (Object o : unminimizedFA.getEndNodes()) {
    minimizedFA.setEndNode(projectNode(o));
  }
  return minimizedFA;
}
Use of edu.stanford.nlp.fsm.TransducerGraph.Arc in the CoreNLP project (stanfordnlp): class ExactAutomatonMinimizer, method getInverseImages.
/**
 * Collects the source nodes of all arcs that lead into members of the given
 * block on the given input symbol (the inverse image of the block under the
 * symbol).
 *
 * @param block the block whose members' incoming arcs are examined
 * @param symbol the input symbol the arcs must carry
 * @return the source nodes of every matching arc (may contain duplicates)
 */
protected Collection<Object> getInverseImages(ExactBlock<Arc> block, Object symbol) {
  List<Object> sources = new ArrayList<>();
  for (Arc member : block.getMembers()) {
    Collection<Arc> incoming;
    if (member == SINK_NODE) {
      // the sink matches every arc on this symbol (complemented in dense mode)
      incoming = getUnminimizedFA().getArcsByInput(symbol);
      if (!sparseMode) {
        incoming = difference(getUnminimizedFA().getArcs(), incoming);
      }
    } else {
      incoming = getUnminimizedFA().getArcsByTargetAndInput(member, symbol);
    }
    if (incoming != null) {
      for (Arc arc : incoming) {
        sources.add(arc.getSourceNode());
      }
    }
  }
  return sources;
}
Use of edu.stanford.nlp.fsm.TransducerGraph.Arc in the CoreNLP project (stanfordnlp): class ExactAutomatonMinimizer, method makeBlock.
/**
 * Creates a new block from the given members, registers each (non-sink)
 * member in the member-to-block map, and schedules the block as an active
 * pair with every symbol.
 *
 * @param members the arcs that form the new block
 */
protected void makeBlock(Collection<Arc> members) {
  ExactBlock<Arc> newBlock = new ExactBlock<>(Generics.newHashSet(members));
  for (Arc m : newBlock.getMembers()) {
    if (m == SINK_NODE) {
      continue;  // the sink never gets a block mapping
    }
    memberToBlock.put(m, newBlock);
  }
  for (Object symbolObj : getSymbols()) {
    addActivePair(new Pair<>(newBlock, (Arc) symbolObj));
  }
}
Use of edu.stanford.nlp.fsm.TransducerGraph.Arc in the CoreNLP project (stanfordnlp): class ExactAutomatonMinimizer, method minimize.
/**
 * Runs the partition-refinement loop: while active (block, symbol) pairs
 * remain, compute the inverse image of the active block under the symbol and
 * split every block that the inverse image only partially overlaps.
 */
protected void minimize() {
  makeInitialBlocks();
  while (hasActivePair()) {
    Pair<ExactBlock<Arc>, ?> activePair = getActivePair();
    ExactBlock<Arc> activeBlock = activePair.first();
    Object symbol = activePair.second();
    Collection<Object> inverseImages = getInverseImages(activeBlock, symbol);
    Map<ExactBlock<Arc>, Set<Object>> inverseImagesByBlock = sortIntoBlocks(inverseImages);
    // iterate entries directly instead of keySet() + get() (one lookup, not two)
    for (Map.Entry<ExactBlock<Arc>, Set<Object>> entry : inverseImagesByBlock.entrySet()) {
      ExactBlock<Arc> block = entry.getKey();
      if (block == null) {
        throw new RuntimeException("block was null");
      }
      // raw Collection: may be reassigned to the result of difference(...)
      Collection members = entry.getValue();
      if (members.isEmpty() || members.size() == block.getMembers().size()) {
        continue;  // no overlap, or the whole block — nothing to split
      }
      // split off the smaller half to keep refinement efficient
      if (members.size() > block.getMembers().size() - members.size()) {
        members = difference(block.getMembers(), members);
      }
      removeAll(block.getMembers(), members);
      makeBlock(members);
    }
  }
}
Aggregations