Example 1 with TransducerGraph

use of edu.stanford.nlp.fsm.TransducerGraph in project CoreNLP by stanfordnlp.

the class GrammarCompactor method addOneUnaryRule.

protected boolean addOneUnaryRule(UnaryRule rule, Map<String, TransducerGraph> graphs) {
    String parentString = stateIndex.get(rule.parent);
    String childString = stateIndex.get(rule.child);
    if (isSyntheticState(parentString)) {
        String topcat = getTopCategoryOfSyntheticState(parentString);
        TransducerGraph graph = getGraphFromMap(graphs, topcat);
        Double output = new Double(smartNegate(rule.score()));
        graph.addArc(graph.getStartNode(), parentString, childString, output);
        return true;
    } else if (isSyntheticState(childString)) {
        // need to add Arc from synthetic state to endState
        TransducerGraph graph = getGraphFromMap(graphs, parentString);
        Double output = new Double(smartNegate(rule.score()));
        // parentString should the the same as endState
        graph.addArc(childString, parentString, END, output);
        return true;
    } else {
        return false;
Example 2 with TransducerGraph

the class GrammarCompactor method convertGraphsToGrammar.

/**
 * @param graphs      a Set of TransducerGraph objects whose arcs are turned into rules
 * @param unaryRules  is a Set of UnaryRule objects that we need to add
 * @param binaryRules is a Set of BinaryRule objects that we need to add
 * @return a new Pair of UnaryGrammar, BinaryGrammar
 */
protected Pair<UnaryGrammar, BinaryGrammar> convertGraphsToGrammar(Set<TransducerGraph> graphs, Set<UnaryRule> unaryRules, Set<BinaryRule> binaryRules) {
    // Purpose: renumber the given rules against a fresh state index, turn every arc of every
    // graph into a UnaryRule or BinaryRule, optionally renormalize scores, and return a new
    // UnaryGrammar/BinaryGrammar pair built on the new index.
    // NOTE(review): this listing looks truncated by extraction -- the closing braces of the
    // loops and conditionals are missing, and several statements appear to have been lost
    // (see the notes below). Restore from the upstream source before compiling.

    // first go through all the existing rules and number them with new numberer
    // (assigns a fresh HashIndex to newStateIndex; the rules are mutated in place)
    newStateIndex = new HashIndex<>();
    for (UnaryRule rule : unaryRules) {
        // map old state number -> state name -> new state number
        String parent = stateIndex.get(rule.parent);
        rule.parent = newStateIndex.addToIndex(parent);
        String child = stateIndex.get(rule.child);
        rule.child = newStateIndex.addToIndex(child);
    for (BinaryRule rule : binaryRules) {
        String parent = stateIndex.get(rule.parent);
        rule.parent = newStateIndex.addToIndex(parent);
        String leftChild = stateIndex.get(rule.leftChild);
        rule.leftChild = newStateIndex.addToIndex(leftChild);
        String rightChild = stateIndex.get(rule.rightChild);
        rule.rightChild = newStateIndex.addToIndex(rightChild);
    // now go through the graphs and add the rules
    for (TransducerGraph graph : graphs) {
        Object startNode = graph.getStartNode();
        for (Arc arc : graph.getArcs()) {
            // TODO: make sure these are the strings we're looking for
            String source = arc.getSourceNode().toString();
            String target = arc.getTargetNode().toString();
            Object input = arc.getInput();
            String inputString = input.toString();
            // the arc output is unboxed as a Double; a different output type would fail the cast
            double output = ((Double) arc.getOutput()).doubleValue();
            // String.equals against an Object: only true when startNode is itself an equal String
            if (source.equals(startNode)) {
                // make a UnaryRule from (target, input)
                // NOTE(review): 'ur' is never used in the visible code -- presumably it was
                // added to unaryRules on a line that was lost; confirm against upstream
                UnaryRule ur = new UnaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(inputString), smartNegate(output));
            } else if (inputString.equals(END) || inputString.equals(EPSILON)) {
                // make a UnaryRule from (target, source); the END/EPSILON input is not a state
                // NOTE(review): 'ur' is unused here as well -- see the note above
                UnaryRule ur = new UnaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(source), smartNegate(output));
            } else {
                // make a BinaryRule
                // figure out whether the input was generated on the left or right:
                // the last character of the input is a direction marker, the rest is the state name
                int length = inputString.length();
                char leftOrRight = inputString.charAt(length - 1);
                inputString = inputString.substring(0, length - 1);
                BinaryRule br;
                if (leftOrRight == '<' || leftOrRight == '[') {
                    // marker '<' or '[': the input state is passed before the source state
                    br = new BinaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(inputString), newStateIndex.addToIndex(source), smartNegate(output));
                } else if (leftOrRight == '>' || leftOrRight == ']') {
                    // marker '>' or ']': the source state is passed before the input state
                    br = new BinaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(source), newStateIndex.addToIndex(inputString), smartNegate(output));
                } else {
                    throw new RuntimeException("Arc input is in unexpected format: " + arc);
                // NOTE(review): 'br' is never used in the visible code -- presumably it was
                // added to binaryRules on a line that was lost; confirm against upstream
    // by now, the unaryRules and binaryRules Sets have old untouched and new rules with scores
    ClassicCounter<String> symbolCounter = new ClassicCounter<>();
    if (outputType == RAW_COUNTS) {
        // so we count parent symbol occurrences: sum the rule scores per parent symbol
        for (UnaryRule rule : unaryRules) {
            symbolCounter.incrementCount(newStateIndex.get(rule.parent), rule.score);
        for (BinaryRule rule : binaryRules) {
            symbolCounter.incrementCount(newStateIndex.get(rule.parent), rule.score);
    // now we put the rules in the grammars
    // this should be smaller than last one
    // NOTE(review): numStates is not read anywhere in the visible code
    int numStates = newStateIndex.size();
    // NOTE(review): numRules is never incremented in the visible code, and no rule is ever
    // added to ug or bg -- those statements appear to be missing from this listing
    int numRules = 0;
    UnaryGrammar ug = new UnaryGrammar(newStateIndex);
    BinaryGrammar bg = new BinaryGrammar(newStateIndex);
    for (UnaryRule rule : unaryRules) {
        if (outputType == RAW_COUNTS) {
            // replace the raw score by log(score / total score of the parent symbol)
            double count = symbolCounter.getCount(newStateIndex.get(rule.parent));
            rule.score = (float) Math.log(rule.score / count);
    for (BinaryRule rule : binaryRules) {
        if (outputType == RAW_COUNTS) {
            // same as above, but op.trainOptions.ruleDiscount is subtracted from the score first
            double count = symbolCounter.getCount(newStateIndex.get(rule.parent));
            rule.score = (float) Math.log((rule.score - op.trainOptions.ruleDiscount) / count);
    if (verbose) {
        System.out.println("Number of minimized rules: " + numRules);
        System.out.println("Number of minimized states: " + newStateIndex.size());
    return new Pair<>(ug, bg);
Example 3 with TransducerGraph

the class GrammarCompactor method addOneBinaryRule.

protected boolean addOneBinaryRule(BinaryRule rule, Map<String, TransducerGraph> graphs) {
    // parent has to be synthetic in BinaryRule
    String parentString = stateIndex.get(rule.parent);
    String leftString = stateIndex.get(rule.leftChild);
    String rightString = stateIndex.get(rule.rightChild);
    String source, target, input;
    String bracket = null;
    if (op.trainOptions.markFinalStates) {
        bracket = parentString.substring(parentString.length() - 1, parentString.length());
    // the below test is not necessary with left to right grammars
    if (isSyntheticState(leftString)) {
        source = leftString;
        input = rightString + (bracket == null ? ">" : bracket);
    } else if (isSyntheticState(rightString)) {
        source = rightString;
        input = leftString + (bracket == null ? "<" : bracket);
    } else {
        // we don't know what to do with this rule
        return false;
    target = parentString;
    // makes it a real  0 <= k <= infty
    Double output = new Double(smartNegate(rule.score()));
    String topcat = getTopCategoryOfSyntheticState(source);
    if (topcat == null) {
        throw new RuntimeException("can't have null topcat");
    TransducerGraph graph = getGraphFromMap(graphs, topcat);
    graph.addArc(source, target, input, output);
    return true;
Example 4 with TransducerGraph

the class ExactGrammarCompactor method doCompaction.

protected TransducerGraph doCompaction(TransducerGraph graph, List l1, List l3) {
    TransducerGraph result = graph;
    if (saveGraphs) {
        writeFile(result, "unminimized", (String) result.getEndNodes().iterator().next());
    result = quasiDeterminizer.processGraph(result);
    // combine outputs into inputs
    result = new TransducerGraph(result, ocp);
    // minimize the thing
    result = minimizer.minimizeFA(result);
    //result = new  TransducerGraph(graph, otsp); // for debugging
    // pull out strings from sets returned by minimizer
    result = new TransducerGraph(result, ntsp);
    // split outputs from inputs
    result = new TransducerGraph(result, isp);
    if (saveGraphs) {
        writeFile(result, "exactminimized", (String) result.getEndNodes().iterator().next());
    //, result, 100));
    return result;
Example 5 with TransducerGraph

use of edu.stanford.nlp.fsm.TransducerGraph in project CoreNLP by stanfordnlp.

the class GrammarCompactor method compactGrammar.

/**
 * Compacts the grammar specified by the Pair.
 *
 * @param grammar            a Pair of grammars, ordered UnaryGrammar BinaryGrammar.
 * @param allTrainPaths      a Map from String passive constituents to Lists of paths
 * @param allTestPaths       a Map from String passive constituents to Lists of paths
 * @param originalStateIndex the state Index that is installed as this compactor's stateIndex
 * @return a Triple of the new state Index and the compacted grammars, ordered UnaryGrammar BinaryGrammar.
 */
public Triple<Index<String>, UnaryGrammar, BinaryGrammar> compactGrammar(Pair<UnaryGrammar, BinaryGrammar> grammar, Map<String, List<List<String>>> allTrainPaths, Map<String, List<List<String>>> allTestPaths, Index<String> originalStateIndex) {
    // Purpose: convert the grammar into one graph per category, compact each graph with
    // doCompaction, convert the compacted graphs back into grammars, and return them
    // together with the new state index.
    // NOTE(review): this listing looks truncated by extraction -- closing braces are missing
    // and some statements appear to have been lost (see the notes below). Restore from the
    // upstream source before compiling.

    // computed once for the whole grammar
    inputPrior = computeInputPrior(allTrainPaths);
    // BinaryGrammar bg = grammar.second;
    // from here on, state numbers are resolved against the caller-supplied index
    this.stateIndex = originalStateIndex;
    List<List<String>> trainPaths, testPaths;
    // passed empty to convertGrammarToGraphs and later handed to convertGraphsToGrammar;
    // presumably filled by convertGrammarToGraphs -- TODO confirm
    Set<UnaryRule> unaryRules = Generics.newHashSet();
    Set<BinaryRule> binaryRules = Generics.newHashSet();
    Map<String, TransducerGraph> graphs = convertGrammarToGraphs(grammar, unaryRules, binaryRules);
    compactedGraphs = Generics.newHashSet();
    if (verbose) {
        System.out.println("There are " + graphs.size() + " categories to compact.");
    // NOTE(review): 'i' is printed below but never incremented in the visible code
    int i = 0;
    for (Iterator<Entry<String, TransducerGraph>> graphIter = graphs.entrySet().iterator(); graphIter.hasNext(); ) {
        // NOTE(review): the right-hand side of this assignment is missing from the listing;
        // presumably graphIter.next() -- confirm against upstream
        Map.Entry<String, TransducerGraph> entry =;
        String cat = entry.getKey();
        TransducerGraph graph = entry.getValue();
        if (verbose) {
            System.out.println("About to compact grammar for " + cat + " with numNodes=" + graph.getNodes().size());
        // to save memory: remove() takes this category's paths out of the caller's map
        trainPaths = allTrainPaths.remove(cat);
        if (trainPaths == null) {
            // no paths recorded for this category: fall back to an empty list
            trainPaths = new ArrayList<>();
        // to save memory: same treatment for the test paths
        testPaths = allTestPaths.remove(cat);
        if (testPaths == null) {
            testPaths = new ArrayList<>();
        TransducerGraph compactedGraph = doCompaction(graph, trainPaths, testPaths);
        // NOTE(review): compactedGraph is never added to compactedGraphs in the visible code,
        // so the set passed to convertGraphsToGrammar below would be empty as shown
        if (verbose) {
            System.out.println(i + ". Compacted grammar for " + cat + " from " + graph.getArcs().size() + " arcs to " + compactedGraph.getArcs().size() + " arcs.");
        // to save memory, remove the last thing
        // NOTE(review): the removal this comment refers to is not present in the listing
    Pair<UnaryGrammar, BinaryGrammar> ugbg = convertGraphsToGrammar(compactedGraphs, unaryRules, binaryRules);
    // newStateIndex is assigned inside convertGraphsToGrammar
    return new Triple<>(newStateIndex, ugbg.first(), ugbg.second());
