Search in sources :

Example 1 with TransducerGraph

use of edu.stanford.nlp.fsm.TransducerGraph in project CoreNLP by stanfordnlp.

the class GrammarCompactor method addOneUnaryRule.

/**
 * Tries to record a unary rule as an arc in one of the per-category transducer graphs.
 *
 * <p>If the rule's parent is a synthetic state, an arc is added from the graph's start node
 * to the parent, with the child as the arc input. Otherwise, if the child is a synthetic
 * state, an arc is added from the child to the parent with {@code END} as the input, and the
 * parent is registered as an end node of that graph. In both cases the arc output is the
 * rule score passed through {@code smartNegate}.
 *
 * @param rule   the unary rule to convert
 * @param graphs the graphs built so far; the graph to extend is looked up via {@code getGraphFromMap}
 * @return {@code true} if the rule was added to a graph, {@code false} if neither its parent
 *         nor its child is a synthetic state
 */
protected boolean addOneUnaryRule(UnaryRule rule, Map<String, TransducerGraph> graphs) {
    String parentString = stateIndex.get(rule.parent);
    String childString = stateIndex.get(rule.child);
    if (isSyntheticState(parentString)) {
        String topcat = getTopCategoryOfSyntheticState(parentString);
        TransducerGraph graph = getGraphFromMap(graphs, topcat);
        // Double.valueOf replaces the deprecated Double(double) constructor
        Double output = Double.valueOf(smartNegate(rule.score()));
        graph.addArc(graph.getStartNode(), parentString, childString, output);
        return true;
    }
    if (isSyntheticState(childString)) {
        // need to add an arc from the synthetic state to the end state
        TransducerGraph graph = getGraphFromMap(graphs, parentString);
        Double output = Double.valueOf(smartNegate(rule.score()));
        // parentString should be the same as the end state
        graph.addArc(childString, parentString, END, output);
        graph.setEndNode(parentString);
        return true;
    }
    // neither side is synthetic: this rule is not represented in any graph
    return false;
}
Also used : TransducerGraph(edu.stanford.nlp.fsm.TransducerGraph)

Example 2 with TransducerGraph

use of edu.stanford.nlp.fsm.TransducerGraph in project CoreNLP by stanfordnlp.

the class GrammarCompactor method convertGraphsToGrammar.

/**
 * Turns the compacted transducer graphs back into a unary/binary grammar pair.
 *
 * <p>The rules already present in {@code unaryRules} and {@code binaryRules} are first
 * renumbered in place against a fresh {@code newStateIndex}. Every arc of every graph is then
 * converted into a rule and added to the matching set:
 * <ul>
 *   <li>an arc whose source equals the graph's start node becomes a unary rule from the arc
 *       target to the arc input;</li>
 *   <li>an arc whose input is {@code END} or {@code EPSILON} becomes a unary rule from the arc
 *       target to the arc source;</li>
 *   <li>any other arc becomes a binary rule; the last character of the input
 *       ({@code '<'}/{@code '['} or {@code '>'}/{@code ']'}) decides whether the input or the
 *       source is passed first.</li>
 * </ul>
 * When {@code outputType == RAW_COUNTS}, each rule score is divided by the summed score of all
 * rules sharing its parent and replaced by the log of that ratio (binary rules subtract
 * {@code op.trainOptions.ruleDiscount} first).
 *
 * @param graphs      a Set of TransducerGraph objects whose arcs are converted into rules
 * @param unaryRules  a Set of UnaryRule objects that we need to add; modified in place
 * @param binaryRules a Set of BinaryRule objects that we need to add; modified in place
 * @return a new Pair of UnaryGrammar, BinaryGrammar
 * @throws RuntimeException if a binary arc's input is empty or does not end in one of the
 *                          expected direction markers
 */
protected Pair<UnaryGrammar, BinaryGrammar> convertGraphsToGrammar(Set<TransducerGraph> graphs, Set<UnaryRule> unaryRules, Set<BinaryRule> binaryRules) {
    // first go through all the existing rules and number them with new numberer
    // NOTE(review): the rules are mutated while they sit inside the Sets; if their hashCode
    // depends on these fields and the Sets are hash-based, lookups may misbehave -- confirm.
    newStateIndex = new HashIndex<>();
    for (UnaryRule rule : unaryRules) {
        String parent = stateIndex.get(rule.parent);
        rule.parent = newStateIndex.addToIndex(parent);
        String child = stateIndex.get(rule.child);
        rule.child = newStateIndex.addToIndex(child);
    }
    for (BinaryRule rule : binaryRules) {
        String parent = stateIndex.get(rule.parent);
        rule.parent = newStateIndex.addToIndex(parent);
        String leftChild = stateIndex.get(rule.leftChild);
        rule.leftChild = newStateIndex.addToIndex(leftChild);
        String rightChild = stateIndex.get(rule.rightChild);
        rule.rightChild = newStateIndex.addToIndex(rightChild);
    }
    // now go through the graphs and add the rules
    for (TransducerGraph graph : graphs) {
        Object startNode = graph.getStartNode();
        for (Arc arc : graph.getArcs()) {
            // TODO: make sure these are the strings we're looking for
            String source = arc.getSourceNode().toString();
            String target = arc.getTargetNode().toString();
            Object input = arc.getInput();
            String inputString = input.toString();
            double output = (Double) arc.getOutput();
            // String.equals(Object): this only matches when the start node is itself a String
            if (source.equals(startNode)) {
                // make a UnaryRule
                UnaryRule ur = new UnaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(inputString), smartNegate(output));
                unaryRules.add(ur);
            } else if (inputString.equals(END) || inputString.equals(EPSILON)) {
                // make a UnaryRule
                UnaryRule ur = new UnaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(source), smartNegate(output));
                unaryRules.add(ur);
            } else {
                // make a BinaryRule
                // figure out whether the input was generated on the left or right
                int length = inputString.length();
                if (length == 0) {
                    // no direction marker to read; report it the same way as a bad marker
                    throw new RuntimeException("Arc input is in unexpected format: " + arc);
                }
                char leftOrRight = inputString.charAt(length - 1);
                inputString = inputString.substring(0, length - 1);
                BinaryRule br;
                if (leftOrRight == '<' || leftOrRight == '[') {
                    br = new BinaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(inputString), newStateIndex.addToIndex(source), smartNegate(output));
                } else if (leftOrRight == '>' || leftOrRight == ']') {
                    br = new BinaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(source), newStateIndex.addToIndex(inputString), smartNegate(output));
                } else {
                    throw new RuntimeException("Arc input is in unexpected format: " + arc);
                }
                binaryRules.add(br);
            }
        }
    }
    // by now, the unaryRules and binaryRules Sets have old untouched and new rules with scores
    ClassicCounter<String> symbolCounter = new ClassicCounter<>();
    if (outputType == RAW_COUNTS) {
        // so we count parent symbol occurrences
        for (UnaryRule rule : unaryRules) {
            symbolCounter.incrementCount(newStateIndex.get(rule.parent), rule.score);
        }
        for (BinaryRule rule : binaryRules) {
            symbolCounter.incrementCount(newStateIndex.get(rule.parent), rule.score);
        }
    }
    // now we put the rules in the grammars
    int numRules = 0;
    UnaryGrammar ug = new UnaryGrammar(newStateIndex);
    BinaryGrammar bg = new BinaryGrammar(newStateIndex);
    for (UnaryRule rule : unaryRules) {
        if (outputType == RAW_COUNTS) {
            // normalize by the total score of all rules with the same parent
            double count = symbolCounter.getCount(newStateIndex.get(rule.parent));
            rule.score = (float) Math.log(rule.score / count);
        }
        ug.addRule(rule);
        numRules++;
    }
    for (BinaryRule rule : binaryRules) {
        if (outputType == RAW_COUNTS) {
            // same normalization, after subtracting the configured rule discount
            double count = symbolCounter.getCount(newStateIndex.get(rule.parent));
            rule.score = (float) Math.log((rule.score - op.trainOptions.ruleDiscount) / count);
        }
        bg.addRule(rule);
        numRules++;
    }
    if (verbose) {
        System.out.println("Number of minimized rules: " + numRules);
        System.out.println("Number of minimized states: " + newStateIndex.size());
    }
    ug.purgeRules();
    bg.splitRules();
    return new Pair<>(ug, bg);
}
Also used : Arc(edu.stanford.nlp.fsm.TransducerGraph.Arc) TransducerGraph(edu.stanford.nlp.fsm.TransducerGraph) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) Pair(edu.stanford.nlp.util.Pair)

Example 3 with TransducerGraph

use of edu.stanford.nlp.fsm.TransducerGraph in project CoreNLP by stanfordnlp.

the class GrammarCompactor method addOneBinaryRule.

/**
 * Tries to record a binary rule as an arc in one of the per-category transducer graphs.
 *
 * <p>The synthetic child (left child checked first, then right) becomes the arc source, the
 * parent becomes the arc target, and the other child's name plus a one-character suffix
 * becomes the arc input. The suffix is {@code ">"} when the left child is the source and
 * {@code "<"} when the right child is the source; if {@code op.trainOptions.markFinalStates}
 * is set, the last character of the parent state name is used instead. The arc output is the
 * rule score passed through {@code smartNegate}.
 *
 * @param rule   the binary rule to convert
 * @param graphs the graphs built so far; the graph to extend is looked up via {@code getGraphFromMap}
 * @return {@code true} if the rule was added to a graph, {@code false} if neither child is a
 *         synthetic state
 * @throws RuntimeException if no top category can be determined for the source state
 */
protected boolean addOneBinaryRule(BinaryRule rule, Map<String, TransducerGraph> graphs) {
    // parent has to be synthetic in BinaryRule
    String parentString = stateIndex.get(rule.parent);
    String leftString = stateIndex.get(rule.leftChild);
    String rightString = stateIndex.get(rule.rightChild);
    String bracket = null;
    if (op.trainOptions.markFinalStates) {
        // reuse the final character of the parent state name as the direction marker
        bracket = parentString.substring(parentString.length() - 1);
    }
    String source;
    String input;
    // the below test is not necessary with left to right grammars
    if (isSyntheticState(leftString)) {
        source = leftString;
        input = rightString + (bracket == null ? ">" : bracket);
    } else if (isSyntheticState(rightString)) {
        source = rightString;
        input = leftString + (bracket == null ? "<" : bracket);
    } else {
        // we don't know what to do with this rule
        return false;
    }
    // makes it a real  0 <= k <= infty
    // Double.valueOf replaces the deprecated Double(double) constructor
    Double output = Double.valueOf(smartNegate(rule.score()));
    String topcat = getTopCategoryOfSyntheticState(source);
    if (topcat == null) {
        throw new RuntimeException("can't have null topcat");
    }
    TransducerGraph graph = getGraphFromMap(graphs, topcat);
    // the parent state is the arc target
    graph.addArc(source, parentString, input, output);
    return true;
}
Also used : TransducerGraph(edu.stanford.nlp.fsm.TransducerGraph)

Example 4 with TransducerGraph

use of edu.stanford.nlp.fsm.TransducerGraph in project CoreNLP by stanfordnlp.

the class ExactGrammarCompactor method doCompaction.

/**
 * Runs the compaction pipeline on a single graph: quasi-determinize, fold outputs into inputs,
 * minimize, unpack the node sets produced by the minimizer, and split outputs from inputs
 * again. When {@code saveGraphs} is set, the graph is written out before and after the
 * pipeline, labelled with its first end node.
 *
 * @param graph the graph to compact
 * @param l1    not used by this implementation
 * @param l3    not used by this implementation
 * @return the compacted graph
 */
@Override
protected TransducerGraph doCompaction(TransducerGraph graph, List l1, List l3) {
    if (saveGraphs) {
        String endNodeName = (String) graph.getEndNodes().iterator().next();
        writeFile(graph, "unminimized", endNodeName);
    }
    TransducerGraph determinized = quasiDeterminizer.processGraph(graph);
    // combine outputs into inputs
    TransducerGraph combined = new TransducerGraph(determinized, ocp);
    // minimize the thing
    TransducerGraph minimized = minimizer.minimizeFA(combined);
    // pull out strings from sets returned by minimizer
    TransducerGraph unpacked = new TransducerGraph(minimized, ntsp);
    // split outputs from inputs
    TransducerGraph compacted = new TransducerGraph(unpacked, isp);
    if (saveGraphs) {
        String endNodeName = (String) compacted.getEndNodes().iterator().next();
        writeFile(compacted, "exactminimized", endNodeName);
    }
    return compacted;
}
Also used : TransducerGraph(edu.stanford.nlp.fsm.TransducerGraph)

Example 5 with TransducerGraph

use of edu.stanford.nlp.fsm.TransducerGraph in project CoreNLP by stanfordnlp.

the class GrammarCompactor method compactGrammar.

/**
 * Compacts the grammar specified by the Pair.
 *
 * <p>The grammar is converted into one transducer graph per category, each graph is compacted
 * with {@code doCompaction}, and the compacted graphs are converted back into a grammar.
 * Entries are removed from {@code allTrainPaths} and {@code allTestPaths} as their category
 * is processed.
 *
 * @param grammar            a Pair of grammars, ordered UnaryGrammar BinaryGrammar.
 * @param allTrainPaths      a Map from String passive constituents to Lists of paths
 * @param allTestPaths       a Map from String passive constituents to Lists of paths
 * @param originalStateIndex the state index stored in {@code stateIndex} for the conversion
 * @return a Triple of the new state index, the UnaryGrammar and the BinaryGrammar
 */
public Triple<Index<String>, UnaryGrammar, BinaryGrammar> compactGrammar(Pair<UnaryGrammar, BinaryGrammar> grammar, Map<String, List<List<String>>> allTrainPaths, Map<String, List<List<String>>> allTestPaths, Index<String> originalStateIndex) {
    // computed once for the whole grammar
    inputPrior = computeInputPrior(allTrainPaths);
    this.stateIndex = originalStateIndex;
    Set<UnaryRule> unaryRules = Generics.newHashSet();
    Set<BinaryRule> binaryRules = Generics.newHashSet();
    Map<String, TransducerGraph> graphs = convertGrammarToGraphs(grammar, unaryRules, binaryRules);
    compactedGraphs = Generics.newHashSet();
    if (verbose) {
        System.out.println("There are " + graphs.size() + " categories to compact.");
    }
    int compactedSoFar = 0;
    Iterator<Entry<String, TransducerGraph>> remaining = graphs.entrySet().iterator();
    while (remaining.hasNext()) {
        Entry<String, TransducerGraph> categoryAndGraph = remaining.next();
        String cat = categoryAndGraph.getKey();
        TransducerGraph original = categoryAndGraph.getValue();
        if (verbose) {
            System.out.println("About to compact grammar for " + cat + " with numNodes=" + original.getNodes().size());
        }
        // paths are removed from the caller's maps as we go, to save memory
        List<List<String>> trainPaths = allTrainPaths.remove(cat);
        if (trainPaths == null) {
            trainPaths = new ArrayList<>();
        }
        List<List<String>> testPaths = allTestPaths.remove(cat);
        if (testPaths == null) {
            testPaths = new ArrayList<>();
        }
        TransducerGraph compacted = doCompaction(original, trainPaths, testPaths);
        compactedSoFar++;
        if (verbose) {
            System.out.println(compactedSoFar + ". Compacted grammar for " + cat + " from " + original.getArcs().size() + " arcs to " + compacted.getArcs().size() + " arcs.");
        }
        // to save memory, drop the uncompacted graph from the map
        remaining.remove();
        compactedGraphs.add(compacted);
    }
    Pair<UnaryGrammar, BinaryGrammar> rebuilt = convertGraphsToGrammar(compactedGraphs, unaryRules, binaryRules);
    return new Triple<>(newStateIndex, rebuilt.first(), rebuilt.second());
}
Also used : Triple(edu.stanford.nlp.util.Triple) Entry(java.util.Map.Entry) TransducerGraph(edu.stanford.nlp.fsm.TransducerGraph)

Aggregations

TransducerGraph (edu.stanford.nlp.fsm.TransducerGraph): 6; Arc (edu.stanford.nlp.fsm.TransducerGraph.Arc): 1; ClassicCounter (edu.stanford.nlp.stats.ClassicCounter): 1; Pair (edu.stanford.nlp.util.Pair): 1; Triple (edu.stanford.nlp.util.Triple): 1; Entry (java.util.Map.Entry): 1