Search in sources :

Example 1 with Arc

use of edu.stanford.nlp.fsm.TransducerGraph.Arc in project CoreNLP by stanfordnlp.

the class GrammarCompactor method convertGraphsToGrammar.

/**
 * Converts a set of transducer graphs back into a unary and a binary grammar,
 * renumbering every state against a freshly created {@code newStateIndex}.
 *
 * <p>Side effects visible in this method: {@code newStateIndex} is replaced, the
 * {@code parent}/{@code child} fields of the rules passed in are rewritten in place
 * to the new numbering, the rules derived from the graphs are added to the two
 * rule sets passed in, and (when {@code outputType == RAW_COUNTS}) every rule's
 * {@code score} is overwritten with a log of its normalized count.
 *
 * @param graphs      a Set of TransducerGraph objects whose arcs are turned into rules
 * @param unaryRules  is a Set of UnaryRule objects that we need to add
 * @param binaryRules is a Set of BinaryRule objects that we need to add
 * @return a new Pair of UnaryGrammar, BinaryGrammar
 * @throws RuntimeException if an arc that should become a binary rule has an input
 *                          that is empty or does not end in one of {@code < [ > ]}
 */
protected Pair<UnaryGrammar, BinaryGrammar> convertGraphsToGrammar(Set<TransducerGraph> graphs, Set<UnaryRule> unaryRules, Set<BinaryRule> binaryRules) {
    // first go through all the existing rules and number them with new numberer
    newStateIndex = new HashIndex<>();
    for (UnaryRule rule : unaryRules) {
        String parent = stateIndex.get(rule.parent);
        rule.parent = newStateIndex.addToIndex(parent);
        String child = stateIndex.get(rule.child);
        rule.child = newStateIndex.addToIndex(child);
    }
    for (BinaryRule rule : binaryRules) {
        String parent = stateIndex.get(rule.parent);
        rule.parent = newStateIndex.addToIndex(parent);
        String leftChild = stateIndex.get(rule.leftChild);
        rule.leftChild = newStateIndex.addToIndex(leftChild);
        String rightChild = stateIndex.get(rule.rightChild);
        rule.rightChild = newStateIndex.addToIndex(rightChild);
    }
    // now go through the graphs and add the rules
    for (TransducerGraph graph : graphs) {
        Object startNode = graph.getStartNode();
        for (Arc arc : graph.getArcs()) {
            // TODO: make sure these are the strings we're looking for
            String source = arc.getSourceNode().toString();
            String target = arc.getTargetNode().toString();
            Object input = arc.getInput();
            String inputString = input.toString();
            double output = ((Double) arc.getOutput()).doubleValue();
            // NOTE(review): this compares a String with an Object, so it can only be
            // true when the start node is itself a String equal to source -- confirm
            // that start nodes are always Strings at this point.
            if (source.equals(startNode)) {
                // make a UnaryRule: arc leaving the start node, built from (target, input)
                UnaryRule ur = new UnaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(inputString), smartNegate(output));
                unaryRules.add(ur);
            } else if (inputString.equals(END) || inputString.equals(EPSILON)) {
                // make a UnaryRule: END/EPSILON input, built from (target, source)
                UnaryRule ur = new UnaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(source), smartNegate(output));
                unaryRules.add(ur);
            } else {
                // make a BinaryRule
                // figure out whether the input was generated on the left or right
                if (inputString.isEmpty()) {
                    // without this guard charAt(-1) below would fail with an
                    // uninformative StringIndexOutOfBoundsException
                    throw new RuntimeException("Arc input is in unexpected format: " + arc);
                }
                int length = inputString.length();
                // the last character is a direction marker; the rest is the symbol name
                char leftOrRight = inputString.charAt(length - 1);
                inputString = inputString.substring(0, length - 1);
                BinaryRule br;
                // The addToIndex calls are kept in argument order on purpose: the order
                // in which new names are first indexed determines their numbers.
                if (leftOrRight == '<' || leftOrRight == '[') {
                    br = new BinaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(inputString), newStateIndex.addToIndex(source), smartNegate(output));
                } else if (leftOrRight == '>' || leftOrRight == ']') {
                    br = new BinaryRule(newStateIndex.addToIndex(target), newStateIndex.addToIndex(source), newStateIndex.addToIndex(inputString), smartNegate(output));
                } else {
                    throw new RuntimeException("Arc input is in unexpected format: " + arc);
                }
                binaryRules.add(br);
            }
        }
    }
    // by now, the unaryRules and binaryRules Sets have old untouched and new rules with scores
    ClassicCounter<String> symbolCounter = new ClassicCounter<>();
    if (outputType == RAW_COUNTS) {
        // so we count parent symbol occurrences
        for (UnaryRule rule : unaryRules) {
            symbolCounter.incrementCount(newStateIndex.get(rule.parent), rule.score);
        }
        for (BinaryRule rule : binaryRules) {
            symbolCounter.incrementCount(newStateIndex.get(rule.parent), rule.score);
        }
    }
    // now we put the rules in the grammars
    int numRules = 0;
    UnaryGrammar ug = new UnaryGrammar(newStateIndex);
    BinaryGrammar bg = new BinaryGrammar(newStateIndex);
    for (UnaryRule rule : unaryRules) {
        if (outputType == RAW_COUNTS) {
            // turn the raw count into the log of its share of the parent's total
            double count = symbolCounter.getCount(newStateIndex.get(rule.parent));
            rule.score = (float) Math.log(rule.score / count);
        }
        ug.addRule(rule);
        numRules++;
    }
    for (BinaryRule rule : binaryRules) {
        if (outputType == RAW_COUNTS) {
            // same normalization, but binary rules have the rule discount subtracted first
            double count = symbolCounter.getCount(newStateIndex.get(rule.parent));
            rule.score = (float) Math.log((rule.score - op.trainOptions.ruleDiscount) / count);
        }
        bg.addRule(rule);
        numRules++;
    }
    if (verbose) {
        System.out.println("Number of minimized rules: " + numRules);
        System.out.println("Number of minimized states: " + newStateIndex.size());
    }
    ug.purgeRules();
    bg.splitRules();
    return new Pair<>(ug, bg);
}
Also used : Arc(edu.stanford.nlp.fsm.TransducerGraph.Arc) TransducerGraph(edu.stanford.nlp.fsm.TransducerGraph) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) Pair(edu.stanford.nlp.util.Pair)

Example 2 with Arc

use of edu.stanford.nlp.fsm.TransducerGraph.Arc in project CoreNLP by stanfordnlp.

the class ExactAutomatonMinimizer method buildMinimizedFA.

/**
 * Builds a new TransducerGraph from the unminimized automaton by passing the
 * endpoints of every arc, the start node and each end node through
 * {@code projectNode}.
 *
 * @return the newly built graph
 */
protected TransducerGraph buildMinimizedFA() {
    TransducerGraph result = new TransducerGraph();
    TransducerGraph original = getUnminimizedFA();
    for (Arc edge : original.getArcs()) {
        Set<Arc> from = projectNode(edge.getSourceNode());
        Set<Arc> to = projectNode(edge.getTargetNode());
        try {
            // only add the projected arc when the graph says it is addable
            boolean addable = result.canAddArc(from, to, edge.getInput(), edge.getOutput());
            if (addable) {
                result.addArc(from, to, edge.getInput(), edge.getOutput());
            }
        } catch (Exception ignored) {
            // Best-effort by design: an arc that fails to be added is skipped and the
            // loop moves on. (A rethrow as IllegalArgumentException was once here but
            // had been commented out.)
        }
    }
    result.setStartNode(projectNode(original.getStartNode()));
    for (Object endNode : original.getEndNodes()) {
        result.setEndNode(projectNode(endNode));
    }
    return result;
}
Also used : Arc(edu.stanford.nlp.fsm.TransducerGraph.Arc)

Example 3 with Arc

use of edu.stanford.nlp.fsm.TransducerGraph.Arc in project CoreNLP by stanfordnlp.

the class ExactAutomatonMinimizer method getInverseImages.

/**
 * Collects the source node of every arc selected for the members of
 * {@code block} under {@code symbol}.
 *
 * <p>For an ordinary member the arcs are looked up by target and input. For the
 * {@code SINK_NODE} sentinel the arcs are looked up by input alone and, unless
 * {@code sparseMode} is set, replaced by the result of {@code difference} between
 * all arcs of the unminimized automaton and that lookup.
 *
 * @param block  the block whose members are examined
 * @param symbol the input symbol used for the arc lookups
 * @return a list of source nodes, one entry per selected arc (duplicates are kept)
 */
protected Collection<Object> getInverseImages(ExactBlock<Arc> block, Object symbol) {
    List<Object> sources = new ArrayList<>();
    for (Arc member : block.getMembers()) {
        Collection<Arc> incoming;
        if (member == SINK_NODE) {
            // identity comparison: SINK_NODE is used as a sentinel object
            incoming = getUnminimizedFA().getArcsByInput(symbol);
            if (!sparseMode) {
                incoming = difference(getUnminimizedFA().getArcs(), incoming);
            }
        } else {
            incoming = getUnminimizedFA().getArcsByTargetAndInput(member, symbol);
        }
        // a null lookup result simply contributes nothing for this member
        if (incoming != null) {
            for (Arc edge : incoming) {
                sources.add(edge.getSourceNode());
            }
        }
    }
    return sources;
}
Also used : Arc(edu.stanford.nlp.fsm.TransducerGraph.Arc) ArrayList(java.util.ArrayList)

Example 4 with Arc

use of edu.stanford.nlp.fsm.TransducerGraph.Arc in project CoreNLP by stanfordnlp.

the class ExactAutomatonMinimizer method makeBlock.

/**
 * Creates a new ExactBlock from {@code members}, records the block for each of its
 * members other than the {@code SINK_NODE} sentinel in {@code memberToBlock}, and
 * registers one active (block, symbol) pair for every symbol returned by
 * {@code getSymbols()}.
 *
 * @param members the members of the block to create
 */
protected void makeBlock(Collection<Arc> members) {
    ExactBlock<Arc> created = new ExactBlock<>(Generics.newHashSet(members));
    for (Arc node : created.getMembers()) {
        if (node == SINK_NODE) {
            // the sentinel is never mapped to a block
            continue;
        }
        memberToBlock.put(node, created);
    }
    for (Object rawSymbol : getSymbols()) {
        // the cast is kept: each symbol is paired with the block as an Arc
        addActivePair(new Pair<>(created, (Arc) rawSymbol));
    }
}
Also used : Arc(edu.stanford.nlp.fsm.TransducerGraph.Arc)

Example 5 with Arc

use of edu.stanford.nlp.fsm.TransducerGraph.Arc in project CoreNLP by stanfordnlp.

the class ExactAutomatonMinimizer method minimize.

/**
 * Runs the block-refinement loop: creates the initial blocks, then processes
 * active (block, symbol) pairs until none remain.
 *
 * <p>For each pair, the inverse images of the block under the symbol are grouped
 * by block via {@code sortIntoBlocks}. A group that is empty or covers its whole
 * block is skipped. Otherwise the group is removed from its block and turned into
 * a new block; when the group is larger than what would remain, the result of
 * {@code difference(block members, group)} is split off instead.
 *
 * @throws RuntimeException if {@code sortIntoBlocks} produced a null block key
 */
protected void minimize() {
    makeInitialBlocks();
    while (hasActivePair()) {
        Pair<ExactBlock<Arc>, ?> current = getActivePair();
        ExactBlock<Arc> splitter = current.first();
        Object symbol = current.second();
        Map<ExactBlock<Arc>, Set<Object>> grouped = sortIntoBlocks(getInverseImages(splitter, symbol));
        for (Map.Entry<ExactBlock<Arc>, Set<Object>> entry : grouped.entrySet()) {
            ExactBlock<Arc> block = entry.getKey();
            if (block == null) {
                throw new RuntimeException("block was null");
            }
            // raw type kept on purpose: the value is passed on to helpers below
            // that are declared with other element types
            Collection members = entry.getValue();
            int hits = members.size();
            if (hits == 0) {
                continue;
            }
            if (hits == block.getMembers().size()) {
                // every member of the block was hit, so there is nothing to split
                continue;
            }
            if (hits > block.getMembers().size() - hits) {
                // the group is the larger part; split off the other part instead
                members = difference(block.getMembers(), members);
            }
            removeAll(block.getMembers(), members);
            makeBlock(members);
        }
    }
}
Also used : Arc(edu.stanford.nlp.fsm.TransducerGraph.Arc) Set(java.util.Set) Collection(java.util.Collection)

Aggregations

Arc (edu.stanford.nlp.fsm.TransducerGraph.Arc): 5, TransducerGraph (edu.stanford.nlp.fsm.TransducerGraph): 1, ClassicCounter (edu.stanford.nlp.stats.ClassicCounter): 1, Pair (edu.stanford.nlp.util.Pair): 1, ArrayList (java.util.ArrayList): 1, Collection (java.util.Collection): 1, Set (java.util.Set): 1