Search in sources :

Example 1 with DFSAState

use of edu.stanford.nlp.fsm.DFSAState in project CoreNLP by stanfordnlp.

the class Sighan2005DocumentReaderAndWriter method printLattice.

/**
 * Converts the lattice of binary tag predictions into a lattice whose edges carry
 * words, then writes that word lattice to {@code out} via
 * {@code DFSA.printAttFsmFormat}.
 *
 * @param tagLattice search graph of binary predictions; traversal starts at its initial state
 * @param doc the labels the lattice was built over, one entry per position
 * @param out destination for the printed word lattice
 * @throws RuntimeException wrapping any {@link IOException} raised while printing
 */
@Override
public void printLattice(DFSA<String, Integer> tagLattice, List<CoreLabel> doc, PrintWriter out) {
    // Node ids for the word lattice are handed out from this shared counter, starting at 0.
    MutableInteger nextNodeId = new MutableInteger(0);

    // Empty word lattice with a single start node.
    DFSA<String, Integer> wordLattice = new DFSA<>(null);
    DFSAState<String, Integer> wordStart = new DFSAState<>(nextNodeId.intValue(), wordLattice);
    wordLattice.setInitialState(wordStart);

    // Remembers which word-lattice node corresponds to an already visited tag-lattice node.
    Map<DFSAState<String, Integer>, DFSAState<String, Integer>> visited = Generics.newHashMap();
    CoreLabel[] labels = doc.toArray(new CoreLabel[0]);

    // Recursive conversion: starts at position 0 with an empty partial word and zero cost.
    tagLatticeToAnswerLattice(
        tagLattice.initialState(), wordStart, new StringBuilder(), nextNodeId,
        0, 0.0, visited, wordLattice, labels);

    try {
        wordLattice.printAttFsmFormat(out);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
Also used : MutableInteger(edu.stanford.nlp.util.MutableInteger) IOException(java.io.IOException) DFSA(edu.stanford.nlp.fsm.DFSA) CoreLabel(edu.stanford.nlp.ling.CoreLabel) DFSAState(edu.stanford.nlp.fsm.DFSAState)

Example 2 with DFSAState

use of edu.stanford.nlp.fsm.DFSAState in project CoreNLP by stanfordnlp.

the class Sighan2005DocumentReaderAndWriter method tagLatticeToAnswerLattice.

/**
 * Depth-first conversion of a Viterbi search graph over binary boundary predictions
 * ("1" = boundary before the current character, "0" = no boundary) into an answer
 * lattice whose edges are labelled with whole words.
 *
 * <p>Some light post-processing is applied while walking the graph: white space present
 * in the input is preserved (when {@code flags.keepAllWhitespaces} is set), and no
 * boundary is hypothesized between two ASCII letters or between two numeral characters
 * unless the input already had one. Because such paths are dropped, the path
 * probabilities of the answer lattice need not sum to 1 (they do when no
 * post-processing applies).
 *
 * @param tSource current node in the Viterbi search graph; if it is accepting and has no
 *     outgoing input, a synthetic "1" transition is added to it
 * @param aSource current node in the answer lattice
 * @param answer partial word accumulated since {@code aSource}; not modified by this call
 * @param nodeId counter holding the last node id used in the answer lattice; incremented
 *     whenever a new answer node is created
 * @param pos current position in {@code docArray}
 * @param cost cost accumulated for {@code answer} so far
 * @param stateLinks maps search-graph nodes to answer-lattice nodes, so that paths which
 *     recombine in the search graph at a word boundary recombine in the answer lattice too
 * @param answerLattice lattice that newly created answer nodes are attached to
 * @param docArray labels of the document, providing for each position the original
 *     character and the space-before marker
 */
private void tagLatticeToAnswerLattice(DFSAState<String, Integer> tSource, DFSAState<String, Integer> aSource, StringBuilder answer, MutableInteger nodeId, int pos, double cost, Map<DFSAState<String, Integer>, DFSAState<String, Integer>> stateLinks, DFSA<String, Integer> answerLattice, CoreLabel[] docArray) {
    // An accepting node without successors gets an artificial "1" edge, so the loop below
    // still sees a boundary after the last character and can emit the pending word.
    if (tSource.isAccepting() && tSource.continuingInputs().isEmpty()) {
        tSource.addTransition(new DFSATransition<>("", tSource, new DFSAState<>(-1, null), "1", "", 0));
    }

    // Character and original-space marker at this position (both null past the end).
    String curChr = null;
    String origSpace = null;
    if (pos < docArray.length) {
        CoreLabel curLabel = docArray[pos];
        if (curLabel != null) {
            curChr = curLabel.get(CoreAnnotations.OriginalCharAnnotation.class);
            assert (curChr.length() == 1);
            origSpace = curLabel.get(CoreAnnotations.SpaceBeforeAnnotation.class);
        }
    }

    // Predictions that can be followed from the current search-graph node.
    Set<String> inputs = tSource.continuingInputs();

    // At the very first position only the cheapest outgoing prediction is kept.
    String answerConstraint = null;
    if (pos == 0) {
        double minCost = Double.POSITIVE_INFINITY;
        for (String candidate : inputs) {
            DFSATransition<String, Integer> candidateEdge = tSource.transition(candidate);
            double candidateCost = candidateEdge.score();
            if (candidateCost < minCost && candidate != null) {
                logger.info(String.format("mincost (%s): %e -> %e%n", candidate, minCost, candidateCost));
                minCost = candidateCost;
                answerConstraint = candidate;
            }
        }
    }

    // Explore every outgoing prediction in turn.
    for (String predictSpace : inputs) {
        DFSATransition<String, Integer> edge = tSource.transition(predictSpace);
        DFSAState<String, Integer> tDest = edge.target();

        // Last character of the partial word, if there is one.
        int answerLen = answer.length();
        String prevChr = (answerLen > 0) ? answer.substring(answerLen - 1) : null;
        boolean isBoundary = "1".equals(predictSpace);

        // Position 0: drop everything except the prediction selected above.
        if (answerConstraint != null && !answerConstraint.equals(predictSpace)) {
            logger.info(String.format("Skipping transition %s at pos 0.%n", predictSpace));
            continue;
        }

        // The input has a space here, so a "no boundary" prediction contradicts it.
        if (flags.keepAllWhitespaces && "0".equals(predictSpace) && "1".equals(origSpace)) {
            logger.info(String.format("Skipping non-boundary at pos %d, since space in the input.%n", pos));
            continue;
        }

        // Never introduce a new boundary inside a run of ASCII letters or of numerals.
        if (isBoundary && "0".equals(origSpace) && prevChr != null && curChr != null) {
            char p = prevChr.charAt(0);
            char c = curChr.charAt(0);
            if (ChineseStringUtils.isLetterASCII(p) && ChineseStringUtils.isLetterASCII(c)) {
                logger.info(String.format("Not hypothesizing a boundary at pos %d, since between two ASCII letters (%s and %s).%n", pos, prevChr, curChr));
                continue;
            }
            if (ChineseUtils.isNumber(p) && ChineseUtils.isNumber(c)) {
                logger.info(String.format("Not hypothesizing a boundary at pos %d, since between two numeral characters (%s and %s).%n", pos, prevChr, curChr));
                continue;
            }
        }

        // Per-transition working copies; the caller's state stays untouched.
        DFSAState<String, Integer> newASource = aSource;
        StringBuilder newAnswer = new StringBuilder(answer);
        double newCost = cost;

        // A boundary with a non-empty pending word closes that word with an answer edge.
        if (isBoundary && newAnswer.length() > 0) {
            String word = newAnswer.toString();

            // Only non-null nodes are ever stored, so a null lookup means "not visited yet".
            DFSAState<String, Integer> aDest = stateLinks.get(tSource);
            if (aDest != null) {
                // Destination already exists: just add the edge; its continuation was explored before.
                newASource.addTransition(new DFSATransition<>("", newASource, aDest, word, "", newCost));
                continue;
            }

            // First visit: allocate the destination node, remember it, and link to it.
            nodeId.incValue(1);
            aDest = new DFSAState<>(nodeId.intValue(), answerLattice, 0.0);
            stateLinks.put(tSource, aDest);
            newASource.addTransition(new DFSATransition<>("", newASource, aDest, word, "", newCost));

            // End of the search graph: mark the new node as final and stop this path.
            if (tSource.isAccepting()) {
                aDest.setAccepting(true);
                continue;
            }

            // Otherwise a fresh word starts at the new node with zero accumulated cost.
            newASource = aDest;
            newAnswer = new StringBuilder();
            newCost = 0.0;
        }

        // Extend the pending word by the current character and pay for the transition.
        assert (curChr != null);
        newAnswer.append(curChr);
        newCost += edge.score();

        // Recurse unless pruned by cost; paths ending in an ASCII letter are never pruned.
        if (newCost < flags.searchGraphPrune || ChineseStringUtils.isLetterASCII(curChr.charAt(0))) {
            tagLatticeToAnswerLattice(tDest, newASource, newAnswer, nodeId, pos + 1, newCost, stateLinks, answerLattice, docArray);
        }
    }
}
Also used : MutableInteger(edu.stanford.nlp.util.MutableInteger) CoreLabel(edu.stanford.nlp.ling.CoreLabel) DFSAState(edu.stanford.nlp.fsm.DFSAState)

Aggregations

DFSAState (edu.stanford.nlp.fsm.DFSAState): 2, CoreLabel (edu.stanford.nlp.ling.CoreLabel): 2, MutableInteger (edu.stanford.nlp.util.MutableInteger): 2, DFSA (edu.stanford.nlp.fsm.DFSA): 1, IOException (java.io.IOException): 1