Use of edu.stanford.nlp.util.MutableInteger in project CoreNLP by stanfordnlp.
The class Sighan2005DocumentReaderAndWriter, method printLattice.
/**
 * Converts a lattice of binary boundary predictions into a word lattice and writes
 * the result to {@code out} using {@code DFSA.printAttFsmFormat}.
 *
 * @param tagLattice search graph whose initial state is the starting point of the conversion
 * @param doc        labels of the document, consumed position by position during the conversion
 * @param out        destination writer for the printed answer lattice
 * @throws RuntimeException wrapping any {@link IOException} raised while printing
 */
@Override
public void printLattice(DFSA<String, Integer> tagLattice, List<CoreLabel> doc, PrintWriter out) {
  // Snapshot the document as an array so the recursion can index it by position.
  final int docLength = doc.size();
  final CoreLabel[] labels = doc.toArray(new CoreLabel[docLength]);

  // Empty word lattice whose root takes the counter's starting id (0).
  final MutableInteger idCounter = new MutableInteger(0);
  final DFSA<String, Integer> wordLattice = new DFSA<>(null);
  final DFSAState<String, Integer> wordRoot = new DFSAState<>(idCounter.intValue(), wordLattice);
  wordLattice.setInitialState(wordRoot);

  // Search-graph node -> answer-lattice node, shared across the whole recursion so
  // that recombined paths end in the same answer node.
  final Map<DFSAState<String, Integer>, DFSAState<String, Integer>> tagToWordState = Generics.newHashMap();

  // Walk the binary lattice from its initial state, starting with an empty partial
  // word at position 0 and zero accumulated cost.
  final DFSAState<String, Integer> tagRoot = tagLattice.initialState();
  final StringBuilder emptyWord = new StringBuilder();
  tagLatticeToAnswerLattice(tagRoot, wordRoot, emptyWord, idCounter, 0, 0.0, tagToWordState, wordLattice, labels);

  try {
    wordLattice.printAttFsmFormat(out);
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}
Use of edu.stanford.nlp.util.MutableInteger in project CoreNLP by stanfordnlp.
The class Sighan2005DocumentReaderAndWriter, method tagLatticeToAnswerLattice.
/**
* Recursively builds an answer lattice (Chinese words) from a Viterbi search graph
* of binary predictions. This function does a limited amount of post-processing:
* preserve white spaces of the input, and not segment between two latin characters or
* between two digits. Consequently, the probabilities of all paths in answerLattice
* may not sum to 1 (they do sum to 1 if no post processing applies).
*
* <p>Each call looks at the transitions leaving {@code tSource}. A transition labelled
* {@code "1"} is treated as a word boundary before the character at {@code pos}: the
* partial word accumulated so far becomes an edge of the answer lattice and a new
* partial word is started. Any transition that survives the filters appends the
* current character to the partial word and recurses on the transition target with
* {@code pos + 1}.
*
* <p>Side effects: may add a transition to {@code tSource}, adds states/edges reachable
* from {@code aSource}, adds entries to {@code stateLinks}, and increments
* {@code nodeId}. {@code answer} itself is not modified (it is copied per transition).
*
* @param tSource Current node in Viterbi search graph.
* @param aSource Current node in answer lattice.
* @param answer Partial word starting at aSource.
* @param nodeId Mutable counter holding the id most recently given to an answer-lattice
* node; it is incremented by one immediately before each new node is created.
* @param pos Current position in docArray.
* @param cost Accumulated cost of the partial word {@code answer}; it restarts from 0.0
* whenever a new word is started.
* @param stateLinks Maps nodes of the search graph to nodes in answer lattice
* (when paths of the search graph are recombined, paths of the answer lattice should be
* recombined as well, if at word boundary).
* @param answerLattice Lattice handed to the constructor of every newly created answer node.
* @param docArray Labels of the document, indexed by {@code pos}; a position at or past
* the end yields no current label.
*/
private void tagLatticeToAnswerLattice(DFSAState<String, Integer> tSource, DFSAState<String, Integer> aSource, StringBuilder answer, MutableInteger nodeId, int pos, double cost, Map<DFSAState<String, Integer>, DFSAState<String, Integer>> stateLinks, DFSA<String, Integer> answerLattice, CoreLabel[] docArray) {
// Add "1" prediction after the end of the sentence, if applicable:
// an accepting node without outgoing inputs receives a zero-score transition to a
// placeholder state (id -1, no lattice), so the loop below still sees a final "1"
// that closes the last word. NOTE(review): the argument order is presumably
// (id, source, target, input, output, score) -- confirm against DFSATransition.
if (tSource.isAccepting() && tSource.continuingInputs().isEmpty()) {
tSource.addTransition(new DFSATransition<>("", tSource, new DFSAState<>(-1, null), "1", "", 0));
}
// Get current label, character, and prediction:
// all three stay null once pos has run past the end of docArray.
CoreLabel curLabel = (pos < docArray.length) ? docArray[pos] : null;
String curChr = null, origSpace = null;
if (curLabel != null) {
curChr = curLabel.get(CoreAnnotations.OriginalCharAnnotation.class);
assert (curChr.length() == 1);
// origSpace is compared against "1"/"0" below to tell whether the input had a
// space before this character.
origSpace = curLabel.get(CoreAnnotations.SpaceBeforeAnnotation.class);
}
// Get set of successors in search graph:
Set<String> inputs = tSource.continuingInputs();
// Only keep most probable transition out of initial state:
// answerConstraint ends up as the non-null input with the lowest score, and is only
// ever set when pos == 0.
String answerConstraint = null;
if (pos == 0) {
double minCost = Double.POSITIVE_INFINITY;
// DFSATransition<String, Integer> bestTransition = null;
for (String predictSpace : inputs) {
DFSATransition<String, Integer> transition = tSource.transition(predictSpace);
double transitionCost = transition.score();
if (transitionCost < minCost) {
// A null input never becomes the constraint and never lowers minCost.
if (predictSpace != null) {
logger.info(String.format("mincost (%s): %e -> %e%n", predictSpace, minCost, transitionCost));
minCost = transitionCost;
answerConstraint = predictSpace;
}
}
}
}
// Follow along each transition:
for (String predictSpace : inputs) {
DFSATransition<String, Integer> transition = tSource.transition(predictSpace);
DFSAState<String, Integer> tDest = transition.target();
DFSAState<String, Integer> newASource = aSource;
//logger.info(String.format("tsource=%s tdest=%s asource=%s pos=%d predictSpace=%s%n", tSource, tDest, newASource, pos, predictSpace));
// Work on a private copy so sibling transitions all start from the same partial word.
StringBuilder newAnswer = new StringBuilder(answer.toString());
int answerLen = newAnswer.length();
// Last character of the partial word so far, or null if the word is still empty.
String prevChr = (answerLen > 0) ? newAnswer.substring(answerLen - 1) : null;
double newCost = cost;
// At pos 0, follow only the cheapest transition selected above; at any other
// position answerConstraint is null and nothing is skipped here:
if (answerConstraint != null && !answerConstraint.equals(predictSpace)) {
logger.info(String.format("Skipping transition %s at pos 0.%n", predictSpace));
continue;
}
// Ignore paths not consistent with input segmentation:
// with keepAllWhitespaces set, a "0" prediction is dropped where the input has a space.
if (flags.keepAllWhitespaces && "0".equals(predictSpace) && "1".equals(origSpace)) {
logger.info(String.format("Skipping non-boundary at pos %d, since space in the input.%n", pos));
continue;
}
// Do not hypothesize a boundary that is absent from the input ("1" predicted where
// origSpace is "0") between two ASCII letters or between two numeral characters:
if ("1".equals(predictSpace) && "0".equals(origSpace) && prevChr != null && curChr != null) {
char p = prevChr.charAt(0), c = curChr.charAt(0);
if (ChineseStringUtils.isLetterASCII(p) && ChineseStringUtils.isLetterASCII(c)) {
logger.info(String.format("Not hypothesizing a boundary at pos %d, since between two ASCII letters (%s and %s).%n", pos, prevChr, curChr));
continue;
}
if (ChineseUtils.isNumber(p) && ChineseUtils.isNumber(c)) {
logger.info(String.format("Not hypothesizing a boundary at pos %d, since between two numeral characters (%s and %s).%n", pos, prevChr, curChr));
continue;
}
}
// If predictSpace==1, create a new transition in answer search graph:
if ("1".equals(predictSpace)) {
// An empty partial word produces no edge; the character is simply appended below.
if (newAnswer.toString().length() > 0) {
// If answer destination node visited before, create a new edge and leave:
// the search-graph node is not expanded a second time from here.
if (stateLinks.containsKey(tSource)) {
DFSAState<String, Integer> aDest = stateLinks.get(tSource);
newASource.addTransition(new DFSATransition<>("", newASource, aDest, newAnswer.toString(), "", newCost));
//logger.info(String.format("new transition: asource=%s adest=%s edge=%s%n", newASource, aDest, newAnswer));
continue;
}
// If answer destination node not visited before, create it + new edge:
nodeId.incValue(1);
DFSAState<String, Integer> aDest = new DFSAState<>(nodeId.intValue(), answerLattice, 0.0);
// Remember the pairing so later paths through tSource reuse aDest.
stateLinks.put(tSource, aDest);
newASource.addTransition(new DFSATransition<>("", newASource, aDest, newAnswer.toString(), "", newCost));
// Reached an accepting state:
// mark the answer node accepting and stop following this transition.
if (tSource.isAccepting()) {
aDest.setAccepting(true);
continue;
}
// Start new answer edge:
// the next word begins at aDest with empty text and zero accumulated cost.
newASource = aDest;
newAnswer = new StringBuilder();
newCost = 0.0;
}
}
// NOTE(review): curChr is guarded only by this assert. With assertions disabled and
// curChr == null, append() would add the literal text "null" and curChr.charAt(0)
// below would throw a NullPointerException if evaluated -- confirm that this point
// is unreachable when pos is past the end of docArray.
assert (curChr != null);
newAnswer.append(curChr);
newCost += transition.score();
// Recurse only while the accumulated cost is below flags.searchGraphPrune; a current
// character that is an ASCII letter is always followed regardless of cost.
if (newCost < flags.searchGraphPrune || ChineseStringUtils.isLetterASCII(curChr.charAt(0)))
tagLatticeToAnswerLattice(tDest, newASource, newAnswer, nodeId, pos + 1, newCost, stateLinks, answerLattice, docArray);
}
}
Aggregations