use of com.joliciel.jochre.boundaries.ShapeInSequence in project jochre by urieli.
the class LexiconErrorWriter method onGuessSequence.
/**
 * Records the outcome for one guessed letter sequence: writes one CSV row per
 * applicable writer and updates the error statistics.
 * <p>
 * The loop makes three passes over the same sequence:
 * <ul>
 * <li>pass 0 always writes to {@code allWordWriter};</li>
 * <li>pass 1 writes to {@code allErrorWriter}, and is skipped when the guessed
 * word equals the real word;</li>
 * <li>pass 2 updates the statistics and writes to one of the four
 * known/unknown x error/correct writers.</li>
 * </ul>
 *
 * @param bestSequence the sequence whose real and guessed words are compared
 * @throws RuntimeException wrapping any {@link IOException} raised while writing
 */
@Override
public void onGuessSequence(LetterSequence bestSequence) {
try {
// Frequency stays 0 when no word chooser is configured.
int realFrequency = 0;
if (wordChooser != null)
realFrequency = wordChooser.getFrequency(bestSequence, false);
// error: the guessed word differs from the real word.
boolean error = !bestSequence.getRealWord().equals(bestSequence.getGuessedWord());
// known: the word chooser reported a positive frequency.
boolean known = realFrequency > 0;
// badSeg: the real sequence contains a "[" or "|" marker.
boolean badSeg = bestSequence.getRealSequence().contains("[") || bestSequence.getRealSequence().contains("|");
for (int i = 0; i < 3; i++) {
Writer writer = null;
if (i == 0) {
writer = allWordWriter;
} else if (i == 1) {
if (error)
writer = allErrorWriter;
else
continue;
} else {
// Pass 2: update statistics, then pick the writer matching known/error.
// j indexes bestSequence.getLetters() in step with the shape iteration below.
int j = 0;
// Statistics to update: the ALL_GROUP entry, the current document's entry
// (keyed by name), and every document group containing the current document id.
// NOTE(review): errorMap.get(...) results are used without a null check in the
// loops below - presumably every key is pre-populated; confirm.
List<ErrorStatistics> statList = new ArrayList<LexiconErrorWriter.ErrorStatistics>();
statList.add(errorMap.get(ALL_GROUP));
statList.add(errorMap.get(currentDoc.getName()));
for (String docGroupName : documentGroups.keySet()) {
if (documentGroups.get(docGroupName).contains(currentDoc.getId()))
statList.add(errorMap.get(docGroupName));
}
// The flag is consumed here: counted once for this sequence, then reset to false.
if (beamContainsRightWord) {
if (error) {
for (ErrorStatistics stats : statList) stats.answerInBeamErrorCount++;
} else {
for (ErrorStatistics stats : statList) stats.answerInBeamCorrectCount++;
}
beamContainsRightWord = false;
}
Linguistics linguistics = jochreSession.getLinguistics();
// Letter-level counts: compare each shape's letter with the guessed letter at index j.
// NOTE(review): assumes getLetters() has at least as many entries as the underlying
// shape sequence - confirm.
for (ShapeInSequence shapeInSequence : bestSequence.getUnderlyingShapeSequence()) {
String letterGuess = bestSequence.getLetters().get(j++);
String letter = shapeInSequence.getShape().getLetter();
// A letter counts as badly segmented when it contains "|", is empty, or has more
// than one character without being a dual-character letter.
boolean badSegLetter = letter.contains("|") || letter.length() == 0 || (letter.length() > 1 && !linguistics.getDualCharacterLetters().contains(letter));
if (letter.equals(letterGuess)) {
if (known) {
for (ErrorStatistics stats : statList) stats.knownWordCorrectLetterCount++;
} else {
for (ErrorStatistics stats : statList) stats.unknownWordCorrectLetterCount++;
}
if (badSegLetter) {
for (ErrorStatistics stats : statList) stats.badSegCorrectLetterCount++;
} else {
for (ErrorStatistics stats : statList) stats.goodSegCorrectLetterCount++;
}
} else {
if (known) {
for (ErrorStatistics stats : statList) stats.knownWordErrorLetterCount++;
} else {
for (ErrorStatistics stats : statList) stats.unknownWordErrorLetterCount++;
}
if (badSegLetter) {
for (ErrorStatistics stats : statList) stats.badSegErrorLetterCount++;
} else {
for (ErrorStatistics stats : statList) stats.goodSegErrorLetterCount++;
}
}
}
// Word-level counts by known/error; the four branches are exhaustive, so writer
// is always assigned here.
if (error && known) {
for (ErrorStatistics stats : statList) stats.knownWordErrorCount++;
writer = knownWordErrorWriter;
} else if (error && !known) {
for (ErrorStatistics stats : statList) stats.unknownWordErrorCount++;
writer = unknownWordErrorWriter;
} else if (!error && known) {
for (ErrorStatistics stats : statList) stats.knownWordCorrectCount++;
writer = knownWordCorrectWriter;
} else if (!error && !known) {
for (ErrorStatistics stats : statList) stats.unknownWordCorrectCount++;
writer = unknownWordCorrectWriter;
}
// Word-level counts by segmentation quality.
if (error) {
if (badSeg) {
for (ErrorStatistics stats : statList) stats.badSegErrorCount++;
} else {
for (ErrorStatistics stats : statList) stats.goodSegErrorCount++;
}
} else {
if (badSeg) {
for (ErrorStatistics stats : statList) stats.badSegCorrectCount++;
} else {
for (ErrorStatistics stats : statList) stats.goodSegCorrectCount++;
}
}
}
// Row layout: real sequence, real word, guessed sequence, guessed word, ...
writer.write(CSV.format(bestSequence.getRealSequence()));
writer.write(CSV.format(bestSequence.getRealWord()));
writer.write(CSV.format(bestSequence.getGuessedSequence()));
writer.write(CSV.format(bestSequence.getGuessedWord()));
// The known/error columns are only written in passes 0 and 1; the pass-2 writers
// are already specific to one known/error combination.
if (i < 2) {
writer.write(CSV.format(known ? 1 : 0));
writer.write(CSV.format(error ? 1 : 0));
}
writer.write(CSV.format(realFrequency));
writer.write(CSV.format(bestSequence.getFrequency()));
// Location columns are taken from the first group of the sequence.
// NOTE(review): get(0) assumes the sequence has at least one group - confirm.
GroupOfShapes group = bestSequence.getGroups().get(0);
writer.write(CSV.format(group.getRow().getParagraph().getImage().getPage().getDocument().getName()));
writer.write(CSV.format(group.getRow().getParagraph().getImage().getPage().getIndex()));
writer.write(CSV.format(group.getRow().getParagraph().getIndex()));
writer.write(CSV.format(group.getRow().getIndex()));
writer.write(CSV.format(group.getIndex()));
writer.write(CSV.format(group.getId()));
// Optional beam dump: three cells (guessed sequence, score, adjusted score) per final
// sequence, then one empty cell as a separator, then the same for holdover sequences.
if (this.includeBeam) {
if (finalSequences != null) {
for (LetterSequence sequence : finalSequences) {
writer.write(CSV.format(sequence.getGuessedSequence()));
writer.write(CSV.format(sequence.getScore()));
writer.write(CSV.format(sequence.getAdjustedScore()));
}
}
writer.write(CSV.format(""));
if (holdoverSequences != null) {
for (LetterSequence sequence : holdoverSequences) {
writer.write(CSV.format(sequence.getGuessedSequence()));
writer.write(CSV.format(sequence.getScore()));
writer.write(CSV.format(sequence.getAdjustedScore()));
}
}
}
// End the row and flush after every row written.
writer.write("\n");
writer.flush();
}
} catch (IOException e) {
// Log, then rethrow unchecked with the original exception as cause.
LOG.error("Failed to write to LexiconErrorWriter", e);
throw new RuntimeException(e);
}
}
use of com.joliciel.jochre.boundaries.ShapeInSequence in project jochre by urieli.
the class LetterSequence method getRealSequence.
/**
 * Returns the real sequence behind this letter sequence as a string, including
 * split letters and inkspots. The string is built on first call and cached in
 * {@code realSequence}.
 * <p>
 * Each distinct consecutive original shape contributes its letter: an empty letter
 * is rendered as {@code []}, a multi-character letter that is not a dual-character
 * letter is wrapped in square brackets, and anything else is appended as is.
 */
public String getRealSequence() {
  if (realSequence != null) {
    return realSequence;
  }

  Linguistics linguistics = jochreSession.getLinguistics();
  StringBuilder builder = new StringBuilder();
  Shape previous = null;
  for (ShapeInSequence member : this.getUnderlyingShapeSequence()) {
    for (Shape current : member.getOriginalShapes()) {
      // An original shape shared by consecutive members is only rendered once.
      if (!current.equals(previous)) {
        String letter = current.getLetter();
        if (letter.isEmpty()) {
          builder.append("[]");
        } else if (letter.length() > 1 && !linguistics.getDualCharacterLetters().contains(letter)) {
          builder.append("[").append(letter).append("]");
        } else {
          builder.append(letter);
        }
      }
      previous = current;
    }
  }
  realSequence = builder.toString();
  return realSequence;
}
use of com.joliciel.jochre.boundaries.ShapeInSequence in project jochre by urieli.
the class OriginalShapeLetterAssigner method onGuessSequence.
/**
 * Walks the shapes underlying the guessed sequence and, for every distinct
 * consecutive original shape, passes the sequence shapes that overlap it to
 * {@code assignLetter}.
 * <p>
 * A sequence shape and an original shape need not correspond one-to-one. A sequence
 * shape may map to exactly one original shape, share an original shape with its
 * previous and/or next neighbour (the original was split), or span two or more
 * original shapes (the originals were merged) - and these can combine. Whenever a
 * new original shape is reached, the one before it is complete and gets its letter.
 */
@Override
public void onGuessSequence(LetterSequence letterSequence) {
  stillValid = true;

  Shape currentOriginal = null;
  List<ShapeInSequence> overlapping = new ArrayList<ShapeInSequence>();

  for (ShapeInSequence member : letterSequence.getUnderlyingShapeSequence()) {
    for (Shape candidate : member.getOriginalShapes()) {
      boolean sameAsCurrent = candidate.equals(currentOriginal);
      if (!sameAsCurrent) {
        // Reached a new original shape: flush the previous one, then start afresh.
        if (currentOriginal != null) {
          this.assignLetter(currentOriginal, overlapping);
        }
        currentOriginal = candidate;
        overlapping = new ArrayList<ShapeInSequence>();
      }
      overlapping.add(member);
    }
  }

  // The last original shape has no successor to trigger its flush.
  if (currentOriginal != null) {
    this.assignLetter(currentOriginal, overlapping);
  }
}
use of com.joliciel.jochre.boundaries.ShapeInSequence in project jochre by urieli.
the class OriginalShapeLetterAssigner method assignLetter.
/**
 * Builds the guessed letter for one original shape from the sequence shapes that
 * overlap it, stores it on the shape, and optionally saves and evaluates it.
 * <p>
 * A sequence shape with a single original shape contributes its guess unchanged.
 * A sequence shape built from several original shapes contributes
 * {@code "|" + guess} to the first of them, {@code guess + "|"} to the last, and
 * nothing to those in the middle.
 * <p>
 * When {@code evaluate} is set and the sequence is still valid, the f-score is
 * updated and {@code hasError} is raised if the guess does not match the real letter.
 *
 * @param originalShape the original shape receiving the guessed letter
 * @param subsequenceForOriginalShape the sequence shapes overlapping that original shape
 * @throws JochreException if evaluation is requested but no letter validator is set
 */
void assignLetter(Shape originalShape, List<ShapeInSequence> subsequenceForOriginalShape) {
  String guessedLetter = "";
  for (ShapeInSequence shapeInSubSequence : subsequenceForOriginalShape) {
    if (shapeInSubSequence.getOriginalShapes().size() == 1) {
      // One-to-one (or the original was split): take the guess as is.
      guessedLetter += shapeInSubSequence.getShape().getOriginalGuess();
    } else {
      // Several originals were joined into this sequence shape, so its letter has to
      // be split among them. Find where this original sits inside the joined shape.
      int j = 0;
      int myIndex = -1;
      for (Shape myOriginalShape : shapeInSubSequence.getOriginalShapes()) {
        if (myOriginalShape.equals(originalShape)) {
          myIndex = j;
          break;
        }
        j++;
      }
      if (myIndex == 0) {
        // The original shape starts the joined shape.
        if (shapeInSubSequence.getShape().getOriginalGuess().length() > 0) {
          guessedLetter += "|" + shapeInSubSequence.getShape().getOriginalGuess();
        }
      } else if (myIndex == shapeInSubSequence.getOriginalShapes().size() - 1) {
        // The original shape ends the joined shape.
        if (shapeInSubSequence.getShape().getOriginalGuess().length() > 0) {
          guessedLetter += shapeInSubSequence.getShape().getOriginalGuess() + "|";
        }
      }
      // Otherwise the original shape is in the middle of the joined shape
      // and is deliberately left blank.
    }
  }

  originalShape.setOriginalGuess(guessedLetter);
  if (currentImage.getImageStatus().equals(ImageStatus.AUTO_NEW)) {
    originalShape.setLetter(guessedLetter);
  }
  if (save) {
    originalShape.save();
  }

  if (evaluate && stillValid) {
    if (letterValidator == null) {
      throw new JochreException("Cannot evaluate without a letter validator.");
    }
    String realLetter = originalShape.getLetter();
    // Split markers are not part of the letter itself, so drop them before validating.
    String realLetterForCheck = realLetter.replace("|", "");
    if (letterValidator.validate(realLetterForCheck)) {
      if (guessedLetter.startsWith("|") && guessedLetter.length() == 3 && realLetter.equals("" + guessedLetter.charAt(1))) {
        // The guess is the first half of a split dual letter and matches the real letter.
        this.incrementFScore(realLetter, realLetter);
      } else if (guessedLetter.endsWith("|") && guessedLetter.length() == 3 && realLetter.equals("" + guessedLetter.charAt(1))) {
        // The guess is the second half of a split dual letter and matches the real letter.
        this.incrementFScore(realLetter, realLetter);
      } else if (realLetter.startsWith("|") && realLetter.length() == 3 && guessedLetter.equals("" + realLetter.charAt(1))) {
        // The real letter is the first half of a split dual letter and its first letter was guessed.
        this.incrementFScore(realLetter, realLetter);
      } else if (realLetter.endsWith("|") && realLetter.length() == 3 && guessedLetter.equals("" + realLetter.charAt(1))) {
        // The real letter is the second half of a split dual letter and its second letter was guessed.
        this.incrementFScore(realLetter, realLetter);
      } else {
        this.incrementFScore(realLetter, guessedLetter);
        // An error is a guess that differs from the real letter; a matching guess
        // must not raise the flag.
        if (!realLetter.equals(guessedLetter)) {
          hasError = true;
        }
      }
    } else {
      // The real letter as a whole is not valid. It is tolerated only if every
      // character is valid on its own or together with the character before it;
      // otherwise the rest of this sequence is excluded from evaluation.
      String prevChar = "";
      for (int i = 0; i < realLetterForCheck.length(); i++) {
        String nextChar = "" + realLetterForCheck.charAt(i);
        if (!letterValidator.validate(nextChar) && !letterValidator.validate(prevChar + nextChar)) {
          stillValid = false;
          break;
        }
        prevChar = nextChar;
      }
      if (stillValid) {
        this.incrementFScore(realLetter, guessedLetter);
      }
    }
  }
}
use of com.joliciel.jochre.boundaries.ShapeInSequence in project jochre by urieli.
the class ShapeIndexFeature method checkInternal.
/**
 * Produces a feature result holding the index of the wrapped shape-in-sequence.
 *
 * @param wrapper supplies the shape-in-sequence whose index is reported
 * @param env the runtime environment (not used by this feature)
 * @return the value of {@code generateResult} applied to that index
 */
@Override
public FeatureResult<Integer> checkInternal(ShapeInSequenceWrapper wrapper, RuntimeEnvironment env) {
  return this.generateResult(wrapper.getShapeInSequence().getIndex());
}
Aggregations