Search in sources :

Example 1 with ShapeInSequence

use of com.joliciel.jochre.boundaries.ShapeInSequence in project jochre by urieli.

the class LexiconErrorWriter method onGuessSequence.

/**
 * Records the outcome of one guessed letter sequence: updates the error
 * statistics and writes one CSV row per applicable writer.
 * <p>
 * The loop below makes three passes over the same sequence:
 * pass 0 always writes to {@code allWordWriter};
 * pass 1 writes to {@code allErrorWriter}, but only when the guessed word
 * differs from the real word;
 * pass 2 updates the statistics and writes to one of the four
 * known/unknown x error/correct writers.
 *
 * @param bestSequence the letter sequence to record
 * @throws RuntimeException wrapping any IOException raised while writing
 */
@Override
public void onGuessSequence(LetterSequence bestSequence) {
    try {
        // The frequency stays 0 when no word chooser is configured.
        int realFrequency = 0;
        if (wordChooser != null)
            realFrequency = wordChooser.getFrequency(bestSequence, false);
        // error: the guessed word is not the same as the real word
        boolean error = !bestSequence.getRealWord().equals(bestSequence.getGuessedWord());
        // known: the word chooser reported a positive frequency
        boolean known = realFrequency > 0;
        // badSeg: the real sequence contains a "[" or "|" marker
        boolean badSeg = bestSequence.getRealSequence().contains("[") || bestSequence.getRealSequence().contains("|");
        for (int i = 0; i < 3; i++) {
            Writer writer = null;
            if (i == 0) {
                // pass 0: every word
                writer = allWordWriter;
            } else if (i == 1) {
                // pass 1: only words with an error; otherwise nothing is written
                if (error)
                    writer = allErrorWriter;
                else
                    continue;
            } else {
                // pass 2: update the statistics, then pick the writer matching known/error
                int j = 0;
                // Statistics to update: the global group, the current document,
                // and every document group that contains the current document.
                List<ErrorStatistics> statList = new ArrayList<LexiconErrorWriter.ErrorStatistics>();
                statList.add(errorMap.get(ALL_GROUP));
                statList.add(errorMap.get(currentDoc.getName()));
                for (String docGroupName : documentGroups.keySet()) {
                    if (documentGroups.get(docGroupName).contains(currentDoc.getId()))
                        statList.add(errorMap.get(docGroupName));
                }
                // The flag is counted once and then reset.
                // NOTE(review): presumably set elsewhere while the beam is inspected - confirm.
                if (beamContainsRightWord) {
                    if (error) {
                        for (ErrorStatistics stats : statList) stats.answerInBeamErrorCount++;
                    } else {
                        for (ErrorStatistics stats : statList) stats.answerInBeamCorrectCount++;
                    }
                    beamContainsRightWord = false;
                }
                Linguistics linguistics = jochreSession.getLinguistics();
                // Letter-level counts: j walks the guessed letters in step with the shapes.
                // NOTE(review): assumes getLetters() has one entry per underlying shape - confirm.
                for (ShapeInSequence shapeInSequence : bestSequence.getUnderlyingShapeSequence()) {
                    String letterGuess = bestSequence.getLetters().get(j++);
                    String letter = shapeInSequence.getShape().getLetter();
                    // A letter counts as badly segmented when it contains "|", is empty,
                    // or is longer than one character without being a listed dual-character letter.
                    boolean badSegLetter = letter.contains("|") || letter.length() == 0 || (letter.length() > 1 && !linguistics.getDualCharacterLetters().contains(letter));
                    if (letter.equals(letterGuess)) {
                        if (known) {
                            for (ErrorStatistics stats : statList) stats.knownWordCorrectLetterCount++;
                        } else {
                            for (ErrorStatistics stats : statList) stats.unknownWordCorrectLetterCount++;
                        }
                        if (badSegLetter) {
                            for (ErrorStatistics stats : statList) stats.badSegCorrectLetterCount++;
                        } else {
                            for (ErrorStatistics stats : statList) stats.goodSegCorrectLetterCount++;
                        }
                    } else {
                        if (known) {
                            for (ErrorStatistics stats : statList) stats.knownWordErrorLetterCount++;
                        } else {
                            for (ErrorStatistics stats : statList) stats.unknownWordErrorLetterCount++;
                        }
                        if (badSegLetter) {
                            for (ErrorStatistics stats : statList) stats.badSegErrorLetterCount++;
                        } else {
                            for (ErrorStatistics stats : statList) stats.goodSegErrorLetterCount++;
                        }
                    }
                }
                // Word-level counts by known/unknown; the four cases are exhaustive,
                // so writer is always assigned here.
                if (error && known) {
                    for (ErrorStatistics stats : statList) stats.knownWordErrorCount++;
                    writer = knownWordErrorWriter;
                } else if (error && !known) {
                    for (ErrorStatistics stats : statList) stats.unknownWordErrorCount++;
                    writer = unknownWordErrorWriter;
                } else if (!error && known) {
                    for (ErrorStatistics stats : statList) stats.knownWordCorrectCount++;
                    writer = knownWordCorrectWriter;
                } else if (!error && !known) {
                    for (ErrorStatistics stats : statList) stats.unknownWordCorrectCount++;
                    writer = unknownWordCorrectWriter;
                }
                // Word-level counts by segmentation quality.
                if (error) {
                    if (badSeg) {
                        for (ErrorStatistics stats : statList) stats.badSegErrorCount++;
                    } else {
                        for (ErrorStatistics stats : statList) stats.goodSegErrorCount++;
                    }
                } else {
                    if (badSeg) {
                        for (ErrorStatistics stats : statList) stats.badSegCorrectCount++;
                    } else {
                        for (ErrorStatistics stats : statList) stats.goodSegCorrectCount++;
                    }
                }
            }
            // Write one row to the writer selected for this pass.
            writer.write(CSV.format(bestSequence.getRealSequence()));
            writer.write(CSV.format(bestSequence.getRealWord()));
            writer.write(CSV.format(bestSequence.getGuessedSequence()));
            writer.write(CSV.format(bestSequence.getGuessedWord()));
            // The known/error columns are only written in passes 0 and 1.
            // NOTE(review): presumably omitted in pass 2 because each of those writers
            // already corresponds to a single known/error combination - confirm.
            if (i < 2) {
                writer.write(CSV.format(known ? 1 : 0));
                writer.write(CSV.format(error ? 1 : 0));
            }
            writer.write(CSV.format(realFrequency));
            writer.write(CSV.format(bestSequence.getFrequency()));
            // Location columns are taken from the first group of the sequence.
            // NOTE(review): assumes the sequence has at least one group - confirm.
            GroupOfShapes group = bestSequence.getGroups().get(0);
            writer.write(CSV.format(group.getRow().getParagraph().getImage().getPage().getDocument().getName()));
            writer.write(CSV.format(group.getRow().getParagraph().getImage().getPage().getIndex()));
            writer.write(CSV.format(group.getRow().getParagraph().getIndex()));
            writer.write(CSV.format(group.getRow().getIndex()));
            writer.write(CSV.format(group.getIndex()));
            writer.write(CSV.format(group.getId()));
            if (this.includeBeam) {
                // Optional beam columns: final sequences, an empty cell, then holdover sequences.
                if (finalSequences != null) {
                    for (LetterSequence sequence : finalSequences) {
                        writer.write(CSV.format(sequence.getGuessedSequence()));
                        writer.write(CSV.format(sequence.getScore()));
                        writer.write(CSV.format(sequence.getAdjustedScore()));
                    }
                }
                // empty cell between the two lists
                writer.write(CSV.format(""));
                if (holdoverSequences != null) {
                    for (LetterSequence sequence : holdoverSequences) {
                        writer.write(CSV.format(sequence.getGuessedSequence()));
                        writer.write(CSV.format(sequence.getScore()));
                        writer.write(CSV.format(sequence.getAdjustedScore()));
                    }
                }
            }
            // End the row and flush after every row.
            writer.write("\n");
            writer.flush();
        }
    } catch (IOException e) {
        // Log, then rethrow unchecked with the cause preserved.
        LOG.error("Failed to write to LexiconErrorWriter", e);
        throw new RuntimeException(e);
    }
}
Also used : LetterSequence(com.joliciel.jochre.letterGuesser.LetterSequence) IOException(java.io.IOException) GroupOfShapes(com.joliciel.jochre.graphics.GroupOfShapes) Linguistics(com.joliciel.jochre.lang.Linguistics) ArrayList(java.util.ArrayList) List(java.util.List) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter) Writer(java.io.Writer) ShapeInSequence(com.joliciel.jochre.boundaries.ShapeInSequence)

Example 2 with ShapeInSequence

use of com.joliciel.jochre.boundaries.ShapeInSequence in project jochre by urieli.

the class LetterSequence method getRealSequence.

/**
 * Returns a string representation of the real sequence behind this letter
 * sequence (including split letters and inkspots).
 * <p>
 * The value is built on first call and kept in {@code realSequence}. Each
 * original shape contributes its letter once, even when consecutive shapes
 * in the sequence refer to the same original shape. An empty letter is
 * written as {@code []}; a letter longer than one character that is not a
 * listed dual-character letter is wrapped in square brackets.
 *
 * @return the real sequence as a string
 */
public String getRealSequence() {
    if (realSequence != null) {
        return realSequence;
    }

    Linguistics linguistics = jochreSession.getLinguistics();
    StringBuilder text = new StringBuilder();
    Shape previous = null;
    for (ShapeInSequence member : this.getUnderlyingShapeSequence()) {
        for (Shape current : member.getOriginalShapes()) {
            boolean alreadyWritten = current.equals(previous);
            if (!alreadyWritten) {
                String letter = current.getLetter();
                if (letter.length() == 0) {
                    text.append("[]");
                } else if (letter.length() > 1 && !linguistics.getDualCharacterLetters().contains(letter)) {
                    // multi-character letter that is not a recognised dual-character letter
                    text.append("[").append(letter).append("]");
                } else {
                    text.append(letter);
                }
            }
            previous = current;
        }
    }
    realSequence = text.toString();
    return realSequence;
}
Also used : Shape(com.joliciel.jochre.graphics.Shape) Linguistics(com.joliciel.jochre.lang.Linguistics) ShapeInSequence(com.joliciel.jochre.boundaries.ShapeInSequence)

Example 3 with ShapeInSequence

use of com.joliciel.jochre.boundaries.ShapeInSequence in project jochre by urieli.

the class OriginalShapeLetterAssigner method onGuessSequence.

/**
 * Walks the underlying shape sequence of the guessed letter sequence and,
 * for each distinct run of the same original shape, hands that original
 * shape and the sequence shapes covering it to {@code assignLetter}.
 * Resets {@code stillValid} to {@code true} before starting.
 *
 * @param letterSequence the guessed letter sequence
 */
@Override
public void onGuessSequence(LetterSequence letterSequence) {
    stillValid = true;

    // The original shape currently being collected, and the sequence shapes seen for it so far.
    Shape currentOriginal = null;
    List<ShapeInSequence> pending = new ArrayList<ShapeInSequence>();

    // A shape in the sequence relates to original shapes in one of these ways:
    // 1) 1-to-1 with an original shape (A from original shape A)
    // 2) shares an original shape with the previous one (B from original shape AB)
    // 3) shares an original shape with the next one (A from original shape AB)
    // 4) shares an original shape with previous and next (B from original shape ABC)
    // 5) has two original shapes (A from original shapes |A A|)
    // 6) has three original shapes (A from original shapes |A * A|)
    // 7) shares with previous and has 2+ original shapes (A from |A A|B)
    // 8) shares with next and has 2+ original shapes (B from A|B B|)
    // Hence a change of original shape may or may not line up with
    // a boundary between shapes in the sequence.
    for (ShapeInSequence member : letterSequence.getUnderlyingShapeSequence()) {
        for (Shape candidate : member.getOriginalShapes()) {
            boolean sameOriginal = candidate.equals(currentOriginal);
            if (!sameOriginal) {
                // Moving on to a new original shape: flush the one just completed.
                if (currentOriginal != null) {
                    this.assignLetter(currentOriginal, pending);
                }
                currentOriginal = candidate;
                pending = new ArrayList<ShapeInSequence>();
            }
            pending.add(member);
        }
    }

    // Flush the last original shape, if there was any.
    if (currentOriginal != null) {
        this.assignLetter(currentOriginal, pending);
    }
}
Also used : Shape(com.joliciel.jochre.graphics.Shape) ShapeSequence(com.joliciel.jochre.boundaries.ShapeSequence) ArrayList(java.util.ArrayList) ShapeInSequence(com.joliciel.jochre.boundaries.ShapeInSequence)

Example 4 with ShapeInSequence

use of com.joliciel.jochre.boundaries.ShapeInSequence in project jochre by urieli.

the class OriginalShapeLetterAssigner method assignLetter.

/**
 * Builds the guessed letter for one original shape from the sequence shapes
 * that cover it, stores it on the shape, and optionally evaluates it against
 * the shape's real letter.
 * <p>
 * A sequence shape with exactly one original shape contributes its guess
 * unchanged. A sequence shape made of several original shapes contributes
 * {@code "|" + guess} when this original shape is its first component,
 * {@code guess + "|"} when it is the last, and nothing when it is in the
 * middle (or when the guess is empty).
 * <p>
 * After building the guess: the original guess is always set; the letter is
 * set only when the current image status is {@code AUTO_NEW}; the shape is
 * saved when {@code save} is set; and when {@code evaluate} and
 * {@code stillValid} are both set, the f-score is incremented and
 * {@code hasError} is raised if the guess is wrong.
 *
 * @param originalShape the original shape to receive the guess
 * @param subsequenceForOriginalShape the sequence shapes covering that original shape
 * @throws JochreException if evaluation is requested but no letter validator is available
 */
void assignLetter(Shape originalShape, List<ShapeInSequence> subsequenceForOriginalShape) {
    String guessedLetter = "";
    for (ShapeInSequence shapeInSubSequence : subsequenceForOriginalShape) {
        if (shapeInSubSequence.getOriginalShapes().size() == 1) {
            // if this subsequence shape has only one original shape,
            // we can go ahead and add the subsequence shape's letter to the original shape
            guessedLetter += shapeInSubSequence.getShape().getOriginalGuess();
        } else {
            // the subsequence shape has multiple original shapes, so its letter has to be
            // split among all of them (these original shapes were joined into a single new shape)
            int j = 0;
            int myIndex = -1;
            for (Shape myOriginalShape : shapeInSubSequence.getOriginalShapes()) {
                if (myOriginalShape.equals(originalShape)) {
                    myIndex = j;
                    break;
                }
                j++;
            }
            if (myIndex == 0) {
                // the original shape starts this subsequence shape
                if (shapeInSubSequence.getShape().getOriginalGuess().length() > 0)
                    guessedLetter += "|" + shapeInSubSequence.getShape().getOriginalGuess();
            } else if (myIndex == shapeInSubSequence.getOriginalShapes().size() - 1) {
                // the original shape ends this subsequence shape
                if (shapeInSubSequence.getShape().getOriginalGuess().length() > 0)
                    guessedLetter += shapeInSubSequence.getShape().getOriginalGuess() + "|";
            } else {
                // the original shape is in the middle of this subsequence shape
                // nothing to do here, since we leave these blank
            }
            // if more than one, where is the original shape in this subsequence's original shapes
        }
        // only one original shape for this subsequence shape, or more?
    }
    // next shape in subsequence for this original shape
    originalShape.setOriginalGuess(guessedLetter);
    if (currentImage.getImageStatus().equals(ImageStatus.AUTO_NEW))
        originalShape.setLetter(guessedLetter);
    if (save)
        originalShape.save();
    if (evaluate && stillValid) {
        if (letterValidator == null) {
            throw new JochreException("Cannot evaluate without a letter validator.");
        }
        String realLetter = originalShape.getLetter();
        // split markers are ignored when validating the real letter
        String realLetterForCheck = realLetter.replace("|", "");
        if (letterValidator.validate(realLetterForCheck)) {
            if (guessedLetter.startsWith("|") && guessedLetter.length() == 3 && realLetter.equals("" + guessedLetter.charAt(1))) {
                // the guessed letter is the first half of a split dual letter, and is the same as a real letter
                this.incrementFScore(realLetter, realLetter);
            } else if (guessedLetter.endsWith("|") && guessedLetter.length() == 3 && realLetter.equals("" + guessedLetter.charAt(1))) {
                // the guessed letter is the second half of a split dual letter, and is the same as a real letter
                this.incrementFScore(realLetter, realLetter);
            } else if (realLetter.startsWith("|") && realLetter.length() == 3 && guessedLetter.equals("" + realLetter.charAt(1))) {
                // the real letter is the first half of a split dual letter, and we correctly guessed the first letter of the two
                this.incrementFScore(realLetter, realLetter);
            } else if (realLetter.endsWith("|") && realLetter.length() == 3 && guessedLetter.equals("" + realLetter.charAt(1))) {
                // the real letter is the second half of a split dual letter, and we correctly guessed the second letter of the two
                this.incrementFScore(realLetter, realLetter);
            } else {
                this.incrementFScore(realLetter, guessedLetter);
                // an error is recorded only when the guess differs from the real letter
                if (!realLetter.equals(guessedLetter))
                    hasError = true;
            }
        } else {
            // check if there are any invalid characters:
            // each character must be valid on its own or together with the one before it
            String prevChar = "";
            for (int i = 0; i < realLetterForCheck.length(); i++) {
                String nextChar = "" + realLetterForCheck.charAt(i);
                if (letterValidator.validate(nextChar)) {
                    // do nothing
                } else if (letterValidator.validate(prevChar + nextChar)) {
                    // do nothing
                } else {
                    // an invalid character stops evaluation from here on
                    stillValid = false;
                    break;
                }
                prevChar = nextChar;
            }
            if (stillValid) {
                this.incrementFScore(realLetter, guessedLetter);
            }
        }
    }
}
Also used : Shape(com.joliciel.jochre.graphics.Shape) JochreException(com.joliciel.jochre.utils.JochreException) ShapeInSequence(com.joliciel.jochre.boundaries.ShapeInSequence)

Example 5 with ShapeInSequence

use of com.joliciel.jochre.boundaries.ShapeInSequence in project jochre by urieli.

the class ShapeIndexFeature method checkInternal.

/**
 * Produces a feature result holding the index of the wrapped shape within
 * its sequence.
 *
 * @param wrapper wrapper giving access to the shape in sequence
 * @param env the runtime environment (not used here)
 * @return the result generated from the shape's index
 */
@Override
public FeatureResult<Integer> checkInternal(ShapeInSequenceWrapper wrapper, RuntimeEnvironment env) {
    return this.generateResult(wrapper.getShapeInSequence().getIndex());
}
Also used : ShapeInSequence(com.joliciel.jochre.boundaries.ShapeInSequence)

Aggregations

ShapeInSequence (com.joliciel.jochre.boundaries.ShapeInSequence): 12, Shape (com.joliciel.jochre.graphics.Shape): 5, ArrayList (java.util.ArrayList): 5, ShapeSequence (com.joliciel.jochre.boundaries.ShapeSequence): 4, GroupOfShapes (com.joliciel.jochre.graphics.GroupOfShapes): 4, Linguistics (com.joliciel.jochre.lang.Linguistics): 2, LetterSequence (com.joliciel.jochre.letterGuesser.LetterSequence): 2, Paragraph (com.joliciel.jochre.graphics.Paragraph): 1, RowOfShapes (com.joliciel.jochre.graphics.RowOfShapes): 1, JochreException (com.joliciel.jochre.utils.JochreException): 1, Decision (com.joliciel.talismane.machineLearning.Decision): 1, BufferedWriter (java.io.BufferedWriter): 1, IOException (java.io.IOException): 1, OutputStreamWriter (java.io.OutputStreamWriter): 1, Writer (java.io.Writer): 1, List (java.util.List): 1, PriorityQueue (java.util.PriorityQueue): 1, TreeMap (java.util.TreeMap): 1