Search in sources :

Example 1 with FScoreCalculator

use of com.joliciel.jochre.stats.FScoreCalculator in project jochre by urieli.

the class SplitEvaluator method evaluate.

/**
 * Runs the given splitter over the shapes supplied by the reader and tallies
 * how its guessed splits compare with the splits recorded on each shape.
 * <p>
 * A shape is only evaluated when its width/x-height ratio reaches
 * {@code minWidthRatio} or when it already carries at least one split. The
 * splitter itself is only consulted when both the width and the height ratio
 * reach their minimums, and only its first returned sequence is used, provided
 * that sequence scores above {@code minProbabilityForDecision}.
 * <p>
 * Outcomes are recorded as (expected, guessed) pairs:
 * {@code YES/YES} for a recorded split matched within {@code tolerance},
 * {@code YES/NO} or {@code YES/NARROW} for a recorded split that was missed,
 * {@code NO/YES} for a guess matching no recorded split, and {@code NO/NO}
 * for a shape with neither recorded splits nor guesses.
 *
 * @param shapeReader   source of the shapes to evaluate
 * @param shapeSplitter splitter whose guesses are being scored
 * @return the calculator holding all recorded outcomes
 */
public FScoreCalculator<String> evaluate(JochreCorpusShapeReader shapeReader, ShapeSplitter shapeSplitter) {
    FScoreCalculator<String> fScoreCalculator = new FScoreCalculator<String>();
    while (shapeReader.hasNext()) {
        Shape shape = shapeReader.next();
        double widthRatio = (double) shape.getWidth() / (double) shape.getXHeight();
        double heightRatio = (double) shape.getHeight() / (double) shape.getXHeight();
        // Kept as a boolean (never negated into "<") so a NaN ratio is treated as "not wide enough".
        boolean wideEnough = widthRatio >= minWidthRatio;

        // Narrow shapes without any recorded split are not worth evaluating.
        if (!wideEnough && shape.getSplits().size() == 0) {
            continue;
        }
        LOG.debug("Testing " + shape);

        // Collect the splitter's guesses, if the shape is large enough to be submitted to it.
        List<Split> guessedSplits = new ArrayList<Split>();
        if (wideEnough && heightRatio >= minHeightRatio) {
            ShapeSequence bestSequence = shapeSplitter.split(shape).get(0);
            if (bestSequence.getScore() > minProbabilityForDecision) {
                for (ShapeInSequence member : bestSequence) {
                    Shape piece = member.getShape();
                    // The piece ending at the shape's own right edge does not mark a split.
                    if (piece.getRight() == shape.getRight()) {
                        continue;
                    }
                    Split guessedSplit = new Split(shape, jochreSession);
                    guessedSplit.setPosition(piece.getRight() - shape.getLeft());
                    guessedSplits.add(guessedSplit);
                }
            }
        } else {
            LOG.debug("Insufficient width or height");
            LOG.debug("widthRatio: " + widthRatio);
            LOG.debug("heightRatio: " + heightRatio);
        }

        Set<Split> splitsNotFound = new HashSet<Split>();
        // Starts as every guess; guesses are removed as they get matched to a recorded split.
        Set<Split> wrongSplitGuesses = new HashSet<Split>(guessedSplits);
        // Guesses still available for matching; aliases wrongSplitGuesses after the first recorded split.
        Set<Split> remainingSplitGuesses = new HashSet<Split>(guessedSplits);

        if (shape.getSplits().size() > 0) {
            for (Split trueSplit : shape.getSplits()) {
                LOG.debug("true split: " + trueSplit + ", right=" + (shape.getLeft() + trueSplit.getPosition()));

                // Take the first remaining guess lying within tolerance of the recorded position.
                Split matchedGuess = null;
                for (Split candidate : remainingSplitGuesses) {
                    int distance = Math.abs(trueSplit.getPosition() - candidate.getPosition());
                    if (distance <= tolerance) {
                        matchedGuess = candidate;
                        break;
                    }
                }

                if (matchedGuess == null) {
                    splitsNotFound.add(trueSplit);
                } else {
                    LOG.debug("Found split: " + matchedGuess);
                    fScoreCalculator.increment("YES", "YES");
                    wrongSplitGuesses.remove(matchedGuess);
                }
                remainingSplitGuesses = wrongSplitGuesses;
            }

            for (Split missed : splitsNotFound) {
                LOG.debug("Didn't find split: " + missed);
                fScoreCalculator.increment("YES", wideEnough ? "NO" : "NARROW");
            }
        } else if (wrongSplitGuesses.isEmpty()) {
            // Nothing recorded and nothing guessed: a correct negative.
            fScoreCalculator.increment("NO", "NO");
        }

        // Whatever is left was guessed but matches no recorded split.
        for (Split guess : wrongSplitGuesses) {
            LOG.debug("Bad guess: " + guess);
            fScoreCalculator.increment("NO", "YES");
        }
    }
    return fScoreCalculator;
}
Also used : Shape(com.joliciel.jochre.graphics.Shape) FScoreCalculator(com.joliciel.jochre.stats.FScoreCalculator) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet)

Example 2 with FScoreCalculator

use of com.joliciel.jochre.stats.FScoreCalculator in project jochre by urieli.

the class MergeEvaluator method evaluate.

/**
 * Scores the given merger on every pair of consecutive shapes inside each
 * group supplied by the reader.
 * <p>
 * The expected answer for a pair is derived from the shapes' letters: a merge
 * is expected when the first letter starts with {@code "|"} and the second is
 * empty or ends with {@code "|"}, or when the second letter ends with
 * {@code "|"} and the first is empty.
 * <p>
 * The merger is only consulted when the pair's width and inner-distance
 * ratios (relative to the pair's x-height, or 0 when the x-height is not
 * positive) stay within {@code maxWidthRatio} and {@code maxDistanceRatio};
 * it is taken to want a merge when its result reaches
 * {@code minProbabilityForDecision}. Pairs outside those limits are recorded
 * as {@code YES/WIDE} when a merge was expected and {@code NO/NO} otherwise.
 *
 * @param groupReader source of the groups whose shapes are paired up
 * @param shapeMerger merger whose decisions are being scored
 * @return the calculator holding all recorded (expected, guessed) outcomes
 */
public FScoreCalculator<String> evaluate(JochreCorpusGroupReader groupReader, ShapeMerger shapeMerger) {
    LOG.debug("evaluate");
    FScoreCalculator<String> fScoreCalculator = new FScoreCalculator<String>();
    while (groupReader.hasNext()) {
        GroupOfShapes group = groupReader.next();
        Shape previousShape = null;
        for (Shape shape : group.getShapes()) {
            // The first shape of a group has no left neighbour to pair with.
            if (previousShape == null) {
                previousShape = shape;
                continue;
            }

            ShapePair mergeCandidate = new ShapePair(previousShape, shape);

            // Ratios stay at 0 when the pair has no usable x-height.
            double xHeight = mergeCandidate.getXHeight();
            double widthRatio = 0;
            double distanceRatio = 0;
            if (xHeight > 0) {
                widthRatio = (double) mergeCandidate.getWidth() / xHeight;
                distanceRatio = (double) mergeCandidate.getInnerDistance() / xHeight;
            }

            // Derive the expected answer from the "|" markers on the two letters.
            String firstLetter = mergeCandidate.getFirstShape().getLetter();
            boolean shouldMerge;
            if (firstLetter.startsWith("|")) {
                String secondLetter = mergeCandidate.getSecondShape().getLetter();
                shouldMerge = secondLetter.length() == 0 || secondLetter.endsWith("|");
            } else {
                String secondLetter = mergeCandidate.getSecondShape().getLetter();
                shouldMerge = secondLetter.endsWith("|") && firstLetter.length() == 0;
            }

            if (LOG.isTraceEnabled()) {
                LOG.trace(mergeCandidate.toString());
                LOG.trace("widthRatio: " + widthRatio);
                LOG.trace("distanceRatio: " + distanceRatio);
                LOG.trace("shouldMerge: " + shouldMerge);
            }

            String expected = shouldMerge ? "YES" : "NO";
            boolean withinLimits = widthRatio <= maxWidthRatio && distanceRatio <= maxDistanceRatio;
            if (!withinLimits) {
                // Pair is outside the limits: the merger is not consulted at all.
                LOG.trace("too wide");
                fScoreCalculator.increment(expected, shouldMerge ? "WIDE" : "NO");
            } else {
                double mergeProb = shapeMerger.checkMerge(previousShape, shape);
                String guessed = (mergeProb >= minProbabilityForDecision) ? "YES" : "NO";
                fScoreCalculator.increment(expected, guessed);
            }

            previousShape = shape;
        }
    }
    return fScoreCalculator;
}
Also used : Shape(com.joliciel.jochre.graphics.Shape) GroupOfShapes(com.joliciel.jochre.graphics.GroupOfShapes) FScoreCalculator(com.joliciel.jochre.stats.FScoreCalculator)

Aggregations

Shape (com.joliciel.jochre.graphics.Shape)2 FScoreCalculator (com.joliciel.jochre.stats.FScoreCalculator)2 GroupOfShapes (com.joliciel.jochre.graphics.GroupOfShapes)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1