Use of com.joliciel.jochre.stats.FScoreCalculator in project jochre by urieli.
The class SplitEvaluator, method evaluate.
/**
 * Runs the given splitter over every shape supplied by the reader and
 * tallies how the guessed splits compare with the splits recorded on each
 * shape.
 * <p>
 * Outcomes are recorded as (expected, guessed) pairs:
 * <ul>
 * <li>("YES", "YES"): a recorded split matched by a guess whose position
 * differs by at most {@code tolerance}</li>
 * <li>("YES", "NO"): a recorded split with no matching guess, on a shape
 * whose width ratio reaches {@code minWidthRatio}</li>
 * <li>("YES", "NARROW"): a recorded split with no matching guess, on a
 * shape below {@code minWidthRatio}</li>
 * <li>("NO", "YES"): a guess that matched no recorded split</li>
 * <li>("NO", "NO"): a shape with neither recorded splits nor guesses</li>
 * </ul>
 * Shapes below {@code minWidthRatio} that have no recorded splits are
 * skipped entirely.
 *
 * @param shapeReader
 *            supplies the shapes to evaluate
 * @param shapeSplitter
 *            the splitter being evaluated
 * @return the calculator holding the accumulated counts
 */
public FScoreCalculator<String> evaluate(JochreCorpusShapeReader shapeReader, ShapeSplitter shapeSplitter) {
    FScoreCalculator<String> fScoreCalculator = new FScoreCalculator<String>();
    while (shapeReader.hasNext()) {
        Shape shape = shapeReader.next();

        // Both ratios are expressed relative to the shape's x-height.
        double widthRatio = (double) shape.getWidth() / (double) shape.getXHeight();
        double heightRatio = (double) shape.getHeight() / (double) shape.getXHeight();

        // Only shapes that are wide enough, or that carry recorded splits,
        // take part in the evaluation.
        boolean wideEnough = widthRatio >= minWidthRatio;
        if (!wideEnough && !(shape.getSplits().size() > 0)) {
            continue;
        }
        LOG.debug("Testing " + shape);

        // Ask the splitter for guesses only when the shape is both wide and
        // tall enough; otherwise the guess list stays empty.
        List<Split> guesses = new ArrayList<Split>();
        if (wideEnough && heightRatio >= minHeightRatio) {
            ShapeSequence bestSequence = shapeSplitter.split(shape).get(0);
            if (bestSequence.getScore() > minProbabilityForDecision) {
                for (ShapeInSequence piece : bestSequence) {
                    Shape pieceShape = piece.getShape();
                    // A piece ending at the shape's own right edge does not
                    // mark a split.
                    if (pieceShape.getRight() == shape.getRight()) {
                        continue;
                    }
                    Split guess = new Split(shape, jochreSession);
                    guess.setPosition(pieceShape.getRight() - shape.getLeft());
                    guesses.add(guess);
                }
            }
        } else {
            LOG.debug("Insufficient width or height");
            LOG.debug("widthRatio: " + widthRatio);
            LOG.debug("heightRatio: " + heightRatio);
        }

        Set<Split> missedSplits = new HashSet<Split>();
        Set<Split> unmatchedGuesses = new HashSet<Split>(guesses);
        Set<Split> candidates = new HashSet<Split>(guesses);

        if (shape.getSplits().size() > 0) {
            for (Split trueSplit : shape.getSplits()) {
                LOG.debug("true split: " + trueSplit + ", right=" + (shape.getLeft() + trueSplit.getPosition()));

                // Locate the first candidate guess within tolerance of this split.
                Split match = null;
                for (Split candidate : candidates) {
                    if (Math.abs(trueSplit.getPosition() - candidate.getPosition()) <= tolerance) {
                        match = candidate;
                        break;
                    }
                }

                if (match == null) {
                    missedSplits.add(trueSplit);
                } else {
                    LOG.debug("Found split: " + match);
                    fScoreCalculator.increment("YES", "YES");
                    unmatchedGuesses.remove(match);
                }

                // From now on only guesses that have not been matched yet
                // remain candidates.
                candidates = unmatchedGuesses;
            }

            for (Split missed : missedSplits) {
                LOG.debug("Didn't find split: " + missed);
                fScoreCalculator.increment("YES", wideEnough ? "NO" : "NARROW");
            }
        } else if (unmatchedGuesses.isEmpty()) {
            fScoreCalculator.increment("NO", "NO");
        }

        // Whatever guesses are left matched no recorded split.
        for (Split badGuess : unmatchedGuesses) {
            LOG.debug("Bad guess: " + badGuess);
            fScoreCalculator.increment("NO", "YES");
        }
    }
    return fScoreCalculator;
}
Use of com.joliciel.jochre.stats.FScoreCalculator in project jochre by urieli.
The class MergeEvaluator, method evaluate.
/**
 * Walks every group supplied by the reader and, for each pair of
 * consecutive shapes within a group, records whether the pair should be
 * merged (judged from the shapes' letters) against what the merger decides.
 * <p>
 * A pair should be merged when the first letter starts with "|" and the
 * second is empty or ends with "|", or when the first letter is empty and
 * the second ends with "|". The merger is consulted only when the pair's
 * width and inner-distance ratios are within {@code maxWidthRatio} and
 * {@code maxDistanceRatio}; otherwise the guess is recorded as "WIDE" for a
 * pair that should merge and "NO" for one that should not.
 *
 * @param groupReader
 *            supplies the groups of shapes to evaluate
 * @param shapeMerger
 *            the merger being evaluated
 * @return the calculator holding the accumulated counts
 */
public FScoreCalculator<String> evaluate(JochreCorpusGroupReader groupReader, ShapeMerger shapeMerger) {
    LOG.debug("evaluate");
    FScoreCalculator<String> fScoreCalculator = new FScoreCalculator<String>();
    while (groupReader.hasNext()) {
        GroupOfShapes group = groupReader.next();
        Shape previousShape = null;
        for (Shape shape : group.getShapes()) {
            Shape earlierShape = previousShape;
            previousShape = shape;
            if (earlierShape == null) {
                // First shape of the group: nothing to pair it with yet.
                continue;
            }

            ShapePair mergeCandidate = new ShapePair(earlierShape, shape);

            // Ratios stay at zero when the pair has no positive x-height.
            double widthRatio = 0;
            double distanceRatio = 0;
            if (mergeCandidate.getXHeight() > 0) {
                widthRatio = (double) mergeCandidate.getWidth() / (double) mergeCandidate.getXHeight();
                distanceRatio = (double) mergeCandidate.getInnerDistance() / (double) mergeCandidate.getXHeight();
            }

            // Work out the expected answer from the letters of the two shapes.
            String firstLetter = mergeCandidate.getFirstShape().getLetter();
            boolean shouldMerge;
            if (firstLetter.startsWith("|")) {
                String secondLetter = mergeCandidate.getSecondShape().getLetter();
                shouldMerge = secondLetter.length() == 0 || secondLetter.endsWith("|");
            } else {
                shouldMerge = mergeCandidate.getSecondShape().getLetter().endsWith("|")
                        && firstLetter.length() == 0;
            }

            if (LOG.isTraceEnabled()) {
                LOG.trace(mergeCandidate.toString());
                LOG.trace("widthRatio: " + widthRatio);
                LOG.trace("distanceRatio: " + distanceRatio);
                LOG.trace("shouldMerge: " + shouldMerge);
            }

            String expected = shouldMerge ? "YES" : "NO";
            String guessed;
            if (widthRatio <= maxWidthRatio && distanceRatio <= maxDistanceRatio) {
                double mergeProb = shapeMerger.checkMerge(earlierShape, shape);
                guessed = (mergeProb >= minProbabilityForDecision) ? "YES" : "NO";
            } else {
                // Pair exceeds the limits, so the merger is not consulted.
                LOG.trace("too wide");
                guessed = shouldMerge ? "WIDE" : "NO";
            }
            fScoreCalculator.increment(expected, guessed);
        }
    }
    return fScoreCalculator;
}
Aggregations