use of com.joliciel.jochre.graphics.JochreImage in project jochre by urieli.
the class SplitCandidateFinderImplTest method testFindSplitCanidates.
/**
 * Checks that, for the test shape image, the split candidate finder proposes
 * at least one candidate close to each of the three expected split positions.
 * A candidate counts as "close" when its distance to the expected position is
 * below {@code getMinDistanceBetweenSplits()}.
 */
@Test
public void testFindSplitCanidates() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    Config config = ConfigFactory.load();
    JochreSession jochreSession = new JochreSession(config);

    // ImageIO.read does not close the stream it is handed, so the test closes
    // it itself. A null resource is tolerated by try-with-resources, which
    // lets the assertNotNull below report a missing test image.
    BufferedImage image;
    try (InputStream imageFileStream = getClass().getResourceAsStream("shape_370454.png")) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    // ImageIO.read returns null when no registered reader can decode the stream.
    assertNotNull(image);

    final JochrePage page = mock(JochrePage.class);
    JochreImage jochreImage = new SourceImage(page, "name", image, jochreSession);
    Shape shape = jochreImage.getShape(0, 0, jochreImage.getWidth() - 1, jochreImage.getHeight() - 1);

    SplitCandidateFinder splitCandidateFinder = new SplitCandidateFinder(jochreSession);
    List<Split> splits = splitCandidateFinder.findSplitCandidates(shape);

    int[] trueSplitPositions = new int[] { 38, 59, 82 };
    boolean[] foundSplit = new boolean[] { false, false, false };

    for (Split splitCandidate : splits) {
        LOG.debug("Split candidate at " + splitCandidate.getPosition());
        for (int i = 0; i < trueSplitPositions.length; i++) {
            int truePos = trueSplitPositions[i];
            int distance = Math.abs(splitCandidate.getPosition() - truePos);
            if (distance < splitCandidateFinder.getMinDistanceBetweenSplits()) {
                foundSplit[i] = true;
                LOG.debug("Found split: " + truePos + ", distance " + distance);
            }
        }
    }

    for (int i = 0; i < trueSplitPositions.length; i++) {
        assertTrue("didn't find split " + trueSplitPositions[i], foundSplit[i]);
    }
}
use of com.joliciel.jochre.graphics.JochreImage in project jochre by urieli.
the class ThinRowFeature method checkInternal.
/**
 * Divides the x-height of the row containing the shape by the average row
 * height of the shape's image, and reports whether that ratio is below a
 * fixed threshold of 0.75.
 *
 * @param shapeWrapper
 *            wrapper providing the shape to examine
 * @param env
 *            the runtime environment (not read by this method)
 * @return a boolean result that is true when the ratio is below the threshold
 */
@Override
public FeatureResult<Boolean> checkInternal(ShapeWrapper shapeWrapper, RuntimeEnvironment env) {
    final double threshold = 0.75;

    final Shape shape = shapeWrapper.getShape();
    final JochreImage jochreImage = shape.getJochreImage();

    final double meanRowHeight = jochreImage.getAverageRowHeight();
    final double rowXHeight = shape.getGroup().getRow().getXHeight();
    final double ratio = rowXHeight / meanRowHeight;

    LOG.trace("averageRowHeight: " + meanRowHeight);
    LOG.trace("shapeHeight: " + rowXHeight);
    LOG.trace("ratio: " + ratio);
    LOG.trace("threshold: " + threshold);

    return this.generateResult(ratio < threshold);
}
use of com.joliciel.jochre.graphics.JochreImage in project jochre by urieli.
the class Jochre method doCommandEvaluate.
/**
 * Evaluate a given letter guessing model.
 * <p>
 * Output files are named after the letter model: the base name is the model
 * path's file name up to its first dot, followed by {@code suffix}. Errors go
 * to {@code <baseName>_errors.txt} (recreated on each call) and f-scores are
 * appended to {@code <baseName>_fscores.csv}, or
 * {@code <baseName>_Reconstruct_fscores.csv} when {@code reconstructLetters}
 * is set.
 *
 * @param criteria
 *            the criteria used to select the evaluation corpus
 * @param outputDir
 *            directory in which the error and f-score files are written
 * @param wordChooser
 *            word chooser handed to the image analyser and the lexicon error
 *            writer
 * @param reconstructLetters
 *            if true, boundaries are detected letter by letter using the
 *            training-corpus splitter and merger; otherwise the original
 *            boundaries are used
 * @param save
 *            passed to the letter assigner's {@code setSave}
 * @param suffix
 *            text appended to the base name of all output files
 * @param includeBeam
 *            passed to the lexicon error writer's {@code setIncludeBeam}
 * @param observers
 *            additional observers registered on the corpus image processor
 * @throws IOException
 *             if an output file cannot be opened, written or closed
 */
public void doCommandEvaluate(CorpusSelectionCriteria criteria, File outputDir, MostLikelyWordChooser wordChooser, boolean reconstructLetters, boolean save, String suffix, boolean includeBeam, List<DocumentObserver> observers) throws IOException {
    ClassificationModel letterModel = jochreSession.getLetterModel();
    List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
    LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());

    // Derive the base name from the file name only: drop the directory part
    // first, then cut at the first dot if there is one. This avoids an
    // exception for paths without a dot and wrong names when a directory
    // contains a dot or the path starts with "/".
    String letterModelPath = jochreSession.getLetterModelPath();
    String baseName = letterModelPath.substring(letterModelPath.lastIndexOf('/') + 1);
    int firstDot = baseName.indexOf('.');
    if (firstDot >= 0) {
        baseName = baseName.substring(0, firstDot);
    }
    baseName += suffix;

    BoundaryDetector boundaryDetector = null;
    if (reconstructLetters) {
        ShapeSplitter splitter = new TrainingCorpusShapeSplitter(jochreSession);
        ShapeMerger merger = new TrainingCorpusShapeMerger();
        boundaryDetector = new LetterByLetterBoundaryDetector(splitter, merger, jochreSession);
    } else {
        boundaryDetector = new OriginalBoundaryDetector();
    }

    ImageAnalyser evaluator = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);

    FScoreObserver fScoreObserver = null;
    LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
    if (reconstructLetters) {
        OriginalShapeLetterAssigner originalShapeLetterAssigner = new OriginalShapeLetterAssigner();
        originalShapeLetterAssigner.setEvaluate(true);
        originalShapeLetterAssigner.setSave(save);
        originalShapeLetterAssigner.setLetterValidator(letterValidator);
        fScoreObserver = originalShapeLetterAssigner;
    } else {
        LetterAssigner letterAssigner = new LetterAssigner();
        letterAssigner.setSave(save);
        evaluator.addObserver(letterAssigner);
        fScoreObserver = new SimpleLetterFScoreObserver(letterValidator, jochreSession);
    }
    evaluator.addObserver(fScoreObserver);

    ErrorLogger errorLogger = new ErrorLogger(jochreSession);
    File errorFile = new File(outputDir, baseName + "_errors.txt");
    // Start from a fresh error file on each evaluation.
    errorFile.delete();

    // The error writer is opened in try-with-resources so that it is closed
    // even if corpus enumeration or observer set-up fails before processing.
    try (Writer errorWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(errorFile, true), "UTF8"))) {
        errorLogger.setErrorWriter(errorWriter);
        evaluator.addObserver(errorLogger);

        LexiconErrorWriter lexiconErrorWriter = new LexiconErrorWriter(outputDir, baseName, wordChooser, jochreSession);
        if (documentGroups != null) {
            lexiconErrorWriter.setDocumentGroups(documentGroups);
        }
        lexiconErrorWriter.setIncludeBeam(includeBeam);

        // find all document names (alphabetical ordering)
        Set<String> documentNameSet = new TreeSet<>();
        JochreCorpusImageReader imageReader1 = new JochreCorpusImageReader(jochreSession);
        CorpusSelectionCriteria docCriteria = new CorpusSelectionCriteria();
        docCriteria.setImageStatusesToInclude(criteria.getImageStatusesToInclude());
        docCriteria.setImageId(criteria.getImageId());
        docCriteria.setDocumentId(criteria.getDocumentId());
        docCriteria.setDocumentIds(criteria.getDocumentIds());
        imageReader1.setSelectionCriteria(docCriteria);
        JochreDocument currentDoc = null;
        while (imageReader1.hasNext()) {
            JochreImage image = imageReader1.next();
            if (!image.getPage().getDocument().equals(currentDoc)) {
                currentDoc = image.getPage().getDocument();
                documentNameSet.add(currentDoc.getName());
            }
        }
        List<String> documentNames = new ArrayList<>(documentNameSet);
        lexiconErrorWriter.setDocumentNames(documentNames);
        evaluator.addObserver(lexiconErrorWriter);

        JochreCorpusImageProcessor imageProcessor = new JochreCorpusImageProcessor(criteria, jochreSession);
        imageProcessor.addObserver(evaluator);
        for (DocumentObserver observer : observers) {
            imageProcessor.addObserver(observer);
        }

        imageProcessor.process();
    }

    LOG.debug("F-score for " + jochreSession.getLetterModelPath() + ": " + fScoreObserver.getFScoreCalculator().getTotalFScore());

    String modelFileName = baseName;
    if (reconstructLetters) {
        modelFileName += "_Reconstruct";
    }
    File fscoreFile = new File(outputDir, modelFileName + "_fscores.csv");

    // Opened in append mode, as before; closing the writer flushes the
    // buffered CSV content, which was previously never guaranteed.
    try (Writer fscoreWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fscoreFile, true), jochreSession.getCsvEncoding()))) {
        fScoreObserver.getFScoreCalculator().writeScoresToCSV(fscoreWriter);
    }
}
use of com.joliciel.jochre.graphics.JochreImage in project jochre by urieli.
the class JochreDocument method getXml.
/**
 * Writes an xml representation of this document as it currently stands to the
 * given stream, to be used for correcting the text associated with this
 * document.
 * <p>
 * The structure is doc &gt; page &gt; image &gt; paragraph. Each paragraph
 * holds a CDATA section with the letters of its shapes: a space follows each
 * group and a CR/LF follows each row. Shapes without a letter are skipped.
 *
 * @param outputStream
 *            the stream to write to; it is flushed but not closed here
 * @throws JochreException
 *             wrapping any {@link XMLStreamException} raised while writing
 */
public void getXml(OutputStream outputStream) {
    try {
        XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newFactory();
        // writeStartDocument only declares the encoding in the XML prolog; the
        // actual byte encoding has to be requested when the writer is created,
        // so ask for UTF-8 explicitly to guarantee the two agree.
        XMLStreamWriter writer = xmlOutputFactory.createXMLStreamWriter(outputStream, "UTF-8");
        writer.writeStartDocument("UTF-8", "1.0");
        writer.writeStartElement("doc");
        writer.writeAttribute("name", this.getName());
        writer.writeAttribute("fileName", this.getFileName());
        writer.writeAttribute("locale", this.getLocale().getLanguage());

        for (JochrePage page : this.getPages()) {
            writer.writeStartElement("page");
            writer.writeAttribute("index", "" + page.getIndex());

            for (JochreImage image : page.getImages()) {
                writer.writeStartElement("image");
                writer.writeAttribute("name", image.getName());
                writer.writeAttribute("index", "" + image.getIndex());

                for (Paragraph paragraph : image.getParagraphs()) {
                    writer.writeStartElement("paragraph");
                    writer.writeAttribute("index", "" + paragraph.getIndex());

                    // Local, single-threaded buffer: StringBuilder is sufficient.
                    StringBuilder paragraphText = new StringBuilder();
                    for (RowOfShapes row : paragraph.getRows()) {
                        for (GroupOfShapes group : row.getGroups()) {
                            for (Shape shape : group.getShapes()) {
                                if (shape.getLetter() != null) {
                                    paragraphText.append(shape.getLetter());
                                }
                            }
                            paragraphText.append(" ");
                        }
                        paragraphText.append("\r\n");
                    }
                    writer.writeCData(paragraphText.toString());

                    // paragraph
                    writer.writeEndElement();
                }

                // image
                writer.writeEndElement();
            }

            // page
            writer.writeEndElement();
        }

        // doc
        writer.writeEndElement();
        writer.writeEndDocument();
        writer.flush();
    } catch (XMLStreamException e) {
        throw new JochreException(e);
    }
}
use of com.joliciel.jochre.graphics.JochreImage in project jochre by urieli.
the class BorderlineNeighboursFeature method checkInternal.
/**
 * Measures how many dark border rows of two paired shapes lie vertically
 * close to a dark border row of the other shape.
 * <p>
 * For each shape, the columns within the horizontal tolerance of the edge
 * facing the other shape are scanned (which edge depends on whether the image
 * is left-to-right), and the absolute row of every black pixel is collected.
 * A row counts as having a neighbour when some row collected for the other
 * shape is within the vertical tolerance. The outcome is the number of rows
 * with a neighbour (summed over both shapes) divided by the total number of
 * collected rows, or 0 when no rows were collected.
 *
 * @param pair
 *            the two shapes to compare
 * @param env
 *            runtime environment forwarded to the tolerance features
 * @return the ratio described above, or null if either tolerance feature
 *         returns no result
 */
@Override
public FeatureResult<Double> checkInternal(ShapePair pair, RuntimeEnvironment env) {
    FeatureResult<Integer> horizontalToleranceResult = horizontalToleranceFeature.check(pair, env);
    FeatureResult<Integer> verticalToleranceResult = verticalToleranceFeature.check(pair, env);
    if (horizontalToleranceResult == null || verticalToleranceResult == null) {
        return null;
    }

    final int xTolerance = horizontalToleranceResult.getOutcome();
    final int yTolerance = verticalToleranceResult.getOutcome();

    final Shape first = pair.getFirstShape();
    final Shape second = pair.getSecondShape();
    final JochreImage jochreImage = first.getJochreImage();

    // check that the two shapes have dark areas near each other

    // Column range of the first shape that faces the second shape.
    int firstFrom;
    int firstTo;
    if (jochreImage.isLeftToRight()) {
        firstFrom = (first.getWidth() - xTolerance) - 1;
        firstTo = first.getWidth();
    } else {
        firstFrom = 0;
        firstTo = xTolerance + 1;
    }
    LOG.trace("shape1MinBorder" + firstFrom);
    LOG.trace("shape1MaxBorder" + firstTo);

    Set<Integer> firstRows = new HashSet<Integer>();
    StringBuilder traceBuffer = new StringBuilder();
    for (int x = firstFrom; x < firstTo; x++) {
        for (int y = 0; y < first.getHeight(); y++) {
            if (!first.isPixelBlack(x, y, jochreImage.getBlackThreshold())) {
                continue;
            }
            int absoluteRow = first.getTop() + y;
            firstRows.add(absoluteRow);
            traceBuffer.append(absoluteRow).append(',');
        }
    }
    LOG.trace(traceBuffer.toString());

    // Column range of the second shape that faces the first shape.
    int secondFrom;
    int secondTo;
    if (jochreImage.isLeftToRight()) {
        secondFrom = 0;
        secondTo = xTolerance + 1;
    } else {
        secondFrom = (second.getWidth() - xTolerance) - 1;
        secondTo = second.getWidth();
    }
    LOG.trace("shape2MinBorder" + secondFrom);
    LOG.trace("shape2MaxBorder" + secondTo);

    Set<Integer> secondRows = new HashSet<Integer>();
    traceBuffer = new StringBuilder();
    for (int x = secondFrom; x < secondTo; x++) {
        for (int y = 0; y < second.getHeight(); y++) {
            if (!second.isPixelBlack(x, y, jochreImage.getBlackThreshold())) {
                continue;
            }
            int absoluteRow = second.getTop() + y;
            secondRows.add(absoluteRow);
            traceBuffer.append(absoluteRow).append(',');
        }
    }
    LOG.trace(traceBuffer.toString());

    // Rows of the first shape with at least one nearby row in the second.
    int firstWithNeighbour = 0;
    for (int rowInFirst : firstRows) {
        boolean hasNeighbour = false;
        for (int rowInSecond : secondRows) {
            if (Math.abs(rowInSecond - rowInFirst) <= yTolerance) {
                hasNeighbour = true;
                break;
            }
        }
        if (hasNeighbour) {
            firstWithNeighbour++;
        }
    }
    LOG.trace("numNeighbours1: " + firstWithNeighbour);

    // Rows of the second shape with at least one nearby row in the first.
    int secondWithNeighbour = 0;
    for (int rowInSecond : secondRows) {
        boolean hasNeighbour = false;
        for (int rowInFirst : firstRows) {
            if (Math.abs(rowInFirst - rowInSecond) <= yTolerance) {
                hasNeighbour = true;
                break;
            }
        }
        if (hasNeighbour) {
            secondWithNeighbour++;
        }
    }
    LOG.trace("numNeighbours2: " + secondWithNeighbour);

    LOG.trace("shape1BorderPoints: " + firstRows.size());
    LOG.trace("shape2BorderPoints: " + secondRows.size());

    final int totalRows = firstRows.size() + secondRows.size();
    double ratio = 0;
    if (totalRows > 0) {
        ratio = ((double) firstWithNeighbour + secondWithNeighbour) / totalRows;
    }

    return this.generateResult(ratio);
}
Aggregations