Search in sources :

Example 16 with JochreImage

use of com.joliciel.jochre.graphics.JochreImage in project jochre by urieli.

the class SplitCandidateFinderImplTest method testFindSplitCanidates.

/**
 * Loads the test image {@code shape_370454.png}, turns the whole image into a single
 * shape and checks that the split candidate finder proposes at least one candidate
 * near each of the expected split positions (38, 59 and 82).
 * <p>
 * A candidate counts as "near" an expected position when the absolute distance
 * between them is smaller than
 * {@code splitCandidateFinder.getMinDistanceBetweenSplits()}.
 */
@Test
public void testFindSplitCanidates() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    Config config = ConfigFactory.load();
    JochreSession jochreSession = new JochreSession(config);
    BufferedImage image;
    // ImageIO.read does not close the stream it is handed, so the test must close it.
    try (InputStream imageFileStream = getClass().getResourceAsStream("shape_370454.png")) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    final JochrePage page = mock(JochrePage.class);
    JochreImage jochreImage = new SourceImage(page, "name", image, jochreSession);
    Shape shape = jochreImage.getShape(0, 0, jochreImage.getWidth() - 1, jochreImage.getHeight() - 1);
    SplitCandidateFinder splitCandidateFinder = new SplitCandidateFinder(jochreSession);
    List<Split> splits = splitCandidateFinder.findSplitCandidates(shape);
    int[] trueSplitPositions = new int[] { 38, 59, 82 };
    // foundSplit[i] becomes true once some candidate lies close enough to trueSplitPositions[i]
    boolean[] foundSplit = new boolean[trueSplitPositions.length];
    for (Split splitCandidate : splits) {
        LOG.debug("Split candidate at " + splitCandidate.getPosition());
        for (int i = 0; i < trueSplitPositions.length; i++) {
            int truePos = trueSplitPositions[i];
            int distance = Math.abs(splitCandidate.getPosition() - truePos);
            if (distance < splitCandidateFinder.getMinDistanceBetweenSplits()) {
                foundSplit[i] = true;
                LOG.debug("Found split: " + truePos + ", distance " + distance);
            }
        }
    }
    for (int i = 0; i < trueSplitPositions.length; i++) {
        assertTrue("didn't find split " + trueSplitPositions[i], foundSplit[i]);
    }
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape) SourceImage(com.joliciel.jochre.graphics.SourceImage) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) JochrePage(com.joliciel.jochre.doc.JochrePage) BufferedImage(java.awt.image.BufferedImage) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 17 with JochreImage

use of com.joliciel.jochre.graphics.JochreImage in project jochre by urieli.

the class ThinRowFeature method checkInternal.

/**
 * Compares the {@code getXHeight()} value of the row containing the wrapped shape
 * with the average row height of the shape's image.
 *
 * @param shapeWrapper
 *          wrapper giving access to the shape being examined
 * @param env
 *          runtime environment (not used by this feature)
 * @return the result generated for {@code true} when the ratio of the two heights
 *         is below 0.75, and for {@code false} otherwise
 */
@Override
public FeatureResult<Boolean> checkInternal(ShapeWrapper shapeWrapper, RuntimeEnvironment env) {
    final double threshold = 0.75;
    Shape wrappedShape = shapeWrapper.getShape();
    JochreImage containingImage = wrappedShape.getJochreImage();
    double meanRowHeight = containingImage.getAverageRowHeight();
    double rowXHeight = wrappedShape.getGroup().getRow().getXHeight();
    double heightRatio = rowXHeight / meanRowHeight;

    LOG.trace("averageRowHeight: " + meanRowHeight);
    LOG.trace("shapeHeight: " + rowXHeight);
    LOG.trace("ratio: " + heightRatio);
    LOG.trace("threshold: " + threshold);

    boolean isThin = heightRatio < threshold;
    return this.generateResult(isThin);
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape)

Example 18 with JochreImage

use of com.joliciel.jochre.graphics.JochreImage in project jochre by urieli.

the class Jochre method doCommandEvaluate.

/**
 * Evaluate a given letter guessing model.
 * <p>
 * The session's letter model is run over the images selected by {@code criteria}.
 * Two files are written into {@code outputDir}: {@code <baseName>_errors.txt}
 * (deleted first, then rewritten) and {@code <baseName>[_Reconstruct]_fscores.csv}
 * (opened in append mode), where {@code baseName} is derived from the letter model
 * path plus {@code suffix}. A {@link LexiconErrorWriter} is also attached with the
 * same output directory and base name.
 *
 * @param criteria
 *          the criteria used to select the evaluation corpus
 * @param outputDir
 *          the directory in which the error and f-score files are created
 * @param wordChooser
 *          passed to the image analyser and to the lexicon error writer
 * @param reconstructLetters
 *          if true, boundaries are detected letter-by-letter using the training
 *          corpus splitter and merger, and letters are assigned back to the
 *          original shapes; if false, the original boundaries are used
 * @param save
 *          passed to {@code setSave} on the letter assigner in use
 * @param suffix
 *          appended to the base name of all output files
 * @param includeBeam
 *          passed to {@code setIncludeBeam} on the lexicon error writer
 * @param observers
 *          additional observers to attach to the corpus image processor
 * @throws IOException
 *           if one of the output files cannot be opened, written or closed
 */
public void doCommandEvaluate(CorpusSelectionCriteria criteria, File outputDir, MostLikelyWordChooser wordChooser, boolean reconstructLetters, boolean save, String suffix, boolean includeBeam, List<DocumentObserver> observers) throws IOException {
    ClassificationModel letterModel = jochreSession.getLetterModel();
    List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
    LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());
    // baseName = model path up to its first ".", reduced to what follows the last "/"
    String letterModelPath = jochreSession.getLetterModelPath();
    int firstDot = letterModelPath.indexOf(".");
    // a path without any "." previously failed in substring(0, -1); use the whole path instead
    String baseName = firstDot >= 0 ? letterModelPath.substring(0, firstDot) : letterModelPath;
    if (baseName.lastIndexOf("/") > 0)
        baseName = baseName.substring(baseName.lastIndexOf("/") + 1);
    baseName += suffix;
    BoundaryDetector boundaryDetector = null;
    if (reconstructLetters) {
        ShapeSplitter splitter = new TrainingCorpusShapeSplitter(jochreSession);
        ShapeMerger merger = new TrainingCorpusShapeMerger();
        boundaryDetector = new LetterByLetterBoundaryDetector(splitter, merger, jochreSession);
    } else {
        boundaryDetector = new OriginalBoundaryDetector();
    }
    ImageAnalyser evaluator = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);
    FScoreObserver fScoreObserver = null;
    LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
    if (reconstructLetters) {
        OriginalShapeLetterAssigner originalShapeLetterAssigner = new OriginalShapeLetterAssigner();
        originalShapeLetterAssigner.setEvaluate(true);
        originalShapeLetterAssigner.setSave(save);
        originalShapeLetterAssigner.setLetterValidator(letterValidator);
        fScoreObserver = originalShapeLetterAssigner;
    } else {
        LetterAssigner letterAssigner = new LetterAssigner();
        letterAssigner.setSave(save);
        evaluator.addObserver(letterAssigner);
        fScoreObserver = new SimpleLetterFScoreObserver(letterValidator, jochreSession);
    }
    evaluator.addObserver(fScoreObserver);
    ErrorLogger errorLogger = new ErrorLogger(jochreSession);
    File errorFile = new File(outputDir, baseName + "_errors.txt");
    // start from a fresh error file, even though the stream below is opened in append mode
    errorFile.delete();
    // try-with-resources: the error writer is closed even if the set-up below or the processing fails
    try (Writer errorWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(errorFile, true), "UTF8"))) {
        errorLogger.setErrorWriter(errorWriter);
        evaluator.addObserver(errorLogger);
        LexiconErrorWriter lexiconErrorWriter = new LexiconErrorWriter(outputDir, baseName, wordChooser, jochreSession);
        if (documentGroups != null)
            lexiconErrorWriter.setDocumentGroups(documentGroups);
        lexiconErrorWriter.setIncludeBeam(includeBeam);
        // find all document names (alphabetical ordering)
        Set<String> documentNameSet = new TreeSet<>();
        JochreCorpusImageReader imageReader1 = new JochreCorpusImageReader(jochreSession);
        CorpusSelectionCriteria docCriteria = new CorpusSelectionCriteria();
        docCriteria.setImageStatusesToInclude(criteria.getImageStatusesToInclude());
        docCriteria.setImageId(criteria.getImageId());
        docCriteria.setDocumentId(criteria.getDocumentId());
        docCriteria.setDocumentIds(criteria.getDocumentIds());
        imageReader1.setSelectionCriteria(docCriteria);
        JochreDocument currentDoc = null;
        while (imageReader1.hasNext()) {
            JochreImage image = imageReader1.next();
            if (!image.getPage().getDocument().equals(currentDoc)) {
                currentDoc = image.getPage().getDocument();
                documentNameSet.add(currentDoc.getName());
            }
        }
        List<String> documentNames = new ArrayList<>(documentNameSet);
        lexiconErrorWriter.setDocumentNames(documentNames);
        evaluator.addObserver(lexiconErrorWriter);
        JochreCorpusImageProcessor imageProcessor = new JochreCorpusImageProcessor(criteria, jochreSession);
        imageProcessor.addObserver(evaluator);
        for (DocumentObserver observer : observers)
            imageProcessor.addObserver(observer);
        imageProcessor.process();
    }
    LOG.debug("F-score for " + jochreSession.getLetterModelPath() + ": " + fScoreObserver.getFScoreCalculator().getTotalFScore());
    String modelFileName = baseName;
    if (reconstructLetters)
        modelFileName += "_Reconstruct";
    File fscoreFile = new File(outputDir, modelFileName + "_fscores.csv");
    // The f-score writer was previously never closed, so buffered output could be lost.
    // Closing a BufferedWriter twice is harmless should writeScoresToCSV close it as well.
    try (Writer fscoreWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fscoreFile, true), jochreSession.getCsvEncoding()))) {
        fScoreObserver.getFScoreCalculator().writeScoresToCSV(fscoreWriter);
    }
}
Also used : LetterByLetterBoundaryDetector(com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector) OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) ImageAnalyser(com.joliciel.jochre.analyser.ImageAnalyser) TrainingCorpusShapeMerger(com.joliciel.jochre.boundaries.TrainingCorpusShapeMerger) LexiconErrorWriter(com.joliciel.jochre.lexicon.LexiconErrorWriter) ArrayList(java.util.ArrayList) JochreDocument(com.joliciel.jochre.doc.JochreDocument) BufferedWriter(java.io.BufferedWriter) JochreCorpusImageReader(com.joliciel.jochre.graphics.JochreCorpusImageReader) LetterValidator(com.joliciel.jochre.letterGuesser.LetterValidator) JochreCorpusImageProcessor(com.joliciel.jochre.graphics.JochreCorpusImageProcessor) LetterFeature(com.joliciel.jochre.letterGuesser.features.LetterFeature) TreeSet(java.util.TreeSet) LetterFeatureParser(com.joliciel.jochre.letterGuesser.features.LetterFeatureParser) RecursiveShapeSplitter(com.joliciel.jochre.boundaries.RecursiveShapeSplitter) TrainingCorpusShapeSplitter(com.joliciel.jochre.boundaries.TrainingCorpusShapeSplitter) ShapeSplitter(com.joliciel.jochre.boundaries.ShapeSplitter) JochreImage(com.joliciel.jochre.graphics.JochreImage) CorpusSelectionCriteria(com.joliciel.jochre.graphics.CorpusSelectionCriteria) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) BoundaryDetector(com.joliciel.jochre.boundaries.BoundaryDetector) LetterByLetterBoundaryDetector(com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) LetterAssigner(com.joliciel.jochre.analyser.LetterAssigner) DocumentObserver(com.joliciel.jochre.doc.DocumentObserver) SimpleLetterFScoreObserver(com.joliciel.jochre.analyser.SimpleLetterFScoreObserver) 
LetterGuesser(com.joliciel.jochre.letterGuesser.LetterGuesser) ErrorLogger(com.joliciel.jochre.analyser.ErrorLogger) SimpleLetterFScoreObserver(com.joliciel.jochre.analyser.SimpleLetterFScoreObserver) FScoreObserver(com.joliciel.jochre.analyser.FScoreObserver) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) TrainingCorpusShapeMerger(com.joliciel.jochre.boundaries.TrainingCorpusShapeMerger) ShapeMerger(com.joliciel.jochre.boundaries.ShapeMerger) FileOutputStream(java.io.FileOutputStream) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) OutputStreamWriter(java.io.OutputStreamWriter) TrainingCorpusShapeSplitter(com.joliciel.jochre.boundaries.TrainingCorpusShapeSplitter) ComponentCharacterValidator(com.joliciel.jochre.letterGuesser.ComponentCharacterValidator) File(java.io.File) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel) UnknownWordListWriter(com.joliciel.jochre.lexicon.UnknownWordListWriter) BufferedWriter(java.io.BufferedWriter) Writer(java.io.Writer) LexiconErrorWriter(com.joliciel.jochre.lexicon.LexiconErrorWriter) OutputStreamWriter(java.io.OutputStreamWriter)

Example 19 with JochreImage

use of com.joliciel.jochre.graphics.JochreImage in project jochre by urieli.

the class JochreDocument method getXml.

/**
 * Writes an xml representation of this document as it currently stands to the
 * given stream, to be used for correcting the text associated with this document.
 * <p>
 * The output is a {@code doc} element (attributes {@code name}, {@code fileName},
 * {@code locale}) containing nested {@code page}, {@code image} and
 * {@code paragraph} elements. Each paragraph holds a single CDATA section with the
 * non-null letters of its shapes; a space is appended after every group and a
 * carriage return + line feed after every row.
 * <p>
 * The stream is flushed but not closed by this method.
 *
 * @param outputStream
 *          the stream to write the UTF-8 encoded xml to
 * @throws JochreException
 *           wrapping any {@link XMLStreamException} raised while writing
 */
public void getXml(OutputStream outputStream) {
    try {
        XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newFactory();
        // Request UTF-8 explicitly so that the bytes written always match the encoding
        // declared in the prolog below, whatever the platform default charset is.
        XMLStreamWriter writer = xmlOutputFactory.createXMLStreamWriter(outputStream, "UTF-8");
        writer.writeStartDocument("UTF-8", "1.0");
        writer.writeStartElement("doc");
        writer.writeAttribute("name", this.getName());
        writer.writeAttribute("fileName", this.getFileName());
        writer.writeAttribute("locale", this.getLocale().getLanguage());
        for (JochrePage page : this.getPages()) {
            writer.writeStartElement("page");
            writer.writeAttribute("index", "" + page.getIndex());
            for (JochreImage image : page.getImages()) {
                writer.writeStartElement("image");
                writer.writeAttribute("name", image.getName());
                writer.writeAttribute("index", "" + image.getIndex());
                for (Paragraph paragraph : image.getParagraphs()) {
                    writer.writeStartElement("paragraph");
                    writer.writeAttribute("index", "" + paragraph.getIndex());
                    // the buffer is local to this loop iteration, so no synchronisation is needed
                    StringBuilder sb = new StringBuilder();
                    for (RowOfShapes row : paragraph.getRows()) {
                        for (GroupOfShapes group : row.getGroups()) {
                            for (Shape shape : group.getShapes()) {
                                if (shape.getLetter() != null)
                                    sb.append(shape.getLetter());
                            }
                            sb.append(" ");
                        }
                        sb.append("\r\n");
                    }
                    writer.writeCData(sb.toString());
                    // paragraph
                    writer.writeEndElement();
                }
                // image
                writer.writeEndElement();
            }
            // page
            writer.writeEndElement();
        }
        // doc
        writer.writeEndElement();
        writer.writeEndDocument();
        writer.flush();
    } catch (XMLStreamException e) {
        throw new JochreException(e);
    }
}
Also used : XMLOutputFactory(javax.xml.stream.XMLOutputFactory) JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape) JochreException(com.joliciel.jochre.utils.JochreException) XMLStreamException(javax.xml.stream.XMLStreamException) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) GroupOfShapes(com.joliciel.jochre.graphics.GroupOfShapes) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Paragraph(com.joliciel.jochre.graphics.Paragraph)

Example 20 with JochreImage

use of com.joliciel.jochre.graphics.JochreImage in project jochre by urieli.

the class BorderlineNeighboursFeature method checkInternal.

/**
 * Scores how closely the dark pixels on the facing borders of the two shapes in a
 * pair line up vertically.
 * <p>
 * For each shape, a vertical strip on one side is scanned, its extent derived from
 * the horizontal tolerance: when the image is left-to-right this is the right-hand
 * side of the first shape and the left-hand side of the second, otherwise the
 * sides are swapped. The absolute y-coordinate ({@code getTop() + y}) of every
 * black pixel found in a strip is collected into a set. A point is a "neighbour"
 * when the other shape's set holds a point whose y-coordinate differs by at most
 * the vertical tolerance.
 *
 * @param pair
 *          the two shapes to compare
 * @param env
 *          runtime environment handed on to the tolerance features
 * @return {@code null} if either tolerance feature yields no result; otherwise the
 *         result generated for the number of neighbour points in both sets divided
 *         by the total number of points in both sets (0 when both sets are empty)
 */
@Override
public FeatureResult<Double> checkInternal(ShapePair pair, RuntimeEnvironment env) {
    FeatureResult<Integer> horizontalToleranceResult = horizontalToleranceFeature.check(pair, env);
    FeatureResult<Integer> verticalToleranceResult = verticalToleranceFeature.check(pair, env);
    if (horizontalToleranceResult == null || verticalToleranceResult == null) {
        return null;
    }

    int xTolerance = horizontalToleranceResult.getOutcome();
    int yTolerance = verticalToleranceResult.getOutcome();
    Shape first = pair.getFirstShape();
    Shape second = pair.getSecondShape();
    JochreImage image = first.getJochreImage();

    // check that the two shapes have dark areas near each other
    Set<Integer> firstBorderYs = new HashSet<Integer>();
    int firstFromX;
    if (image.isLeftToRight()) {
        firstFromX = first.getWidth() - xTolerance - 1;
    } else {
        firstFromX = 0;
    }
    int firstToX;
    if (image.isLeftToRight()) {
        firstToX = first.getWidth();
    } else {
        firstToX = xTolerance + 1;
    }
    LOG.trace("shape1MinBorder" + firstFromX);
    LOG.trace("shape1MaxBorder" + firstToX);
    StringBuilder firstTrace = new StringBuilder();
    for (int col = firstFromX; col < firstToX; col++) {
        for (int row = 0; row < first.getHeight(); row++) {
            if (!first.isPixelBlack(col, row, image.getBlackThreshold())) {
                continue;
            }
            firstBorderYs.add(first.getTop() + row);
            firstTrace.append(first.getTop() + row).append(',');
        }
    }
    LOG.trace(firstTrace.toString());

    Set<Integer> secondBorderYs = new HashSet<Integer>();
    StringBuilder secondTrace = new StringBuilder();
    int secondFromX;
    if (image.isLeftToRight()) {
        secondFromX = 0;
    } else {
        secondFromX = second.getWidth() - xTolerance - 1;
    }
    int secondToX;
    if (image.isLeftToRight()) {
        secondToX = xTolerance + 1;
    } else {
        secondToX = second.getWidth();
    }
    LOG.trace("shape2MinBorder" + secondFromX);
    LOG.trace("shape2MaxBorder" + secondToX);
    for (int col = secondFromX; col < secondToX; col++) {
        for (int row = 0; row < second.getHeight(); row++) {
            if (!second.isPixelBlack(col, row, image.getBlackThreshold())) {
                continue;
            }
            secondBorderYs.add(second.getTop() + row);
            secondTrace.append(second.getTop() + row).append(',');
        }
    }
    LOG.trace(secondTrace.toString());

    // points of the first shape that have at least one close point in the second shape
    int firstWithNeighbour = 0;
    for (int yFirst : firstBorderYs) {
        for (int ySecond : secondBorderYs) {
            if (Math.abs(ySecond - yFirst) <= yTolerance) {
                firstWithNeighbour++;
                break;
            }
        }
    }
    LOG.trace("numNeighbours1: " + firstWithNeighbour);

    // and the same count in the other direction
    int secondWithNeighbour = 0;
    for (int ySecond : secondBorderYs) {
        for (int yFirst : firstBorderYs) {
            if (Math.abs(yFirst - ySecond) <= yTolerance) {
                secondWithNeighbour++;
                break;
            }
        }
    }
    LOG.trace("numNeighbours2: " + secondWithNeighbour);
    LOG.trace("shape1BorderPoints: " + firstBorderYs.size());
    LOG.trace("shape2BorderPoints: " + secondBorderYs.size());

    int totalBorderPoints = firstBorderYs.size() + secondBorderYs.size();
    double ratio = totalBorderPoints > 0 ? ((double) firstWithNeighbour + secondWithNeighbour) / totalBorderPoints : 0;
    return this.generateResult(ratio);
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape) HashSet(java.util.HashSet)

Aggregations

JochreImage (com.joliciel.jochre.graphics.JochreImage)20 Shape (com.joliciel.jochre.graphics.Shape)12 ArrayList (java.util.ArrayList)8 Test (org.junit.Test)8 JochrePage (com.joliciel.jochre.doc.JochrePage)7 JochreSession (com.joliciel.jochre.JochreSession)6 JochreDocument (com.joliciel.jochre.doc.JochreDocument)6 GroupOfShapes (com.joliciel.jochre.graphics.GroupOfShapes)6 Paragraph (com.joliciel.jochre.graphics.Paragraph)6 RowOfShapes (com.joliciel.jochre.graphics.RowOfShapes)6 Config (com.typesafe.config.Config)6 BufferedImage (java.awt.image.BufferedImage)6 SourceImage (com.joliciel.jochre.graphics.SourceImage)4 TreeSet (java.util.TreeSet)4 SplitFeature (com.joliciel.jochre.boundaries.features.SplitFeature)3 Segmenter (com.joliciel.jochre.graphics.Segmenter)3 Decision (com.joliciel.talismane.machineLearning.Decision)3 DecisionMaker (com.joliciel.talismane.machineLearning.DecisionMaker)3 StringWriter (java.io.StringWriter)3 DocumentObserver (com.joliciel.jochre.doc.DocumentObserver)2