Search in sources :

Example 1 with ImageAnalyser

use of com.joliciel.jochre.analyser.ImageAnalyser in project jochre by urieli.

the class Jochre method doCommandEvaluateFull.

/**
 * Evaluate a suite of split/merge models and letter guessing model.
 * <p>
 * Two files are produced in {@code outputDir}: an error log named
 * {@code <baseName><suffix>errors.txt} (any existing file is deleted first)
 * and an f-score file named {@code <baseName><suffix>_full_fscores.csv}
 * (opened in append mode), where {@code baseName} is derived from the
 * letter model path of the current session.
 *
 * @param criteria
 *          for selecting the evaluation corpus
 * @param save
 *          whether or not the letter guesses should be saved
 * @param outputDir
 *          the directory in which the error log and f-score file are written
 * @param wordChooser
 *          the word chooser handed to the image analyser
 * @param suffix
 *          appended to the model base name when naming the output files
 * @param observers
 *          additional observers registered on the corpus image processor
 * @throws IOException
 *           if an output file cannot be opened, written or closed
 * @throws IllegalArgumentException
 *           if the session has no split model or no merge model
 */
public void doCommandEvaluateFull(CorpusSelectionCriteria criteria, boolean save, File outputDir, MostLikelyWordChooser wordChooser, String suffix, List<DocumentObserver> observers) throws IOException {
    // Strip the directory part first, then cut at the first dot of the file
    // name. Doing it in this order avoids failing on paths without any dot and
    // avoids cutting at a dot located inside a directory name.
    String baseName = jochreSession.getLetterModelPath();
    baseName = baseName.substring(baseName.lastIndexOf("/") + 1);
    int dotIndex = baseName.indexOf(".");
    if (dotIndex >= 0) {
        baseName = baseName.substring(0, dotIndex);
    }

    // Letter guessing
    ClassificationModel letterModel = jochreSession.getLetterModel();
    List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
    LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());

    // Shape splitting
    ClassificationModel splitModel = jochreSession.getSplitModel();
    if (splitModel == null) {
        throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.split-model");
    }
    List<String> splitFeatureDescriptors = splitModel.getFeatureDescriptors();
    SplitFeatureParser splitFeatureParser = new SplitFeatureParser();
    Set<SplitFeature<?>> splitFeatures = splitFeatureParser.getSplitFeatureSet(splitFeatureDescriptors);
    SplitCandidateFinder splitCandidateFinder = new SplitCandidateFinder(jochreSession);
    splitCandidateFinder.setMinDistanceBetweenSplits(5);
    ShapeSplitter shapeSplitter = new RecursiveShapeSplitter(splitCandidateFinder, splitFeatures, splitModel.getDecisionMaker(), jochreSession);

    // Shape merging
    ClassificationModel mergeModel = jochreSession.getMergeModel();
    if (mergeModel == null) {
        throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.merge-model");
    }
    List<String> mergeFeatureDescriptors = mergeModel.getFeatureDescriptors();
    MergeFeatureParser mergeFeatureParser = new MergeFeatureParser();
    Set<MergeFeature<?>> mergeFeatures = mergeFeatureParser.getMergeFeatureSet(mergeFeatureDescriptors);
    ShapeMerger shapeMerger = new ShapeMerger(mergeFeatures, mergeModel.getDecisionMaker());

    // Boundary detector type comes from configuration. Any enum value other
    // than the two handled below leaves the detector null, as before.
    BoundaryDetector boundaryDetector = null;
    String boundaryDetectorTypeName = jochreSession.getConfig().getConfig("jochre.boundaries").getString("boundary-detector-type");
    BoundaryDetectorType boundaryDetectorType = BoundaryDetectorType.valueOf(boundaryDetectorTypeName);
    switch(boundaryDetectorType) {
        case LetterByLetter:
            boundaryDetector = new LetterByLetterBoundaryDetector(shapeSplitter, shapeMerger, jochreSession);
            break;
        case Deterministic:
            boundaryDetector = new DeterministicBoundaryDetector(shapeSplitter, shapeMerger, jochreSession);
            break;
    }

    ImageAnalyser imageAnalyser = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);
    LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
    OriginalShapeLetterAssigner shapeLetterAssigner = new OriginalShapeLetterAssigner();
    shapeLetterAssigner.setEvaluate(true);
    shapeLetterAssigner.setSave(save);
    shapeLetterAssigner.setLetterValidator(letterValidator);
    shapeLetterAssigner.setSingleLetterMethod(false);
    imageAnalyser.addObserver(shapeLetterAssigner);

    ErrorLogger errorLogger = new ErrorLogger(jochreSession);
    File errorFile = new File(outputDir, baseName + suffix + "errors.txt");
    errorFile.delete();
    // try-with-resources guarantees the error log is flushed and closed even
    // when processing fails part-way through.
    try (Writer errorWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(errorFile, true), "UTF8"))) {
        errorLogger.setErrorWriter(errorWriter);
        imageAnalyser.addObserver(errorLogger);
        JochreCorpusImageProcessor imageProcessor = new JochreCorpusImageProcessor(criteria, jochreSession);
        imageProcessor.addObserver(imageAnalyser);
        for (DocumentObserver observer : observers) {
            imageProcessor.addObserver(observer);
        }
        imageProcessor.process();
    }

    LOG.debug("F-score for " + jochreSession.getLetterModelPath() + ": " + shapeLetterAssigner.getFScoreCalculator().getTotalFScore());
    String modelFileName = baseName + suffix + "_full";
    File fscoreFile = new File(outputDir, modelFileName + "_fscores.csv");
    // Closing the writer here ensures the buffered scores actually reach the file.
    try (Writer fscoreWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fscoreFile, true), jochreSession.getCsvEncoding()))) {
        shapeLetterAssigner.getFScoreCalculator().writeScoresToCSV(fscoreWriter);
    }
}
Also used : LetterByLetterBoundaryDetector(com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector) RecursiveShapeSplitter(com.joliciel.jochre.boundaries.RecursiveShapeSplitter) OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) ImageAnalyser(com.joliciel.jochre.analyser.ImageAnalyser) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) SplitFeature(com.joliciel.jochre.boundaries.features.SplitFeature) BufferedWriter(java.io.BufferedWriter) MergeFeatureParser(com.joliciel.jochre.boundaries.features.MergeFeatureParser) LetterValidator(com.joliciel.jochre.letterGuesser.LetterValidator) JochreCorpusImageProcessor(com.joliciel.jochre.graphics.JochreCorpusImageProcessor) LetterFeature(com.joliciel.jochre.letterGuesser.features.LetterFeature) LetterFeatureParser(com.joliciel.jochre.letterGuesser.features.LetterFeatureParser) RecursiveShapeSplitter(com.joliciel.jochre.boundaries.RecursiveShapeSplitter) TrainingCorpusShapeSplitter(com.joliciel.jochre.boundaries.TrainingCorpusShapeSplitter) ShapeSplitter(com.joliciel.jochre.boundaries.ShapeSplitter) SplitCandidateFinder(com.joliciel.jochre.boundaries.SplitCandidateFinder) MergeFeature(com.joliciel.jochre.boundaries.features.MergeFeature) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) BoundaryDetector(com.joliciel.jochre.boundaries.BoundaryDetector) LetterByLetterBoundaryDetector(com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) DocumentObserver(com.joliciel.jochre.doc.DocumentObserver) SplitFeatureParser(com.joliciel.jochre.boundaries.features.SplitFeatureParser) LetterGuesser(com.joliciel.jochre.letterGuesser.LetterGuesser) ErrorLogger(com.joliciel.jochre.analyser.ErrorLogger) 
TrainingCorpusShapeMerger(com.joliciel.jochre.boundaries.TrainingCorpusShapeMerger) ShapeMerger(com.joliciel.jochre.boundaries.ShapeMerger) FileOutputStream(java.io.FileOutputStream) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) OutputStreamWriter(java.io.OutputStreamWriter) ComponentCharacterValidator(com.joliciel.jochre.letterGuesser.ComponentCharacterValidator) File(java.io.File) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel) UnknownWordListWriter(com.joliciel.jochre.lexicon.UnknownWordListWriter) BufferedWriter(java.io.BufferedWriter) Writer(java.io.Writer) LexiconErrorWriter(com.joliciel.jochre.lexicon.LexiconErrorWriter) OutputStreamWriter(java.io.OutputStreamWriter)

Example 2 with ImageAnalyser

use of com.joliciel.jochre.analyser.ImageAnalyser in project jochre by urieli.

the class Jochre method doCommandAnalyse.

/**
 * Runs letter guessing over the corpus images matching the given criteria,
 * using the letter model of the current session.
 *
 * @param criteria
 *          the criteria used to select the documents to be analysed
 * @param wordChooser
 *          the word chooser handed to the image analyser
 * @param observers
 *          the observers, used to create analysis output; registered on the
 *          image processor after the analyser itself
 * @throws IOException
 *           declared for callers; propagated from the steps below
 */
public void doCommandAnalyse(CorpusSelectionCriteria criteria, MostLikelyWordChooser wordChooser, List<DocumentObserver> observers) throws IOException {
    // Build the letter guesser from the features the model was trained with.
    ClassificationModel model = jochreSession.getLetterModel();
    List<String> descriptors = model.getFeatureDescriptors();
    Set<LetterFeature<?>> features = new LetterFeatureParser().getLetterFeatureSet(descriptors);
    LetterGuesser guesser = new LetterGuesser(features, model.getDecisionMaker());

    // No boundary detector is supplied for this command.
    ImageAnalyser imageAnalyser = new BeamSearchImageAnalyser(null, guesser, wordChooser, jochreSession);
    imageAnalyser.addObserver(new LetterAssigner());

    // The analyser is registered first, followed by the caller's observers.
    JochreCorpusImageProcessor processor = new JochreCorpusImageProcessor(criteria, jochreSession);
    processor.addObserver(imageAnalyser);
    for (DocumentObserver documentObserver : observers) {
        processor.addObserver(documentObserver);
    }
    processor.process();
}
Also used : JochreCorpusImageProcessor(com.joliciel.jochre.graphics.JochreCorpusImageProcessor) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) ImageAnalyser(com.joliciel.jochre.analyser.ImageAnalyser) LetterFeature(com.joliciel.jochre.letterGuesser.features.LetterFeature) OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) LetterAssigner(com.joliciel.jochre.analyser.LetterAssigner) DocumentObserver(com.joliciel.jochre.doc.DocumentObserver) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) LetterFeatureParser(com.joliciel.jochre.letterGuesser.features.LetterFeatureParser) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel) LetterGuesser(com.joliciel.jochre.letterGuesser.LetterGuesser)

Example 3 with ImageAnalyser

use of com.joliciel.jochre.analyser.ImageAnalyser in project jochre by urieli.

the class Jochre method doCommandAnalyse.

/**
 * Full analysis, including merge, split and letter guessing.
 * <p>
 * When the session has both a split model and a merge model, boundaries are
 * detected with a {@code DeterministicBoundaryDetector} and letters are
 * assigned through an {@code OriginalShapeLetterAssigner}; otherwise an
 * {@code OriginalBoundaryDetector} and a plain {@code LetterAssigner} are
 * used. The source is processed as a PDF when its name ends in {@code .pdf},
 * and through an {@code ImageDocumentExtractor} when it is a
 * png/jpg/jpeg/gif file or a directory.
 *
 * @param sourceFile
 *          the PDF, image file or directory to analyse
 * @param wordChooser
 *          the word chooser handed to the image analyser
 * @param pages
 *          the pages to process, empty means all (only used for PDF sources)
 * @param observers
 *          document observers registered after the analyser itself
 * @param imageObservers
 *          image observers registered on the PDF processor (PDF sources only)
 * @throws IOException
 *           declared for callers; propagated from the processing steps
 * @throws JochreException
 *           if {@code sourceFile} does not exist
 * @throws RuntimeException
 *           if {@code sourceFile} is neither a directory nor a file with a
 *           recognised extension
 */
public void doCommandAnalyse(File sourceFile, MostLikelyWordChooser wordChooser, Set<Integer> pages, List<DocumentObserver> observers, List<PdfImageObserver> imageObservers) throws IOException {
    // Fail fast: no point loading models for a source that is not there.
    if (!sourceFile.exists()) {
        throw new JochreException("The file " + sourceFile.getPath() + " does not exist");
    }

    ClassificationModel letterModel = jochreSession.getLetterModel();
    List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
    LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());

    BoundaryDetector boundaryDetector = null;
    LetterGuessObserver letterGuessObserver = null;
    if (jochreSession.getSplitModel() != null && jochreSession.getMergeModel() != null) {
        boundaryDetector = new DeterministicBoundaryDetector(jochreSession.getSplitModel(), jochreSession.getMergeModel(), jochreSession);
        OriginalShapeLetterAssigner shapeLetterAssigner = new OriginalShapeLetterAssigner();
        shapeLetterAssigner.setEvaluate(false);
        shapeLetterAssigner.setSingleLetterMethod(false);
        letterGuessObserver = shapeLetterAssigner;
    } else {
        boundaryDetector = new OriginalBoundaryDetector();
        LetterAssigner letterAssigner = new LetterAssigner();
        letterGuessObserver = letterAssigner;
    }

    ImageAnalyser analyser = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);
    analyser.addObserver(letterGuessObserver);
    JochreDocumentGenerator documentGenerator = new JochreDocumentGenerator(sourceFile.getName(), "", jochreSession);
    documentGenerator.addDocumentObserver(analyser);
    for (DocumentObserver observer : observers) {
        documentGenerator.addDocumentObserver(observer);
    }

    // Locale.ROOT keeps the extension match independent of the default locale
    // (e.g. under a Turkish locale "GIF".toLowerCase() does not yield "gif").
    String lowerCaseName = sourceFile.getName().toLowerCase(java.util.Locale.ROOT);
    boolean isImage = lowerCaseName.endsWith(".png") || lowerCaseName.endsWith(".jpg") || lowerCaseName.endsWith(".jpeg") || lowerCaseName.endsWith(".gif");

    if (lowerCaseName.endsWith(".pdf")) {
        PdfDocumentProcessor pdfDocumentProcessor = new PdfDocumentProcessor(sourceFile, pages, documentGenerator);
        for (PdfImageObserver imageObserver : imageObservers) {
            pdfDocumentProcessor.addImageObserver(imageObserver);
        }
        pdfDocumentProcessor.process();
    } else if (isImage || sourceFile.isDirectory()) {
        // Single images and directories go through the same extractor.
        ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator);
        extractor.extractDocument();
    } else {
        throw new RuntimeException("Unrecognised file extension");
    }
}
Also used : PdfImageObserver(com.joliciel.jochre.utils.pdf.PdfImageObserver) OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) ImageAnalyser(com.joliciel.jochre.analyser.ImageAnalyser) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) BoundaryDetector(com.joliciel.jochre.boundaries.BoundaryDetector) LetterByLetterBoundaryDetector(com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) LetterAssigner(com.joliciel.jochre.analyser.LetterAssigner) DocumentObserver(com.joliciel.jochre.doc.DocumentObserver) LetterGuesser(com.joliciel.jochre.letterGuesser.LetterGuesser) LetterGuessObserver(com.joliciel.jochre.analyser.LetterGuessObserver) JochreDocumentGenerator(com.joliciel.jochre.doc.JochreDocumentGenerator) PdfDocumentProcessor(com.joliciel.jochre.pdf.PdfDocumentProcessor) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) JochreException(com.joliciel.jochre.utils.JochreException) LetterFeature(com.joliciel.jochre.letterGuesser.features.LetterFeature) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) LetterFeatureParser(com.joliciel.jochre.letterGuesser.features.LetterFeatureParser) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel) ImageDocumentExtractor(com.joliciel.jochre.doc.ImageDocumentExtractor)

Example 4 with ImageAnalyser

use of com.joliciel.jochre.analyser.ImageAnalyser in project jochre by urieli.

the class Jochre method doCommandEvaluate.

/**
 * Evaluate a given letter guessing model.
 * <p>
 * Output files are named after the letter model path of the current session
 * plus {@code suffix}: an error log {@code <baseName>_errors.txt} (any
 * existing file is deleted first) and an f-score file
 * {@code <baseName>[_Reconstruct]_fscores.csv} (opened in append mode), both
 * in {@code outputDir}. A {@code LexiconErrorWriter} is also created with
 * {@code outputDir} and the same base name.
 *
 * @param criteria
 *          the criteria used to select the evaluation corpus
 * @param outputDir
 *          the directory in which the evaluation files are written
 * @param wordChooser
 *          the word chooser handed to the image analyser and the lexicon
 *          error writer
 * @param reconstructLetters
 *          if true, boundaries are rebuilt with the training-corpus
 *          splitter/merger and scored through an
 *          {@code OriginalShapeLetterAssigner}; otherwise the original
 *          boundaries are used and scored through a
 *          {@code SimpleLetterFScoreObserver}
 * @param save
 *          whether or not the letter guesses should be saved
 * @param suffix
 *          appended to the model base name when naming the output files
 * @param includeBeam
 *          forwarded to the lexicon error writer
 * @param observers
 *          additional observers registered on the corpus image processor
 * @throws IOException
 *           if an output file cannot be opened, written or closed
 */
public void doCommandEvaluate(CorpusSelectionCriteria criteria, File outputDir, MostLikelyWordChooser wordChooser, boolean reconstructLetters, boolean save, String suffix, boolean includeBeam, List<DocumentObserver> observers) throws IOException {
    ClassificationModel letterModel = jochreSession.getLetterModel();
    List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
    LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());

    // Strip the directory part first, then cut at the first dot of the file
    // name. Doing it in this order avoids failing on paths without any dot and
    // avoids cutting at a dot located inside a directory name.
    String baseName = jochreSession.getLetterModelPath();
    baseName = baseName.substring(baseName.lastIndexOf("/") + 1);
    int dotIndex = baseName.indexOf(".");
    if (dotIndex >= 0) {
        baseName = baseName.substring(0, dotIndex);
    }
    baseName += suffix;

    BoundaryDetector boundaryDetector = null;
    if (reconstructLetters) {
        ShapeSplitter splitter = new TrainingCorpusShapeSplitter(jochreSession);
        ShapeMerger merger = new TrainingCorpusShapeMerger();
        boundaryDetector = new LetterByLetterBoundaryDetector(splitter, merger, jochreSession);
    } else {
        boundaryDetector = new OriginalBoundaryDetector();
    }
    ImageAnalyser evaluator = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);

    FScoreObserver fScoreObserver = null;
    LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
    if (reconstructLetters) {
        OriginalShapeLetterAssigner originalShapeLetterAssigner = new OriginalShapeLetterAssigner();
        originalShapeLetterAssigner.setEvaluate(true);
        originalShapeLetterAssigner.setSave(save);
        originalShapeLetterAssigner.setLetterValidator(letterValidator);
        fScoreObserver = originalShapeLetterAssigner;
    } else {
        LetterAssigner letterAssigner = new LetterAssigner();
        letterAssigner.setSave(save);
        evaluator.addObserver(letterAssigner);
        fScoreObserver = new SimpleLetterFScoreObserver(letterValidator, jochreSession);
    }
    evaluator.addObserver(fScoreObserver);

    ErrorLogger errorLogger = new ErrorLogger(jochreSession);
    File errorFile = new File(outputDir, baseName + "_errors.txt");
    errorFile.delete();
    // The writer is opened in try-with-resources so it is closed even if the
    // lexicon writer setup or the document-name scan below throws.
    try (Writer errorWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(errorFile, true), "UTF8"))) {
        errorLogger.setErrorWriter(errorWriter);
        evaluator.addObserver(errorLogger);

        LexiconErrorWriter lexiconErrorWriter = new LexiconErrorWriter(outputDir, baseName, wordChooser, jochreSession);
        if (documentGroups != null) {
            lexiconErrorWriter.setDocumentGroups(documentGroups);
        }
        lexiconErrorWriter.setIncludeBeam(includeBeam);

        // find all document names (alphabetical ordering)
        Set<String> documentNameSet = new TreeSet<>();
        JochreCorpusImageReader imageReader1 = new JochreCorpusImageReader(jochreSession);
        CorpusSelectionCriteria docCriteria = new CorpusSelectionCriteria();
        docCriteria.setImageStatusesToInclude(criteria.getImageStatusesToInclude());
        docCriteria.setImageId(criteria.getImageId());
        docCriteria.setDocumentId(criteria.getDocumentId());
        docCriteria.setDocumentIds(criteria.getDocumentIds());
        imageReader1.setSelectionCriteria(docCriteria);
        JochreDocument currentDoc = null;
        while (imageReader1.hasNext()) {
            JochreImage image = imageReader1.next();
            // Only record a name when the document changes between consecutive images.
            if (!image.getPage().getDocument().equals(currentDoc)) {
                currentDoc = image.getPage().getDocument();
                documentNameSet.add(currentDoc.getName());
            }
        }
        List<String> documentNames = new ArrayList<>(documentNameSet);
        lexiconErrorWriter.setDocumentNames(documentNames);
        evaluator.addObserver(lexiconErrorWriter);

        JochreCorpusImageProcessor imageProcessor = new JochreCorpusImageProcessor(criteria, jochreSession);
        imageProcessor.addObserver(evaluator);
        for (DocumentObserver observer : observers) {
            imageProcessor.addObserver(observer);
        }
        imageProcessor.process();
    }

    LOG.debug("F-score for " + jochreSession.getLetterModelPath() + ": " + fScoreObserver.getFScoreCalculator().getTotalFScore());
    String modelFileName = baseName;
    if (reconstructLetters) {
        modelFileName += "_Reconstruct";
    }
    File fscoreFile = new File(outputDir, modelFileName + "_fscores.csv");
    // Closing the writer here ensures the buffered scores actually reach the file.
    try (Writer fscoreWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fscoreFile, true), jochreSession.getCsvEncoding()))) {
        fScoreObserver.getFScoreCalculator().writeScoresToCSV(fscoreWriter);
    }
}
Also used : LetterByLetterBoundaryDetector(com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector) OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) ImageAnalyser(com.joliciel.jochre.analyser.ImageAnalyser) TrainingCorpusShapeMerger(com.joliciel.jochre.boundaries.TrainingCorpusShapeMerger) LexiconErrorWriter(com.joliciel.jochre.lexicon.LexiconErrorWriter) ArrayList(java.util.ArrayList) JochreDocument(com.joliciel.jochre.doc.JochreDocument) BufferedWriter(java.io.BufferedWriter) JochreCorpusImageReader(com.joliciel.jochre.graphics.JochreCorpusImageReader) LetterValidator(com.joliciel.jochre.letterGuesser.LetterValidator) JochreCorpusImageProcessor(com.joliciel.jochre.graphics.JochreCorpusImageProcessor) LetterFeature(com.joliciel.jochre.letterGuesser.features.LetterFeature) TreeSet(java.util.TreeSet) LetterFeatureParser(com.joliciel.jochre.letterGuesser.features.LetterFeatureParser) RecursiveShapeSplitter(com.joliciel.jochre.boundaries.RecursiveShapeSplitter) TrainingCorpusShapeSplitter(com.joliciel.jochre.boundaries.TrainingCorpusShapeSplitter) ShapeSplitter(com.joliciel.jochre.boundaries.ShapeSplitter) JochreImage(com.joliciel.jochre.graphics.JochreImage) CorpusSelectionCriteria(com.joliciel.jochre.graphics.CorpusSelectionCriteria) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) BoundaryDetector(com.joliciel.jochre.boundaries.BoundaryDetector) LetterByLetterBoundaryDetector(com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) LetterAssigner(com.joliciel.jochre.analyser.LetterAssigner) DocumentObserver(com.joliciel.jochre.doc.DocumentObserver) SimpleLetterFScoreObserver(com.joliciel.jochre.analyser.SimpleLetterFScoreObserver) 
LetterGuesser(com.joliciel.jochre.letterGuesser.LetterGuesser) ErrorLogger(com.joliciel.jochre.analyser.ErrorLogger) SimpleLetterFScoreObserver(com.joliciel.jochre.analyser.SimpleLetterFScoreObserver) FScoreObserver(com.joliciel.jochre.analyser.FScoreObserver) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) TrainingCorpusShapeMerger(com.joliciel.jochre.boundaries.TrainingCorpusShapeMerger) ShapeMerger(com.joliciel.jochre.boundaries.ShapeMerger) FileOutputStream(java.io.FileOutputStream) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) OutputStreamWriter(java.io.OutputStreamWriter) TrainingCorpusShapeSplitter(com.joliciel.jochre.boundaries.TrainingCorpusShapeSplitter) ComponentCharacterValidator(com.joliciel.jochre.letterGuesser.ComponentCharacterValidator) File(java.io.File) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel) UnknownWordListWriter(com.joliciel.jochre.lexicon.UnknownWordListWriter) BufferedWriter(java.io.BufferedWriter) Writer(java.io.Writer) LexiconErrorWriter(com.joliciel.jochre.lexicon.LexiconErrorWriter) OutputStreamWriter(java.io.OutputStreamWriter)

Example 5 with ImageAnalyser

use of com.joliciel.jochre.analyser.ImageAnalyser in project jochre by urieli.

the class JochreDocumentGenerator method requestAnalysis.

/**
 * Requests letter analysis for this document: builds an image analyser from
 * the session's models and inserts it at index 0 of this generator's document
 * observers.
 * <p>
 * If the session has both a split model and a merge model, boundaries are
 * detected with a {@code DeterministicBoundaryDetector} and guesses go to an
 * {@code OriginalShapeLetterAssigner}; otherwise an
 * {@code OriginalBoundaryDetector} and a {@code LetterAssigner} are used.
 *
 * @param wordChooser
 *          the word chooser handed to the image analyser
 * @throws RuntimeException
 *           wrapping any exception raised while setting up the analyser
 */
public void requestAnalysis(MostLikelyWordChooser wordChooser) {
    try {
        // Letter guesser built from the features listed by the letter model.
        ClassificationModel model = jochreSession.getLetterModel();
        List<String> descriptors = model.getFeatureDescriptors();
        LetterFeatureParser featureParser = new LetterFeatureParser();
        Set<LetterFeature<?>> features = featureParser.getLetterFeatureSet(descriptors);
        LetterGuesser guesser = new LetterGuesser(features, model.getDecisionMaker());

        BoundaryDetector detector;
        LetterGuessObserver guessObserver;
        if (jochreSession.getSplitModel() == null || jochreSession.getMergeModel() == null) {
            // At least one boundary model is missing: keep the original shapes.
            detector = new OriginalBoundaryDetector();
            LetterAssigner assigner = new LetterAssigner();
            assigner.setSave(save);
            guessObserver = assigner;
        } else {
            detector = new DeterministicBoundaryDetector(jochreSession.getSplitModel(), jochreSession.getMergeModel(), jochreSession);
            OriginalShapeLetterAssigner assigner = new OriginalShapeLetterAssigner();
            assigner.setEvaluate(false);
            assigner.setSave(save);
            assigner.setSingleLetterMethod(false);
            guessObserver = assigner;
        }

        ImageAnalyser imageAnalyser = new BeamSearchImageAnalyser(detector, guesser, wordChooser, jochreSession);
        imageAnalyser.addObserver(guessObserver);
        // Index 0 places the analyser ahead of any observers already registered.
        this.documentObservers.add(0, imageAnalyser);
    } catch (Exception failure) {
        LOG.error("Failed to load models", failure);
        throw new RuntimeException(failure);
    }
}
Also used : OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) ImageAnalyser(com.joliciel.jochre.analyser.ImageAnalyser) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) BoundaryDetector(com.joliciel.jochre.boundaries.BoundaryDetector) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) LetterAssigner(com.joliciel.jochre.analyser.LetterAssigner) OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) LetterGuesser(com.joliciel.jochre.letterGuesser.LetterGuesser) IOException(java.io.IOException) JochreException(com.joliciel.jochre.utils.JochreException) LetterGuessObserver(com.joliciel.jochre.analyser.LetterGuessObserver) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) LetterFeature(com.joliciel.jochre.letterGuesser.features.LetterFeature) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) LetterFeatureParser(com.joliciel.jochre.letterGuesser.features.LetterFeatureParser) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel)

Aggregations

BeamSearchImageAnalyser (com.joliciel.jochre.analyser.BeamSearchImageAnalyser)5 ImageAnalyser (com.joliciel.jochre.analyser.ImageAnalyser)5 OriginalShapeLetterAssigner (com.joliciel.jochre.analyser.OriginalShapeLetterAssigner)5 LetterGuesser (com.joliciel.jochre.letterGuesser.LetterGuesser)5 LetterFeature (com.joliciel.jochre.letterGuesser.features.LetterFeature)5 LetterFeatureParser (com.joliciel.jochre.letterGuesser.features.LetterFeatureParser)5 ClassificationModel (com.joliciel.talismane.machineLearning.ClassificationModel)5 LetterAssigner (com.joliciel.jochre.analyser.LetterAssigner)4 BoundaryDetector (com.joliciel.jochre.boundaries.BoundaryDetector)4 DeterministicBoundaryDetector (com.joliciel.jochre.boundaries.DeterministicBoundaryDetector)4 OriginalBoundaryDetector (com.joliciel.jochre.boundaries.OriginalBoundaryDetector)4 DocumentObserver (com.joliciel.jochre.doc.DocumentObserver)4 LetterByLetterBoundaryDetector (com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector)3 JochreCorpusImageProcessor (com.joliciel.jochre.graphics.JochreCorpusImageProcessor)3 ErrorLogger (com.joliciel.jochre.analyser.ErrorLogger)2 LetterGuessObserver (com.joliciel.jochre.analyser.LetterGuessObserver)2 RecursiveShapeSplitter (com.joliciel.jochre.boundaries.RecursiveShapeSplitter)2 ShapeMerger (com.joliciel.jochre.boundaries.ShapeMerger)2 ShapeSplitter (com.joliciel.jochre.boundaries.ShapeSplitter)2 TrainingCorpusShapeMerger (com.joliciel.jochre.boundaries.TrainingCorpusShapeMerger)2