Search in sources :

Example 1 with SplitFeatureParser

use of com.joliciel.jochre.boundaries.features.SplitFeatureParser in project jochre by urieli.

In class Jochre, method doCommandEvaluateFull.

/**
 * Evaluate a suite of split/merge models and letter guessing model.
 * <p>
 * Two files are written to {@code outputDir}: an error log named
 * {@code <baseName><suffix>errors.txt} and an f-score report named
 * {@code <baseName><suffix>_full_fscores.csv}, where {@code baseName} is the
 * letter model's file name without its directory and without anything from
 * the first dot onwards.
 *
 * @param criteria
 *          for selecting the evaluation corpus
 * @param save
 *          whether or not the letter guesses should be saved
 * @param outputDir
 *          the directory in which the error log and f-score CSV are created
 * @param wordChooser
 *          the word chooser handed to the beam search image analyser
 * @param suffix
 *          text appended to the model base name when building output file
 *          names
 * @param observers
 *          additional observers registered on the corpus image processor
 * @throws IOException
 *           if an output file cannot be opened or written
 * @throws IllegalArgumentException
 *           if the split model or the merge model is not configured
 */
public void doCommandEvaluateFull(CorpusSelectionCriteria criteria, boolean save, File outputDir, MostLikelyWordChooser wordChooser, String suffix, List<DocumentObserver> observers) throws IOException {
    // Strip the directory first and the extension second, so that a dot in a
    // directory name (e.g. "./models/x.zip") or a path without any dot cannot
    // produce an empty name or a StringIndexOutOfBoundsException.
    String baseName = jochreSession.getLetterModelPath();
    int lastSlash = baseName.lastIndexOf("/");
    if (lastSlash >= 0)
        baseName = baseName.substring(lastSlash + 1);
    int firstDot = baseName.indexOf(".");
    if (firstDot >= 0)
        baseName = baseName.substring(0, firstDot);

    // Letter guessing model.
    ClassificationModel letterModel = jochreSession.getLetterModel();
    List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
    LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());

    // Split model.
    ClassificationModel splitModel = jochreSession.getSplitModel();
    if (splitModel == null)
        throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.split-model");
    List<String> splitFeatureDescriptors = splitModel.getFeatureDescriptors();
    SplitFeatureParser splitFeatureParser = new SplitFeatureParser();
    Set<SplitFeature<?>> splitFeatures = splitFeatureParser.getSplitFeatureSet(splitFeatureDescriptors);
    SplitCandidateFinder splitCandidateFinder = new SplitCandidateFinder(jochreSession);
    splitCandidateFinder.setMinDistanceBetweenSplits(5);
    ShapeSplitter shapeSplitter = new RecursiveShapeSplitter(splitCandidateFinder, splitFeatures, splitModel.getDecisionMaker(), jochreSession);

    // Merge model.
    ClassificationModel mergeModel = jochreSession.getMergeModel();
    if (mergeModel == null)
        throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.merge-model");
    List<String> mergeFeatureDescriptors = mergeModel.getFeatureDescriptors();
    MergeFeatureParser mergeFeatureParser = new MergeFeatureParser();
    Set<MergeFeature<?>> mergeFeatures = mergeFeatureParser.getMergeFeatureSet(mergeFeatureDescriptors);
    ShapeMerger shapeMerger = new ShapeMerger(mergeFeatures, mergeModel.getDecisionMaker());

    // Boundary detector, selected by configuration.
    // NOTE(review): any type other than the two handled below leaves the
    // detector null - confirm the image analyser accepts that.
    BoundaryDetector boundaryDetector = null;
    String boundaryDetectorTypeName = jochreSession.getConfig().getConfig("jochre.boundaries").getString("boundary-detector-type");
    BoundaryDetectorType boundaryDetectorType = BoundaryDetectorType.valueOf(boundaryDetectorTypeName);
    switch(boundaryDetectorType) {
        case LetterByLetter:
            boundaryDetector = new LetterByLetterBoundaryDetector(shapeSplitter, shapeMerger, jochreSession);
            break;
        case Deterministic:
            boundaryDetector = new DeterministicBoundaryDetector(shapeSplitter, shapeMerger, jochreSession);
            break;
    }

    ImageAnalyser imageAnalyser = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);
    LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
    OriginalShapeLetterAssigner shapeLetterAssigner = new OriginalShapeLetterAssigner();
    shapeLetterAssigner.setEvaluate(true);
    shapeLetterAssigner.setSave(save);
    shapeLetterAssigner.setLetterValidator(letterValidator);
    shapeLetterAssigner.setSingleLetterMethod(false);
    imageAnalyser.addObserver(shapeLetterAssigner);

    ErrorLogger errorLogger = new ErrorLogger(jochreSession);
    File errorFile = new File(outputDir, baseName + suffix + "errors.txt");
    // Start from a fresh error log on every run.
    errorFile.delete();

    // The error writer has to stay open for the whole corpus run and must be
    // closed (and thereby flushed) afterwards, even if processing fails.
    try (Writer errorWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(errorFile, true), "UTF8"))) {
        errorLogger.setErrorWriter(errorWriter);
        imageAnalyser.addObserver(errorLogger);
        JochreCorpusImageProcessor imageProcessor = new JochreCorpusImageProcessor(criteria, jochreSession);
        imageProcessor.addObserver(imageAnalyser);
        for (DocumentObserver observer : observers) imageProcessor.addObserver(observer);
        imageProcessor.process();
    }

    LOG.debug("F-score for " + jochreSession.getLetterModelPath() + ": " + shapeLetterAssigner.getFScoreCalculator().getTotalFScore());
    String modelFileName = baseName + suffix + "_full";
    File fscoreFile = new File(outputDir, modelFileName + "_fscores.csv");
    // The f-score file is opened in append mode and, unlike the error log, is
    // not deleted beforehand.
    try (Writer fscoreWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fscoreFile, true), jochreSession.getCsvEncoding()))) {
        shapeLetterAssigner.getFScoreCalculator().writeScoresToCSV(fscoreWriter);
    }
}
Also used : LetterByLetterBoundaryDetector(com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector) RecursiveShapeSplitter(com.joliciel.jochre.boundaries.RecursiveShapeSplitter) OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) ImageAnalyser(com.joliciel.jochre.analyser.ImageAnalyser) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) SplitFeature(com.joliciel.jochre.boundaries.features.SplitFeature) BufferedWriter(java.io.BufferedWriter) MergeFeatureParser(com.joliciel.jochre.boundaries.features.MergeFeatureParser) LetterValidator(com.joliciel.jochre.letterGuesser.LetterValidator) JochreCorpusImageProcessor(com.joliciel.jochre.graphics.JochreCorpusImageProcessor) LetterFeature(com.joliciel.jochre.letterGuesser.features.LetterFeature) LetterFeatureParser(com.joliciel.jochre.letterGuesser.features.LetterFeatureParser) RecursiveShapeSplitter(com.joliciel.jochre.boundaries.RecursiveShapeSplitter) TrainingCorpusShapeSplitter(com.joliciel.jochre.boundaries.TrainingCorpusShapeSplitter) ShapeSplitter(com.joliciel.jochre.boundaries.ShapeSplitter) SplitCandidateFinder(com.joliciel.jochre.boundaries.SplitCandidateFinder) MergeFeature(com.joliciel.jochre.boundaries.features.MergeFeature) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) BoundaryDetector(com.joliciel.jochre.boundaries.BoundaryDetector) LetterByLetterBoundaryDetector(com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) DocumentObserver(com.joliciel.jochre.doc.DocumentObserver) SplitFeatureParser(com.joliciel.jochre.boundaries.features.SplitFeatureParser) LetterGuesser(com.joliciel.jochre.letterGuesser.LetterGuesser) ErrorLogger(com.joliciel.jochre.analyser.ErrorLogger) 
TrainingCorpusShapeMerger(com.joliciel.jochre.boundaries.TrainingCorpusShapeMerger) ShapeMerger(com.joliciel.jochre.boundaries.ShapeMerger) FileOutputStream(java.io.FileOutputStream) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) OutputStreamWriter(java.io.OutputStreamWriter) ComponentCharacterValidator(com.joliciel.jochre.letterGuesser.ComponentCharacterValidator) File(java.io.File) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel) UnknownWordListWriter(com.joliciel.jochre.lexicon.UnknownWordListWriter) BufferedWriter(java.io.BufferedWriter) Writer(java.io.Writer) LexiconErrorWriter(com.joliciel.jochre.lexicon.LexiconErrorWriter) OutputStreamWriter(java.io.OutputStreamWriter)

Example 2 with SplitFeatureParser

use of com.joliciel.jochre.boundaries.features.SplitFeatureParser in project jochre by urieli.

In class Jochre, method doCommandTrainSplits.

/**
 * Train the letter splitting model and persist it to the split model path
 * configured on the session.
 *
 * @param featureDescriptors
 *          the feature descriptors for training this model
 * @param criteria
 *          the criteria used to select the training corpus
 * @throws RuntimeException
 *           if no split model path is configured
 * @throws JochreException
 *           if {@code featureDescriptors} is null
 */
public void doCommandTrainSplits(List<String> featureDescriptors, CorpusSelectionCriteria criteria) {
    if (jochreSession.getSplitModelPath() == null)
        throw new RuntimeException("Missing argument: splitModel");
    if (featureDescriptors == null)
        throw new JochreException("features is required");
    File splitModelFile = new File(jochreSession.getSplitModelPath());
    // A path with no directory component (e.g. "splits.zip") has no parent
    // file; in that case there is nothing to create.
    File parentDir = splitModelFile.getParentFile();
    if (parentDir != null)
        parentDir.mkdirs();
    SplitFeatureParser splitFeatureParser = new SplitFeatureParser();
    Set<SplitFeature<?>> splitFeatures = splitFeatureParser.getSplitFeatureSet(featureDescriptors);
    ClassificationEventStream corpusEventStream = new JochreSplitEventStream(criteria, splitFeatures, jochreSession);
    ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
    ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
    ClassificationModel splitModel = trainer.trainModel(corpusEventStream, featureDescriptors);
    splitModel.persist(splitModelFile);
}
Also used : ClassificationEventStream(com.joliciel.talismane.machineLearning.ClassificationEventStream) ModelTrainerFactory(com.joliciel.talismane.machineLearning.ModelTrainerFactory) JochreException(com.joliciel.jochre.utils.JochreException) ClassificationModelTrainer(com.joliciel.talismane.machineLearning.ClassificationModelTrainer) SplitFeatureParser(com.joliciel.jochre.boundaries.features.SplitFeatureParser) SplitFeature(com.joliciel.jochre.boundaries.features.SplitFeature) JochreSplitEventStream(com.joliciel.jochre.boundaries.JochreSplitEventStream) File(java.io.File) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel)

Example 3 with SplitFeatureParser

use of com.joliciel.jochre.boundaries.features.SplitFeatureParser in project jochre by urieli.

In class Jochre, method doCommandEvaluateSplits.

/**
 * Run a stand-alone evaluation of the letter splitting model and log the
 * resulting total f-score at debug level.
 *
 * @param criteria
 *          the criteria used to select the evaluation corpus
 * @throws IOException
 *           declared by the method; propagated from the calls it makes
 * @throws IllegalArgumentException
 *           if the session has no split model
 */
public void doCommandEvaluateSplits(CorpusSelectionCriteria criteria) throws IOException {
    final ClassificationModel model = jochreSession.getSplitModel();
    if (model == null) {
        throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.split-model");
    }

    // Rebuild the feature set from the descriptors stored with the model.
    final List<String> descriptors = model.getFeatureDescriptors();
    final SplitFeatureParser parser = new SplitFeatureParser();
    final Set<SplitFeature<?>> features = parser.getSplitFeatureSet(descriptors);

    // Assemble the splitter under evaluation.
    final SplitCandidateFinder candidateFinder = new SplitCandidateFinder(jochreSession);
    candidateFinder.setMinDistanceBetweenSplits(5);
    final ShapeSplitter splitter = new RecursiveShapeSplitter(candidateFinder, features, model.getDecisionMaker(), jochreSession);

    // Corpus reader restricted by the caller's selection criteria.
    final JochreCorpusShapeReader corpusReader = new JochreCorpusShapeReader(jochreSession);
    corpusReader.setSelectionCriteria(criteria);

    final FScoreCalculator<String> scores = new SplitEvaluator(jochreSession).evaluate(corpusReader, splitter);
    LOG.debug("" + scores.getTotalFScore());
}
Also used : RecursiveShapeSplitter(com.joliciel.jochre.boundaries.RecursiveShapeSplitter) SplitFeatureParser(com.joliciel.jochre.boundaries.features.SplitFeatureParser) SplitFeature(com.joliciel.jochre.boundaries.features.SplitFeature) JochreCorpusShapeReader(com.joliciel.jochre.graphics.JochreCorpusShapeReader) SplitEvaluator(com.joliciel.jochre.boundaries.SplitEvaluator) RecursiveShapeSplitter(com.joliciel.jochre.boundaries.RecursiveShapeSplitter) TrainingCorpusShapeSplitter(com.joliciel.jochre.boundaries.TrainingCorpusShapeSplitter) ShapeSplitter(com.joliciel.jochre.boundaries.ShapeSplitter) SplitCandidateFinder(com.joliciel.jochre.boundaries.SplitCandidateFinder) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel)

Aggregations

SplitFeature (com.joliciel.jochre.boundaries.features.SplitFeature)3 SplitFeatureParser (com.joliciel.jochre.boundaries.features.SplitFeatureParser)3 ClassificationModel (com.joliciel.talismane.machineLearning.ClassificationModel)3 RecursiveShapeSplitter (com.joliciel.jochre.boundaries.RecursiveShapeSplitter)2 ShapeSplitter (com.joliciel.jochre.boundaries.ShapeSplitter)2 SplitCandidateFinder (com.joliciel.jochre.boundaries.SplitCandidateFinder)2 TrainingCorpusShapeSplitter (com.joliciel.jochre.boundaries.TrainingCorpusShapeSplitter)2 File (java.io.File)2 BeamSearchImageAnalyser (com.joliciel.jochre.analyser.BeamSearchImageAnalyser)1 ErrorLogger (com.joliciel.jochre.analyser.ErrorLogger)1 ImageAnalyser (com.joliciel.jochre.analyser.ImageAnalyser)1 OriginalShapeLetterAssigner (com.joliciel.jochre.analyser.OriginalShapeLetterAssigner)1 BoundaryDetector (com.joliciel.jochre.boundaries.BoundaryDetector)1 DeterministicBoundaryDetector (com.joliciel.jochre.boundaries.DeterministicBoundaryDetector)1 JochreSplitEventStream (com.joliciel.jochre.boundaries.JochreSplitEventStream)1 LetterByLetterBoundaryDetector (com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector)1 OriginalBoundaryDetector (com.joliciel.jochre.boundaries.OriginalBoundaryDetector)1 ShapeMerger (com.joliciel.jochre.boundaries.ShapeMerger)1 SplitEvaluator (com.joliciel.jochre.boundaries.SplitEvaluator)1 TrainingCorpusShapeMerger (com.joliciel.jochre.boundaries.TrainingCorpusShapeMerger)1