Search in sources:

Example 11 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

The class FScoreCalculator, method writeScoresToCSV.

/**
 * Writes the per-outcome score table as CSV to the supplied writer.
 * <p>
 * Layout, in order:
 * <ul>
 * <li>a header row: "outcome", one column per outcome (sorted via a
 * {@link TreeSet}), then true+, false+, false-, precision, recall, f-score;</li>
 * <li>one row per outcome: for each column outcome, the count found in
 * {@code getFalseNegatives(rowOutcome)} for that column (0 when absent),
 * followed by the outcome's counts and its precision, recall and f-score
 * multiplied by 100;</li>
 * <li>a TOTAL row using the {@code getTotal...} accessors;</li>
 * <li>an AVERAGE row holding the unweighted mean of the per-outcome
 * precision, recall and f-score, multiplied by 100.</li>
 * </ul>
 *
 * @param fscoreFileWriter
 *          the writer receiving the CSV text; it is not flushed or closed
 *          by this method
 * @throws JochreException
 *           wrapping any {@link IOException} raised while writing
 */
public void writeScoresToCSV(Writer fscoreFileWriter) {
    try {
        Set<E> outcomeSet = new TreeSet<E>();
        outcomeSet.addAll(this.getOutcomeSet());
        int outcomeCount = outcomeSet.size();

        // Header row.
        fscoreFileWriter.write(CSV.format("outcome"));
        for (E outcome : outcomeSet) {
            fscoreFileWriter.write(CSV.format(outcome.toString()));
        }
        fscoreFileWriter.write(CSV.format("true+") + CSV.format("false+") + CSV.format("false-") + CSV.format("precision") + CSV.format("recall") + CSV.format("f-score"));
        fscoreFileWriter.write("\n");

        double totalPrecisionSum = 0;
        double totalRecallSum = 0;
        double totalFscoreSum = 0;

        // One row per outcome.
        for (E outcome : outcomeSet) {
            fscoreFileWriter.write(CSV.format(outcome.toString()));

            // The map only depends on the row outcome, so fetch it once per row.
            Map<E, Integer> falseNegatives = this.getFalseNegatives(outcome);
            for (E outcome2 : outcomeSet) {
                int falseNegativeCount = 0;
                if (falseNegatives != null) {
                    Integer recorded = falseNegatives.get(outcome2);
                    if (recorded != null) {
                        falseNegativeCount = recorded;
                    }
                }
                fscoreFileWriter.write(CSV.format(falseNegativeCount));
            }

            fscoreFileWriter.write(CSV.format(this.getTruePositiveCount(outcome)));
            fscoreFileWriter.write(CSV.format(this.getFalsePositiveCount(outcome)));
            fscoreFileWriter.write(CSV.format(this.getFalseNegativeCount(outcome)));
            fscoreFileWriter.write(CSV.format(this.getPrecision(outcome) * 100));
            fscoreFileWriter.write(CSV.format(this.getRecall(outcome) * 100));
            fscoreFileWriter.write(CSV.format(this.getFScore(outcome) * 100));
            fscoreFileWriter.write("\n");

            totalPrecisionSum += this.getPrecision(outcome);
            totalRecallSum += this.getRecall(outcome);
            totalFscoreSum += this.getFScore(outcome);
        }

        // TOTAL row: skip the per-outcome columns with bare separators.
        fscoreFileWriter.write(CSV.format("TOTAL"));
        for (int i = 0; i < outcomeCount; i++) {
            fscoreFileWriter.write(CSV.getCsvSeparator());
        }
        fscoreFileWriter.write(CSV.format(this.getTotalTruePositiveCount()));
        fscoreFileWriter.write(CSV.format(this.getTotalFalsePositiveCount()));
        fscoreFileWriter.write(CSV.format(this.getTotalFalseNegativeCount()));
        fscoreFileWriter.write(CSV.format(this.getTotalPrecision() * 100));
        fscoreFileWriter.write(CSV.format(this.getTotalRecall() * 100));
        fscoreFileWriter.write(CSV.format(this.getTotalFScore() * 100));
        fscoreFileWriter.write("\n");

        // AVERAGE row: skip the per-outcome columns and the three count columns.
        fscoreFileWriter.write(CSV.format("AVERAGE"));
        for (int i = 0; i < outcomeCount; i++) {
            fscoreFileWriter.write(CSV.getCsvSeparator());
        }
        fscoreFileWriter.write(CSV.getCsvSeparator());
        fscoreFileWriter.write(CSV.getCsvSeparator());
        fscoreFileWriter.write(CSV.getCsvSeparator());
        // NOTE: with an empty outcome set these divisions evaluate to NaN (0.0 / 0).
        fscoreFileWriter.write(CSV.format((totalPrecisionSum / outcomeCount) * 100));
        fscoreFileWriter.write(CSV.format((totalRecallSum / outcomeCount) * 100));
        fscoreFileWriter.write(CSV.format((totalFscoreSum / outcomeCount) * 100));
        fscoreFileWriter.write("\n");
    } catch (IOException ioe) {
        throw new JochreException(ioe);
    }
}
Also used : JochreException(com.joliciel.jochre.utils.JochreException) TreeSet(java.util.TreeSet) DecimalFormat(java.text.DecimalFormat) IOException(java.io.IOException)

Example 12 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

The class Jochre, method doCommandTrain.

/**
 * Train a letter guessing model and persist it to the session's letter
 * model path.
 *
 * @param featureDescriptors
 *          the feature descriptors for training
 * @param criteria
 *          criteria for selecting images to include when training
 * @param reconstructLetters
 *          whether or not complete letters should be reconstructed for
 *          training, from merged/split letters
 * @throws RuntimeException
 *           if the session has no letter model path
 * @throws JochreException
 *           if {@code featureDescriptors} is null
 */
public void doCommandTrain(List<String> featureDescriptors, CorpusSelectionCriteria criteria, boolean reconstructLetters) {
    if (jochreSession.getLetterModelPath() == null) {
        throw new RuntimeException("Missing argument: letterModel");
    }
    if (featureDescriptors == null) {
        throw new JochreException("features is required");
    }

    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> features = letterFeatureParser.getLetterFeatureSet(featureDescriptors);

    // Choose how shape boundaries are determined while building the corpus.
    BoundaryDetector boundaryDetector;
    if (reconstructLetters) {
        ShapeSplitter splitter = new TrainingCorpusShapeSplitter(jochreSession);
        ShapeMerger merger = new TrainingCorpusShapeMerger();
        boundaryDetector = new LetterByLetterBoundaryDetector(splitter, merger, jochreSession);
    } else {
        boundaryDetector = new OriginalBoundaryDetector();
    }

    LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
    ClassificationEventStream corpusEventStream = new JochreLetterEventStream(features, boundaryDetector, letterValidator, criteria, jochreSession);

    File letterModelFile = new File(jochreSession.getLetterModelPath());
    // getParentFile() returns null for a path without a parent component
    // (e.g. "letters.zip"); in that case there is no directory to create.
    File letterModelDir = letterModelFile.getParentFile();
    if (letterModelDir != null) {
        letterModelDir.mkdirs();
    }

    ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
    ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
    ClassificationModel letterModel = trainer.trainModel(corpusEventStream, featureDescriptors);
    letterModel.persist(letterModelFile);
}
Also used : LetterByLetterBoundaryDetector(com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) BoundaryDetector(com.joliciel.jochre.boundaries.BoundaryDetector) LetterByLetterBoundaryDetector(com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) TrainingCorpusShapeMerger(com.joliciel.jochre.boundaries.TrainingCorpusShapeMerger) LetterValidator(com.joliciel.jochre.letterGuesser.LetterValidator) ClassificationEventStream(com.joliciel.talismane.machineLearning.ClassificationEventStream) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) JochreLetterEventStream(com.joliciel.jochre.letterGuesser.JochreLetterEventStream) ModelTrainerFactory(com.joliciel.talismane.machineLearning.ModelTrainerFactory) JochreException(com.joliciel.jochre.utils.JochreException) ClassificationModelTrainer(com.joliciel.talismane.machineLearning.ClassificationModelTrainer) LetterFeature(com.joliciel.jochre.letterGuesser.features.LetterFeature) TrainingCorpusShapeMerger(com.joliciel.jochre.boundaries.TrainingCorpusShapeMerger) ShapeMerger(com.joliciel.jochre.boundaries.ShapeMerger) LetterFeatureParser(com.joliciel.jochre.letterGuesser.features.LetterFeatureParser) TrainingCorpusShapeSplitter(com.joliciel.jochre.boundaries.TrainingCorpusShapeSplitter) RecursiveShapeSplitter(com.joliciel.jochre.boundaries.RecursiveShapeSplitter) TrainingCorpusShapeSplitter(com.joliciel.jochre.boundaries.TrainingCorpusShapeSplitter) ShapeSplitter(com.joliciel.jochre.boundaries.ShapeSplitter) ComponentCharacterValidator(com.joliciel.jochre.letterGuesser.ComponentCharacterValidator) File(java.io.File) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel)

Example 13 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

The class Jochre, method doCommandTrainMerge.

/**
 * Train the letter merging model and persist it to the session's merge
 * model path.
 *
 * @param featureDescriptors
 *          feature descriptors for training
 * @param multiplier
 *          if &gt; 0, will be used to equalize the outcomes
 * @param criteria
 *          the criteria used to select the training corpus
 * @throws RuntimeException
 *           if the session has no merge model path
 * @throws JochreException
 *           if {@code featureDescriptors} is null
 */
public void doCommandTrainMerge(List<String> featureDescriptors, int multiplier, CorpusSelectionCriteria criteria) {
    if (jochreSession.getMergeModelPath() == null) {
        throw new RuntimeException("Missing argument: mergeModel");
    }
    if (featureDescriptors == null) {
        throw new JochreException("features is required");
    }

    File mergeModelFile = new File(jochreSession.getMergeModelPath());
    // getParentFile() returns null for a path without a parent component
    // (e.g. "merge.zip"); in that case there is no directory to create.
    File mergeModelDir = mergeModelFile.getParentFile();
    if (mergeModelDir != null) {
        mergeModelDir.mkdirs();
    }

    MergeFeatureParser mergeFeatureParser = new MergeFeatureParser();
    Set<MergeFeature<?>> mergeFeatures = mergeFeatureParser.getMergeFeatureSet(featureDescriptors);

    ClassificationEventStream corpusEventStream = new JochreMergeEventStream(criteria, mergeFeatures, jochreSession);
    if (multiplier > 0) {
        // Wrap the corpus stream so outcomes are equalised with the given multiplier.
        corpusEventStream = new OutcomeEqualiserEventStream(corpusEventStream, multiplier);
    }

    ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
    ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
    ClassificationModel mergeModel = trainer.trainModel(corpusEventStream, featureDescriptors);
    mergeModel.persist(mergeModelFile);
}
Also used : MergeFeatureParser(com.joliciel.jochre.boundaries.features.MergeFeatureParser) ClassificationEventStream(com.joliciel.talismane.machineLearning.ClassificationEventStream) ModelTrainerFactory(com.joliciel.talismane.machineLearning.ModelTrainerFactory) JochreException(com.joliciel.jochre.utils.JochreException) ClassificationModelTrainer(com.joliciel.talismane.machineLearning.ClassificationModelTrainer) MergeFeature(com.joliciel.jochre.boundaries.features.MergeFeature) File(java.io.File) JochreMergeEventStream(com.joliciel.jochre.boundaries.JochreMergeEventStream) OutcomeEqualiserEventStream(com.joliciel.talismane.machineLearning.OutcomeEqualiserEventStream) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel)

Example 14 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

The class Jochre, method doCommandTrainSplits.

/**
 * Train the letter splitting model and persist it to the session's split
 * model path.
 *
 * @param featureDescriptors
 *          the feature descriptors for training this model
 * @param criteria
 *          the criteria used to select the training corpus
 * @throws RuntimeException
 *           if the session has no split model path
 * @throws JochreException
 *           if {@code featureDescriptors} is null
 */
public void doCommandTrainSplits(List<String> featureDescriptors, CorpusSelectionCriteria criteria) {
    if (jochreSession.getSplitModelPath() == null) {
        throw new RuntimeException("Missing argument: splitModel");
    }
    if (featureDescriptors == null) {
        throw new JochreException("features is required");
    }

    File splitModelFile = new File(jochreSession.getSplitModelPath());
    // getParentFile() returns null for a path without a parent component
    // (e.g. "split.zip"); in that case there is no directory to create.
    File splitModelDir = splitModelFile.getParentFile();
    if (splitModelDir != null) {
        splitModelDir.mkdirs();
    }

    SplitFeatureParser splitFeatureParser = new SplitFeatureParser();
    Set<SplitFeature<?>> splitFeatures = splitFeatureParser.getSplitFeatureSet(featureDescriptors);
    ClassificationEventStream corpusEventStream = new JochreSplitEventStream(criteria, splitFeatures, jochreSession);

    ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
    ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
    ClassificationModel splitModel = trainer.trainModel(corpusEventStream, featureDescriptors);
    splitModel.persist(splitModelFile);
}
Also used : ClassificationEventStream(com.joliciel.talismane.machineLearning.ClassificationEventStream) ModelTrainerFactory(com.joliciel.talismane.machineLearning.ModelTrainerFactory) JochreException(com.joliciel.jochre.utils.JochreException) ClassificationModelTrainer(com.joliciel.talismane.machineLearning.ClassificationModelTrainer) SplitFeatureParser(com.joliciel.jochre.boundaries.features.SplitFeatureParser) SplitFeature(com.joliciel.jochre.boundaries.features.SplitFeature) JochreSplitEventStream(com.joliciel.jochre.boundaries.JochreSplitEventStream) File(java.io.File) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel)

Example 15 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

The class Jochre, method doCommandAnalyse.

/**
 * Full analysis, including merge, split and letter guessing.
 * <p>
 * When the session provides both a split model and a merge model, a
 * {@code DeterministicBoundaryDetector} is used together with an
 * {@code OriginalShapeLetterAssigner}; otherwise the original boundaries
 * are kept and a plain {@code LetterAssigner} is used.
 *
 * @param sourceFile
 *          the input to analyse: a file ending in .pdf, .png, .jpg, .jpeg
 *          or .gif (case-insensitive), or a directory
 * @param wordChooser
 *          the word chooser handed to the image analyser
 * @param pages
 *          the pages to process, empty means all
 * @param observers
 *          document observers registered on the document generator, in
 *          addition to the image analyser
 * @param imageObservers
 *          image observers registered on the PDF processor; only used when
 *          {@code sourceFile} is a PDF
 * @throws IOException
 *           declared for the underlying document processing
 * @throws JochreException
 *           if {@code sourceFile} does not exist
 * @throws RuntimeException
 *           if {@code sourceFile} is neither a directory nor a file with a
 *           recognised extension
 */
public void doCommandAnalyse(File sourceFile, MostLikelyWordChooser wordChooser, Set<Integer> pages, List<DocumentObserver> observers, List<PdfImageObserver> imageObservers) throws IOException {
    ClassificationModel letterModel = jochreSession.getLetterModel();
    List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
    LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());

    BoundaryDetector boundaryDetector;
    LetterGuessObserver letterGuessObserver;
    if (jochreSession.getSplitModel() != null && jochreSession.getMergeModel() != null) {
        boundaryDetector = new DeterministicBoundaryDetector(jochreSession.getSplitModel(), jochreSession.getMergeModel(), jochreSession);
        OriginalShapeLetterAssigner shapeLetterAssigner = new OriginalShapeLetterAssigner();
        shapeLetterAssigner.setEvaluate(false);
        shapeLetterAssigner.setSingleLetterMethod(false);
        letterGuessObserver = shapeLetterAssigner;
    } else {
        boundaryDetector = new OriginalBoundaryDetector();
        letterGuessObserver = new LetterAssigner();
    }

    ImageAnalyser analyser = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);
    analyser.addObserver(letterGuessObserver);

    JochreDocumentGenerator documentGenerator = new JochreDocumentGenerator(sourceFile.getName(), "", jochreSession);
    documentGenerator.addDocumentObserver(analyser);
    for (DocumentObserver observer : observers) {
        documentGenerator.addDocumentObserver(observer);
    }

    if (!sourceFile.exists()) {
        throw new JochreException("The file " + sourceFile.getPath() + " does not exist");
    }

    // Lower-case once, with a fixed locale: the default-locale variant maps
    // 'I' to a dotless 'ı' under Turkish locales, so "SCAN.GIF" would not match ".gif".
    String lowerCaseName = sourceFile.getName().toLowerCase(java.util.Locale.ROOT);

    if (lowerCaseName.endsWith(".pdf")) {
        PdfDocumentProcessor pdfDocumentProcessor = new PdfDocumentProcessor(sourceFile, pages, documentGenerator);
        for (PdfImageObserver imageObserver : imageObservers) {
            pdfDocumentProcessor.addImageObserver(imageObserver);
        }
        pdfDocumentProcessor.process();
    } else if (lowerCaseName.endsWith(".png") || lowerCaseName.endsWith(".jpg") || lowerCaseName.endsWith(".jpeg") || lowerCaseName.endsWith(".gif")) {
        ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator);
        extractor.extractDocument();
    } else if (sourceFile.isDirectory()) {
        ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator);
        extractor.extractDocument();
    } else {
        throw new RuntimeException("Unrecognised file extension");
    }
}
Also used : PdfImageObserver(com.joliciel.jochre.utils.pdf.PdfImageObserver) OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) ImageAnalyser(com.joliciel.jochre.analyser.ImageAnalyser) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) BoundaryDetector(com.joliciel.jochre.boundaries.BoundaryDetector) LetterByLetterBoundaryDetector(com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) DeterministicBoundaryDetector(com.joliciel.jochre.boundaries.DeterministicBoundaryDetector) OriginalShapeLetterAssigner(com.joliciel.jochre.analyser.OriginalShapeLetterAssigner) LetterAssigner(com.joliciel.jochre.analyser.LetterAssigner) DocumentObserver(com.joliciel.jochre.doc.DocumentObserver) LetterGuesser(com.joliciel.jochre.letterGuesser.LetterGuesser) LetterGuessObserver(com.joliciel.jochre.analyser.LetterGuessObserver) JochreDocumentGenerator(com.joliciel.jochre.doc.JochreDocumentGenerator) PdfDocumentProcessor(com.joliciel.jochre.pdf.PdfDocumentProcessor) OriginalBoundaryDetector(com.joliciel.jochre.boundaries.OriginalBoundaryDetector) JochreException(com.joliciel.jochre.utils.JochreException) LetterFeature(com.joliciel.jochre.letterGuesser.features.LetterFeature) BeamSearchImageAnalyser(com.joliciel.jochre.analyser.BeamSearchImageAnalyser) LetterFeatureParser(com.joliciel.jochre.letterGuesser.features.LetterFeatureParser) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel) ImageDocumentExtractor(com.joliciel.jochre.doc.ImageDocumentExtractor)

Aggregations

JochreException (com.joliciel.jochre.utils.JochreException)23 IOException (java.io.IOException)15 BufferedImage (java.awt.image.BufferedImage)7 File (java.io.File)7 ClassificationModel (com.joliciel.talismane.machineLearning.ClassificationModel)5 Shape (com.joliciel.jochre.graphics.Shape)4 ClassificationEventStream (com.joliciel.talismane.machineLearning.ClassificationEventStream)4 ClassificationModelTrainer (com.joliciel.talismane.machineLearning.ClassificationModelTrainer)4 ModelTrainerFactory (com.joliciel.talismane.machineLearning.ModelTrainerFactory)4 BoundaryDetector (com.joliciel.jochre.boundaries.BoundaryDetector)3 DeterministicBoundaryDetector (com.joliciel.jochre.boundaries.DeterministicBoundaryDetector)3 LetterByLetterBoundaryDetector (com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector)3 OriginalBoundaryDetector (com.joliciel.jochre.boundaries.OriginalBoundaryDetector)3 JochreImage (com.joliciel.jochre.graphics.JochreImage)3 LetterFeature (com.joliciel.jochre.letterGuesser.features.LetterFeature)3 LetterFeatureParser (com.joliciel.jochre.letterGuesser.features.LetterFeatureParser)3 TreeSet (java.util.TreeSet)3 BeamSearchImageAnalyser (com.joliciel.jochre.analyser.BeamSearchImageAnalyser)2 ImageAnalyser (com.joliciel.jochre.analyser.ImageAnalyser)2 LetterAssigner (com.joliciel.jochre.analyser.LetterAssigner)2