use of com.joliciel.jochre.letterGuesser.features.LetterFeature in project jochre by urieli.
the class Jochre method doCommandApplyFeatures.
/**
 * Parses the given feature descriptors into letter features and hands them to
 * a {@link LetterFeatureTester} for the requested image or shape.
 *
 * @param imageId
 *            id of the image, forwarded unchanged to the feature tester
 * @param shapeId
 *            id of the shape, forwarded unchanged to the feature tester
 * @param featureDescriptors
 *            textual descriptors of the letter features to apply
 */
public void doCommandApplyFeatures(int imageId, int shapeId, List<String> featureDescriptors) {
    final LetterFeatureTester tester = new LetterFeatureTester(jochreSession);
    final Set<LetterFeature<?>> parsedFeatures = new LetterFeatureParser().getLetterFeatureSet(featureDescriptors);

    // The letter set is always passed empty here; presumably that means
    // "no restriction on letters" -- confirm against LetterFeatureTester.
    final Set<String> noLetters = new HashSet<String>();
    tester.applyFeatures(parsedFeatures, noLetters, imageId, shapeId);
}
use of com.joliciel.jochre.letterGuesser.features.LetterFeature in project jochre by urieli.
the class Jochre method doCommandEvaluateFull.
/**
 * Evaluate a suite of split/merge models and letter guessing model.
 * <p>
 * Two files are written into {@code outputDir}, both named after the letter
 * model file (its name up to the first dot) followed by {@code suffix}:
 * an error log ({@code ...errors.txt}) and an f-score table
 * ({@code ..._full_fscores.csv}).
 *
 * @param criteria
 *            for selecting the evaluation corpus
 * @param save
 *            whether or not the letter guesses should be saved
 * @param outputDir
 *            directory in which the error log and the f-score CSV are created
 * @param wordChooser
 *            the word chooser handed to the image analyser
 * @param suffix
 *            text appended to the model base name when naming the output files
 * @param observers
 *            additional observers registered on the corpus image processor
 * @throws IllegalArgumentException
 *             if the session has no split model or no merge model
 * @throws IOException
 *             if one of the output files cannot be opened or written
 */
public void doCommandEvaluateFull(CorpusSelectionCriteria criteria, boolean save, File outputDir, MostLikelyWordChooser wordChooser, String suffix, List<DocumentObserver> observers) throws IOException {
    // Base name = file name of the letter model, cut at its first dot.
    // Only the file name is inspected, so dots in directory names are ignored
    // and a name without any dot is used as is instead of failing.
    String letterModelFileName = new File(jochreSession.getLetterModelPath()).getName();
    int firstDot = letterModelFileName.indexOf('.');
    String baseName = firstDot >= 0 ? letterModelFileName.substring(0, firstDot) : letterModelFileName;

    // Letter guesser, built from the features the letter model was trained with.
    ClassificationModel letterModel = jochreSession.getLetterModel();
    List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
    LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());

    // Shape splitter, driven by the split model.
    ClassificationModel splitModel = jochreSession.getSplitModel();
    if (splitModel == null) {
        throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.split-model");
    }
    List<String> splitFeatureDescriptors = splitModel.getFeatureDescriptors();
    SplitFeatureParser splitFeatureParser = new SplitFeatureParser();
    Set<SplitFeature<?>> splitFeatures = splitFeatureParser.getSplitFeatureSet(splitFeatureDescriptors);
    SplitCandidateFinder splitCandidateFinder = new SplitCandidateFinder(jochreSession);
    splitCandidateFinder.setMinDistanceBetweenSplits(5);
    ShapeSplitter shapeSplitter = new RecursiveShapeSplitter(splitCandidateFinder, splitFeatures, splitModel.getDecisionMaker(), jochreSession);

    // Shape merger, driven by the merge model.
    ClassificationModel mergeModel = jochreSession.getMergeModel();
    if (mergeModel == null) {
        throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.merge-model");
    }
    List<String> mergeFeatureDescriptors = mergeModel.getFeatureDescriptors();
    MergeFeatureParser mergeFeatureParser = new MergeFeatureParser();
    Set<MergeFeature<?>> mergeFeatures = mergeFeatureParser.getMergeFeatureSet(mergeFeatureDescriptors);
    ShapeMerger shapeMerger = new ShapeMerger(mergeFeatures, mergeModel.getDecisionMaker());

    // Boundary detector type comes from configuration. Any type not listed
    // below leaves the detector null, exactly as before.
    BoundaryDetector boundaryDetector = null;
    String boundaryDetectorTypeName = jochreSession.getConfig().getConfig("jochre.boundaries").getString("boundary-detector-type");
    BoundaryDetectorType boundaryDetectorType = BoundaryDetectorType.valueOf(boundaryDetectorTypeName);
    switch (boundaryDetectorType) {
    case LetterByLetter:
        boundaryDetector = new LetterByLetterBoundaryDetector(shapeSplitter, shapeMerger, jochreSession);
        break;
    case Deterministic:
        boundaryDetector = new DeterministicBoundaryDetector(shapeSplitter, shapeMerger, jochreSession);
        break;
    }

    ImageAnalyser imageAnalyser = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);

    // Observer that assigns guessed letters to the original shapes and
    // accumulates the f-score statistics reported at the end.
    LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
    OriginalShapeLetterAssigner shapeLetterAssigner = new OriginalShapeLetterAssigner();
    shapeLetterAssigner.setEvaluate(true);
    shapeLetterAssigner.setSave(save);
    shapeLetterAssigner.setLetterValidator(letterValidator);
    shapeLetterAssigner.setSingleLetterMethod(false);
    imageAnalyser.addObserver(shapeLetterAssigner);

    ErrorLogger errorLogger = new ErrorLogger(jochreSession);
    File errorFile = new File(outputDir, baseName + suffix + "errors.txt");
    // Start from a fresh error file on every run.
    errorFile.delete();

    // try-with-resources guarantees the buffered error log is flushed and the
    // file handle released, even if processing fails part-way through.
    try (Writer errorWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(errorFile, true), "UTF8"))) {
        errorLogger.setErrorWriter(errorWriter);
        imageAnalyser.addObserver(errorLogger);

        JochreCorpusImageProcessor imageProcessor = new JochreCorpusImageProcessor(criteria, jochreSession);
        imageProcessor.addObserver(imageAnalyser);
        for (DocumentObserver observer : observers) {
            imageProcessor.addObserver(observer);
        }
        imageProcessor.process();
    }

    LOG.debug("F-score for " + jochreSession.getLetterModelPath() + ": " + shapeLetterAssigner.getFScoreCalculator().getTotalFScore());

    String modelFileName = baseName + suffix + "_full";
    File fscoreFile = new File(outputDir, modelFileName + "_fscores.csv");
    // The f-score file is opened in append mode, as in the original code.
    try (Writer fscoreWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fscoreFile, true), jochreSession.getCsvEncoding()))) {
        shapeLetterAssigner.getFScoreCalculator().writeScoresToCSV(fscoreWriter);
    }
}
use of com.joliciel.jochre.letterGuesser.features.LetterFeature in project jochre by urieli.
the class Jochre method doCommandAnalyse.
/**
 * Runs letter guessing over every image selected from the corpus, using the
 * session's letter model.
 * <p>
 * No boundary detector is supplied to the analyser ({@code null} is passed),
 * and guessed letters are recorded through a {@link LetterAssigner}.
 *
 * @param criteria
 *            the criteria used to select the documents to be analysed
 * @param wordChooser
 *            the word chooser to use
 * @param observers
 *            the observers, used to create analysis output
 * @throws IOException
 *             declared by the signature; the visible code does not show which
 *             call raises it
 */
public void doCommandAnalyse(CorpusSelectionCriteria criteria, MostLikelyWordChooser wordChooser, List<DocumentObserver> observers) throws IOException {
    // Rebuild the feature set the letter model was trained with.
    final ClassificationModel model = jochreSession.getLetterModel();
    final List<String> descriptors = model.getFeatureDescriptors();
    final Set<LetterFeature<?>> features = new LetterFeatureParser().getLetterFeatureSet(descriptors);
    final LetterGuesser guesser = new LetterGuesser(features, model.getDecisionMaker());

    final ImageAnalyser imageAnalyser = new BeamSearchImageAnalyser(null, guesser, wordChooser, jochreSession);
    imageAnalyser.addObserver(new LetterAssigner());

    // The analyser is registered first, then the caller's observers in order.
    final JochreCorpusImageProcessor corpusProcessor = new JochreCorpusImageProcessor(criteria, jochreSession);
    corpusProcessor.addObserver(imageAnalyser);
    for (final DocumentObserver documentObserver : observers) {
        corpusProcessor.addObserver(documentObserver);
    }
    corpusProcessor.process();
}
use of com.joliciel.jochre.letterGuesser.features.LetterFeature in project jochre by urieli.
the class Jochre method doCommandTrain.
/**
 * Train a letter guessing model and persist it to the session's letter model
 * path.
 *
 * @param featureDescriptors
 *            the feature descriptors for training
 * @param criteria
 *            criteria for selecting images to include when training
 * @param reconstructLetters
 *            whether or not complete letters should be reconstructed for
 *            training, from merged/split letters
 * @throws RuntimeException
 *             if the session has no letter model path
 * @throws JochreException
 *             if {@code featureDescriptors} is null
 */
public void doCommandTrain(List<String> featureDescriptors, CorpusSelectionCriteria criteria, boolean reconstructLetters) {
    if (jochreSession.getLetterModelPath() == null) {
        throw new RuntimeException("Missing argument: letterModel");
    }
    if (featureDescriptors == null) {
        throw new JochreException("features is required");
    }

    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> features = letterFeatureParser.getLetterFeatureSet(featureDescriptors);

    // With reconstruction, letters are rebuilt via the training-corpus
    // splitter/merger; otherwise the original boundaries are used.
    BoundaryDetector boundaryDetector;
    if (reconstructLetters) {
        ShapeSplitter splitter = new TrainingCorpusShapeSplitter(jochreSession);
        ShapeMerger merger = new TrainingCorpusShapeMerger();
        boundaryDetector = new LetterByLetterBoundaryDetector(splitter, merger, jochreSession);
    } else {
        boundaryDetector = new OriginalBoundaryDetector();
    }

    LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
    ClassificationEventStream corpusEventStream = new JochreLetterEventStream(features, boundaryDetector, letterValidator, criteria, jochreSession);

    File letterModelFile = new File(jochreSession.getLetterModelPath());
    // getParentFile() returns null for a bare file name such as "model.zip";
    // in that case there is no directory to create.
    File letterModelDir = letterModelFile.getParentFile();
    if (letterModelDir != null) {
        letterModelDir.mkdirs();
    }

    ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
    ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
    ClassificationModel letterModel = trainer.trainModel(corpusEventStream, featureDescriptors);
    letterModel.persist(letterModelFile);
}
use of com.joliciel.jochre.letterGuesser.features.LetterFeature in project jochre by urieli.
the class Jochre method doCommandAnalyse.
/**
 * Full analysis, including merge, split and letter guessing.
 * <p>
 * When the session provides both a split model and a merge model, a
 * {@link DeterministicBoundaryDetector} is used; otherwise the original shape
 * boundaries are kept. The source may be a PDF file, a PNG/JPG/JPEG/GIF image
 * or a directory; file extensions are matched case-insensitively.
 *
 * @param sourceFile
 *            the PDF, image file or directory to analyse
 * @param wordChooser
 *            the word chooser handed to the image analyser
 * @param pages
 *            the pages to process, empty means all (only passed on when the
 *            source is a PDF)
 * @param observers
 *            observers registered on the document generator
 * @param imageObservers
 *            observers registered on the PDF processor; unused for other
 *            source types
 * @throws JochreException
 *             if {@code sourceFile} does not exist
 * @throws RuntimeException
 *             if {@code sourceFile} is neither a directory nor a file with a
 *             recognised extension
 * @throws IOException
 *             declared by the signature; the visible code does not show which
 *             call raises it
 */
public void doCommandAnalyse(File sourceFile, MostLikelyWordChooser wordChooser, Set<Integer> pages, List<DocumentObserver> observers, List<PdfImageObserver> imageObservers) throws IOException {
    // Fail fast: no point in building the models for a missing source.
    if (!sourceFile.exists()) {
        throw new JochreException("The file " + sourceFile.getPath() + " does not exist");
    }

    ClassificationModel letterModel = jochreSession.getLetterModel();
    List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
    LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());

    BoundaryDetector boundaryDetector;
    LetterGuessObserver letterGuessObserver;
    if (jochreSession.getSplitModel() != null && jochreSession.getMergeModel() != null) {
        boundaryDetector = new DeterministicBoundaryDetector(jochreSession.getSplitModel(), jochreSession.getMergeModel(), jochreSession);
        OriginalShapeLetterAssigner shapeLetterAssigner = new OriginalShapeLetterAssigner();
        shapeLetterAssigner.setEvaluate(false);
        shapeLetterAssigner.setSingleLetterMethod(false);
        letterGuessObserver = shapeLetterAssigner;
    } else {
        boundaryDetector = new OriginalBoundaryDetector();
        letterGuessObserver = new LetterAssigner();
    }

    ImageAnalyser analyser = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);
    analyser.addObserver(letterGuessObserver);

    JochreDocumentGenerator documentGenerator = new JochreDocumentGenerator(sourceFile.getName(), "", jochreSession);
    documentGenerator.addDocumentObserver(analyser);
    for (DocumentObserver observer : observers) {
        documentGenerator.addDocumentObserver(observer);
    }

    // Locale.ROOT keeps the comparison independent of the default locale
    // (e.g. under a Turkish locale ".GIF" would lower-case to ".g\u0131f").
    String lowerCaseName = sourceFile.getName().toLowerCase(java.util.Locale.ROOT);
    boolean isImageFile = lowerCaseName.endsWith(".png") || lowerCaseName.endsWith(".jpg") || lowerCaseName.endsWith(".jpeg") || lowerCaseName.endsWith(".gif");

    if (lowerCaseName.endsWith(".pdf")) {
        PdfDocumentProcessor pdfDocumentProcessor = new PdfDocumentProcessor(sourceFile, pages, documentGenerator);
        for (PdfImageObserver imageObserver : imageObservers) {
            pdfDocumentProcessor.addImageObserver(imageObserver);
        }
        pdfDocumentProcessor.process();
    } else if (isImageFile || sourceFile.isDirectory()) {
        // Single images and directories go through the same extractor.
        ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator);
        extractor.extractDocument();
    } else {
        throw new RuntimeException("Unrecognised file extension");
    }
}
Aggregations