use of com.joliciel.jochre.boundaries.features.MergeFeature in project jochre by urieli.
the class Jochre method doCommandEvaluateFull.
/**
* Evaluate a suite of split/merge models and letter guessing model.
* @param criteria
* for selecting the evaluation corpus
* @param save
* whether or not the letter guesses should be saved
*/
public void doCommandEvaluateFull(CorpusSelectionCriteria criteria, boolean save, File outputDir, MostLikelyWordChooser wordChooser, String suffix, List<DocumentObserver> observers) throws IOException {
String baseName = jochreSession.getLetterModelPath().substring(0, jochreSession.getLetterModelPath().indexOf("."));
if (baseName.lastIndexOf("/") > 0)
baseName = baseName.substring(baseName.lastIndexOf("/") + 1);
ClassificationModel letterModel = jochreSession.getLetterModel();
List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());
ClassificationModel splitModel = jochreSession.getSplitModel();
if (splitModel == null)
throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.split-model");
List<String> splitFeatureDescriptors = splitModel.getFeatureDescriptors();
SplitFeatureParser splitFeatureParser = new SplitFeatureParser();
Set<SplitFeature<?>> splitFeatures = splitFeatureParser.getSplitFeatureSet(splitFeatureDescriptors);
SplitCandidateFinder splitCandidateFinder = new SplitCandidateFinder(jochreSession);
splitCandidateFinder.setMinDistanceBetweenSplits(5);
ShapeSplitter shapeSplitter = new RecursiveShapeSplitter(splitCandidateFinder, splitFeatures, splitModel.getDecisionMaker(), jochreSession);
ClassificationModel mergeModel = jochreSession.getMergeModel();
if (mergeModel == null)
throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.merge-model");
List<String> mergeFeatureDescriptors = mergeModel.getFeatureDescriptors();
MergeFeatureParser mergeFeatureParser = new MergeFeatureParser();
Set<MergeFeature<?>> mergeFeatures = mergeFeatureParser.getMergeFeatureSet(mergeFeatureDescriptors);
ShapeMerger shapeMerger = new ShapeMerger(mergeFeatures, mergeModel.getDecisionMaker());
BoundaryDetector boundaryDetector = null;
String boundaryDetectorTypeName = jochreSession.getConfig().getConfig("jochre.boundaries").getString("boundary-detector-type");
BoundaryDetectorType boundaryDetectorType = BoundaryDetectorType.valueOf(boundaryDetectorTypeName);
switch(boundaryDetectorType) {
case LetterByLetter:
boundaryDetector = new LetterByLetterBoundaryDetector(shapeSplitter, shapeMerger, jochreSession);
break;
case Deterministic:
boundaryDetector = new DeterministicBoundaryDetector(shapeSplitter, shapeMerger, jochreSession);
break;
}
ImageAnalyser imageAnalyser = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);
LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
OriginalShapeLetterAssigner shapeLetterAssigner = new OriginalShapeLetterAssigner();
shapeLetterAssigner.setEvaluate(true);
shapeLetterAssigner.setSave(save);
shapeLetterAssigner.setLetterValidator(letterValidator);
shapeLetterAssigner.setSingleLetterMethod(false);
imageAnalyser.addObserver(shapeLetterAssigner);
ErrorLogger errorLogger = new ErrorLogger(jochreSession);
Writer errorWriter = null;
File errorFile = new File(outputDir, baseName + suffix + "errors.txt");
errorFile.delete();
errorWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(errorFile, true), "UTF8"));
errorLogger.setErrorWriter(errorWriter);
imageAnalyser.addObserver(errorLogger);
JochreCorpusImageProcessor imageProcessor = new JochreCorpusImageProcessor(criteria, jochreSession);
imageProcessor.addObserver(imageAnalyser);
for (DocumentObserver observer : observers) imageProcessor.addObserver(observer);
imageProcessor.process();
LOG.debug("F-score for " + jochreSession.getLetterModelPath() + ": " + shapeLetterAssigner.getFScoreCalculator().getTotalFScore());
String modelFileName = baseName + suffix + "_full";
File fscoreFile = new File(outputDir, modelFileName + "_fscores.csv");
Writer fscoreWriter = errorWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fscoreFile, true), jochreSession.getCsvEncoding()));
shapeLetterAssigner.getFScoreCalculator().writeScoresToCSV(fscoreWriter);
}
use of com.joliciel.jochre.boundaries.features.MergeFeature in project jochre by urieli.
the class Jochre method doCommandEvaluateMerge.
/**
* Evaluate the letter merging model on its own.
*
* @param criteria
* for selecting the portion of the corpus to evaluate
*/
public void doCommandEvaluateMerge(CorpusSelectionCriteria criteria) throws IOException {
ClassificationModel mergeModel = jochreSession.getMergeModel();
if (mergeModel == null)
throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.merge-model");
List<String> mergeFeatureDescriptors = mergeModel.getFeatureDescriptors();
MergeFeatureParser mergeFeatureParser = new MergeFeatureParser();
Set<MergeFeature<?>> mergeFeatures = mergeFeatureParser.getMergeFeatureSet(mergeFeatureDescriptors);
JochreCorpusGroupReader groupReader = new JochreCorpusGroupReader(jochreSession);
groupReader.setSelectionCriteria(criteria);
ShapeMerger merger = new ShapeMerger(mergeFeatures, mergeModel.getDecisionMaker());
MergeEvaluator evaluator = new MergeEvaluator(jochreSession);
FScoreCalculator<String> fScoreCalculator = evaluator.evaluate(groupReader, merger);
LOG.debug("" + fScoreCalculator.getTotalFScore());
}
use of com.joliciel.jochre.boundaries.features.MergeFeature in project jochre by urieli.
the class Jochre method doCommandTrainMerge.
/**
* Train the letter merging model.
*
* @param featureDescriptors
* feature descriptors for training
* @param multiplier
* if > 0, will be used to equalize the outcomes
* @param criteria
* the criteria used to select the training corpus
*/
public void doCommandTrainMerge(List<String> featureDescriptors, int multiplier, CorpusSelectionCriteria criteria) {
if (jochreSession.getMergeModelPath() == null)
throw new RuntimeException("Missing argument: mergeModel");
if (featureDescriptors == null)
throw new JochreException("features is required");
File mergeModelFile = new File(jochreSession.getMergeModelPath());
mergeModelFile.getParentFile().mkdirs();
MergeFeatureParser mergeFeatureParser = new MergeFeatureParser();
Set<MergeFeature<?>> mergeFeatures = mergeFeatureParser.getMergeFeatureSet(featureDescriptors);
ClassificationEventStream corpusEventStream = new JochreMergeEventStream(criteria, mergeFeatures, jochreSession);
if (multiplier > 0) {
corpusEventStream = new OutcomeEqualiserEventStream(corpusEventStream, multiplier);
}
ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
ClassificationModel mergeModel = trainer.trainModel(corpusEventStream, featureDescriptors);
mergeModel.persist(mergeModelFile);
}
Aggregations