use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.
the class FScoreCalculator method writeScoresToCSV.
/**
 * Writes a confusion matrix together with per-outcome and aggregate scores
 * in CSV form.
 * <p>
 * Layout: a header row, one row per outcome (sorted by the outcomes' natural
 * order), a {@code TOTAL} row and an {@code AVERAGE} row. Each outcome row
 * holds the false-negative counts broken down by outcome, followed by true
 * positives, false positives, false negatives, precision, recall and
 * f-score. Precision, recall and f-score are written multiplied by 100.
 * <p>
 * The {@code AVERAGE} row is the unweighted mean of the per-outcome scores;
 * with an empty outcome set the division by zero yields {@code NaN}.
 *
 * @param fscoreFileWriter
 *            the writer receiving the CSV content; it is neither flushed nor
 *            closed by this method
 * @throws JochreException
 *             wrapping any {@link IOException} raised while writing
 */
public void writeScoresToCSV(Writer fscoreFileWriter) {
  try {
    // Sorted copy, so that rows and columns share one stable order.
    Set<E> outcomeSet = new TreeSet<E>();
    outcomeSet.addAll(this.getOutcomeSet());

    // Header row.
    fscoreFileWriter.write(CSV.format("outcome"));
    for (E outcome : outcomeSet) {
      fscoreFileWriter.write(CSV.format(outcome.toString()));
    }
    fscoreFileWriter.write(CSV.format("true+") + CSV.format("false+") + CSV.format("false-") + CSV.format("precision") + CSV.format("recall") + CSV.format("f-score"));
    fscoreFileWriter.write("\n");

    double totalPrecisionSum = 0;
    double totalRecallSum = 0;
    double totalFscoreSum = 0;

    // One row per outcome.
    for (E outcome : outcomeSet) {
      fscoreFileWriter.write(CSV.format(outcome.toString()));

      // Looked up once per row instead of once (or twice) per cell.
      Map<E, Integer> falseNegatives = this.getFalseNegatives(outcome);
      for (E outcome2 : outcomeSet) {
        int falseNegativeCount = 0;
        if (falseNegatives != null) {
          Integer count = falseNegatives.get(outcome2);
          if (count != null) {
            falseNegativeCount = count;
          }
        }
        fscoreFileWriter.write(CSV.format(falseNegativeCount));
      }

      fscoreFileWriter.write(CSV.format(this.getTruePositiveCount(outcome)));
      fscoreFileWriter.write(CSV.format(this.getFalsePositiveCount(outcome)));
      fscoreFileWriter.write(CSV.format(this.getFalseNegativeCount(outcome)));
      fscoreFileWriter.write(CSV.format(this.getPrecision(outcome) * 100));
      fscoreFileWriter.write(CSV.format(this.getRecall(outcome) * 100));
      fscoreFileWriter.write(CSV.format(this.getFScore(outcome) * 100));
      fscoreFileWriter.write("\n");

      totalPrecisionSum += this.getPrecision(outcome);
      totalRecallSum += this.getRecall(outcome);
      totalFscoreSum += this.getFScore(outcome);
    }

    int outcomeCount = outcomeSet.size();

    // TOTAL row: skip the confusion-matrix columns, then write the totals.
    fscoreFileWriter.write(CSV.format("TOTAL"));
    for (int i = 0; i < outcomeCount; i++) {
      fscoreFileWriter.write(CSV.getCsvSeparator());
    }
    fscoreFileWriter.write(CSV.format(this.getTotalTruePositiveCount()));
    fscoreFileWriter.write(CSV.format(this.getTotalFalsePositiveCount()));
    fscoreFileWriter.write(CSV.format(this.getTotalFalseNegativeCount()));
    fscoreFileWriter.write(CSV.format(this.getTotalPrecision() * 100));
    fscoreFileWriter.write(CSV.format(this.getTotalRecall() * 100));
    fscoreFileWriter.write(CSV.format(this.getTotalFScore() * 100));
    fscoreFileWriter.write("\n");

    // AVERAGE row: skip the confusion-matrix columns and the three count
    // columns, then write the mean of the per-outcome scores.
    fscoreFileWriter.write(CSV.format("AVERAGE"));
    for (int i = 0; i < outcomeCount; i++) {
      fscoreFileWriter.write(CSV.getCsvSeparator());
    }
    fscoreFileWriter.write(CSV.getCsvSeparator());
    fscoreFileWriter.write(CSV.getCsvSeparator());
    fscoreFileWriter.write(CSV.getCsvSeparator());
    fscoreFileWriter.write(CSV.format((totalPrecisionSum / outcomeCount) * 100));
    fscoreFileWriter.write(CSV.format((totalRecallSum / outcomeCount) * 100));
    fscoreFileWriter.write(CSV.format((totalFscoreSum / outcomeCount) * 100));
    fscoreFileWriter.write("\n");
  } catch (IOException ioe) {
    throw new JochreException(ioe);
  }
}
use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.
the class Jochre method doCommandTrain.
/**
 * Train a letter guessing model and persist it to the letter model path
 * configured on the session.
 *
 * @param featureDescriptors
 *            the feature descriptors for training
 * @param criteria
 *            criteria for selecting images to include when training
 * @param reconstructLetters
 *            whether or not complete letters should be reconstructed for
 *            training, from merged/split letters
 * @throws RuntimeException
 *             if the session has no letter model path
 * @throws JochreException
 *             if {@code featureDescriptors} is null
 */
public void doCommandTrain(List<String> featureDescriptors, CorpusSelectionCriteria criteria, boolean reconstructLetters) {
  if (jochreSession.getLetterModelPath() == null) {
    throw new RuntimeException("Missing argument: letterModel");
  }
  if (featureDescriptors == null) {
    throw new JochreException("features is required");
  }

  LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
  Set<LetterFeature<?>> features = letterFeatureParser.getLetterFeatureSet(featureDescriptors);

  final BoundaryDetector boundaryDetector;
  if (reconstructLetters) {
    ShapeSplitter splitter = new TrainingCorpusShapeSplitter(jochreSession);
    ShapeMerger merger = new TrainingCorpusShapeMerger();
    boundaryDetector = new LetterByLetterBoundaryDetector(splitter, merger, jochreSession);
  } else {
    boundaryDetector = new OriginalBoundaryDetector();
  }

  LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
  ClassificationEventStream corpusEventStream = new JochreLetterEventStream(features, boundaryDetector, letterValidator, criteria, jochreSession);

  File letterModelFile = new File(jochreSession.getLetterModelPath());
  // getParentFile() returns null for a path without a directory component
  // (e.g. "model.zip"); in that case there is no directory to create.
  File letterModelDir = letterModelFile.getParentFile();
  if (letterModelDir != null) {
    letterModelDir.mkdirs();
  }

  ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
  ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
  ClassificationModel letterModel = trainer.trainModel(corpusEventStream, featureDescriptors);
  letterModel.persist(letterModelFile);
}
use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.
the class Jochre method doCommandTrainMerge.
/**
 * Train the letter merging model and persist it to the merge model path
 * configured on the session.
 *
 * @param featureDescriptors
 *            feature descriptors for training
 * @param multiplier
 *            if &gt; 0, will be used to equalize the outcomes
 * @param criteria
 *            the criteria used to select the training corpus
 * @throws RuntimeException
 *             if the session has no merge model path
 * @throws JochreException
 *             if {@code featureDescriptors} is null
 */
public void doCommandTrainMerge(List<String> featureDescriptors, int multiplier, CorpusSelectionCriteria criteria) {
  if (jochreSession.getMergeModelPath() == null) {
    throw new RuntimeException("Missing argument: mergeModel");
  }
  if (featureDescriptors == null) {
    throw new JochreException("features is required");
  }

  File mergeModelFile = new File(jochreSession.getMergeModelPath());
  // getParentFile() returns null for a path without a directory component;
  // in that case there is no directory to create.
  File mergeModelDir = mergeModelFile.getParentFile();
  if (mergeModelDir != null) {
    mergeModelDir.mkdirs();
  }

  MergeFeatureParser mergeFeatureParser = new MergeFeatureParser();
  Set<MergeFeature<?>> mergeFeatures = mergeFeatureParser.getMergeFeatureSet(featureDescriptors);

  ClassificationEventStream corpusEventStream = new JochreMergeEventStream(criteria, mergeFeatures, jochreSession);
  if (multiplier > 0) {
    // Wrap the corpus stream so that outcomes are equalised.
    corpusEventStream = new OutcomeEqualiserEventStream(corpusEventStream, multiplier);
  }

  ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
  ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
  ClassificationModel mergeModel = trainer.trainModel(corpusEventStream, featureDescriptors);
  mergeModel.persist(mergeModelFile);
}
use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.
the class Jochre method doCommandTrainSplits.
/**
 * Train the letter splitting model and persist it to the split model path
 * configured on the session.
 *
 * @param featureDescriptors
 *            the feature descriptors for training this model
 * @param criteria
 *            the criteria used to select the training corpus
 * @throws RuntimeException
 *             if the session has no split model path
 * @throws JochreException
 *             if {@code featureDescriptors} is null
 */
public void doCommandTrainSplits(List<String> featureDescriptors, CorpusSelectionCriteria criteria) {
  if (jochreSession.getSplitModelPath() == null) {
    throw new RuntimeException("Missing argument: splitModel");
  }
  if (featureDescriptors == null) {
    throw new JochreException("features is required");
  }

  File splitModelFile = new File(jochreSession.getSplitModelPath());
  // getParentFile() returns null for a path without a directory component;
  // in that case there is no directory to create.
  File splitModelDir = splitModelFile.getParentFile();
  if (splitModelDir != null) {
    splitModelDir.mkdirs();
  }

  SplitFeatureParser splitFeatureParser = new SplitFeatureParser();
  Set<SplitFeature<?>> splitFeatures = splitFeatureParser.getSplitFeatureSet(featureDescriptors);
  ClassificationEventStream corpusEventStream = new JochreSplitEventStream(criteria, splitFeatures, jochreSession);

  ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
  ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
  ClassificationModel splitModel = trainer.trainModel(corpusEventStream, featureDescriptors);
  splitModel.persist(splitModelFile);
}
use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.
the class Jochre method doCommandAnalyse.
/**
 * Full analysis, including merge, split and letter guessing.
 * <p>
 * The boundary detector based on the split and merge models is used only
 * when the session provides both of them; otherwise the original shape
 * boundaries are kept.
 *
 * @param sourceFile
 *            the input to analyse: a {@code .pdf} file, an image file
 *            ({@code .png}, {@code .jpg}, {@code .jpeg} or {@code .gif},
 *            matched case-insensitively) or a directory
 * @param wordChooser
 *            the word chooser handed to the image analyser
 * @param pages
 *            the pages to process, empty means all
 * @param observers
 *            document observers registered on the document generator, after
 *            the analyser itself
 * @param imageObservers
 *            image observers registered on the PDF processor; only used when
 *            {@code sourceFile} is a PDF
 * @throws IOException
 *             declared by this method; presumably raised while reading the
 *             source (the throwing call is not identifiable from this method
 *             alone)
 * @throws JochreException
 *             if {@code sourceFile} does not exist
 * @throws RuntimeException
 *             if {@code sourceFile} is neither a directory nor a file with a
 *             recognised extension
 */
public void doCommandAnalyse(File sourceFile, MostLikelyWordChooser wordChooser, Set<Integer> pages, List<DocumentObserver> observers, List<PdfImageObserver> imageObservers) throws IOException {
  ClassificationModel letterModel = jochreSession.getLetterModel();
  List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
  LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
  Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
  LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());

  final BoundaryDetector boundaryDetector;
  final LetterGuessObserver letterGuessObserver;
  if (jochreSession.getSplitModel() != null && jochreSession.getMergeModel() != null) {
    boundaryDetector = new DeterministicBoundaryDetector(jochreSession.getSplitModel(), jochreSession.getMergeModel(), jochreSession);
    OriginalShapeLetterAssigner shapeLetterAssigner = new OriginalShapeLetterAssigner();
    shapeLetterAssigner.setEvaluate(false);
    shapeLetterAssigner.setSingleLetterMethod(false);
    letterGuessObserver = shapeLetterAssigner;
  } else {
    boundaryDetector = new OriginalBoundaryDetector();
    letterGuessObserver = new LetterAssigner();
  }

  ImageAnalyser analyser = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);
  analyser.addObserver(letterGuessObserver);

  JochreDocumentGenerator documentGenerator = new JochreDocumentGenerator(sourceFile.getName(), "", jochreSession);
  documentGenerator.addDocumentObserver(analyser);
  for (DocumentObserver observer : observers) {
    documentGenerator.addDocumentObserver(observer);
  }

  if (!sourceFile.exists()) {
    throw new JochreException("The file " + sourceFile.getPath() + " does not exist");
  }

  // Locale.ROOT keeps extension matching independent of the default locale
  // (e.g. under a Turkish locale "GIF" would otherwise lower-case to "gıf").
  String lowerCaseName = sourceFile.getName().toLowerCase(java.util.Locale.ROOT);
  boolean isPdf = lowerCaseName.endsWith(".pdf");
  boolean isImage = lowerCaseName.endsWith(".png") || lowerCaseName.endsWith(".jpg") || lowerCaseName.endsWith(".jpeg") || lowerCaseName.endsWith(".gif");

  if (isPdf) {
    PdfDocumentProcessor pdfDocumentProcessor = new PdfDocumentProcessor(sourceFile, pages, documentGenerator);
    for (PdfImageObserver imageObserver : imageObservers) {
      pdfDocumentProcessor.addImageObserver(imageObserver);
    }
    pdfDocumentProcessor.process();
  } else if (isImage || sourceFile.isDirectory()) {
    // Single image files and directories go through the same extractor.
    ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator);
    extractor.extractDocument();
  } else {
    throw new RuntimeException("Unrecognised file extension");
  }
}
Aggregations