use of com.joliciel.talismane.machineLearning.ClassificationModel in project talismane by joliciel-informatique.
the class PerceptronClassificationModelTrainer method trainModel.
/**
 * Trains a classification model from the supplied event stream.
 *
 * <p>Each call creates fresh {@code PerceptronModelParameters} and a matching
 * {@code PerceptronDecisionMaker}, stores the descriptors and event stream on this
 * trainer, prepares the data, runs training, and builds the model for the configured
 * number of iterations.
 *
 * @param corpusEventStream the classification events to train on
 * @param descriptors the descriptors stored on this trainer before training
 * @return the model built from the trained parameters
 * @throws TalismaneException declared by the overridden method
 */
@Override
public ClassificationModel trainModel(ClassificationEventStream corpusEventStream, Map<String, List<String>> descriptors) throws TalismaneException {
    params = new PerceptronModelParameters();
    decisionMaker = new PerceptronDecisionMaker(params, this.getScoring());
    this.descriptors = descriptors;
    this.corpusEventStream = corpusEventStream;
    try {
        this.prepareData(corpusEventStream);
        this.train();
        return this.getModel(params, this.getIterations());
    } finally {
        // Delete the event file on every exit path, so that a failure during
        // preparation or training does not leave it behind on disk.
        if (this.eventFile != null) {
            this.eventFile.delete();
        }
    }
}
use of com.joliciel.talismane.machineLearning.ClassificationModel in project jochre by urieli.
the class Jochre method doCommandEvaluateFull.
/**
 * Evaluate a suite of split/merge models and letter guessing model.
 *
 * <p>Two files are written to {@code outputDir}: an error log named
 * {@code <baseName><suffix>errors.txt} (deleted and recreated on each run) and an
 * f-score file named {@code <baseName><suffix>_full_fscores.csv} (opened in append
 * mode). Both writers are closed before this method returns, including when
 * processing fails.
 *
 * @param criteria
 *            for selecting the evaluation corpus
 * @param save
 *            whether or not the letter guesses should be saved
 * @param outputDir
 *            the directory in which the error and f-score files are created
 * @param wordChooser
 *            the word chooser handed to the image analyser
 * @param suffix
 *            text inserted into the output file names after the model base name
 * @param observers
 *            additional observers registered on the image processor
 * @throws IOException
 *             if an output file cannot be opened, written or closed
 * @throws IllegalArgumentException
 *             if the split model or the merge model is not configured
 */
public void doCommandEvaluateFull(CorpusSelectionCriteria criteria, boolean save, File outputDir, MostLikelyWordChooser wordChooser, String suffix, List<DocumentObserver> observers) throws IOException {
    String letterModelPath = jochreSession.getLetterModelPath();
    // Cut at the first dot as before, but tolerate a path without any dot
    // instead of failing with a StringIndexOutOfBoundsException.
    int firstDot = letterModelPath.indexOf(".");
    String baseName = firstDot >= 0 ? letterModelPath.substring(0, firstDot) : letterModelPath;
    if (baseName.lastIndexOf("/") > 0) {
        baseName = baseName.substring(baseName.lastIndexOf("/") + 1);
    }

    // Letter guessing.
    ClassificationModel letterModel = jochreSession.getLetterModel();
    List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
    LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());

    // Shape splitting.
    ClassificationModel splitModel = jochreSession.getSplitModel();
    if (splitModel == null) {
        throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.split-model");
    }
    List<String> splitFeatureDescriptors = splitModel.getFeatureDescriptors();
    SplitFeatureParser splitFeatureParser = new SplitFeatureParser();
    Set<SplitFeature<?>> splitFeatures = splitFeatureParser.getSplitFeatureSet(splitFeatureDescriptors);
    SplitCandidateFinder splitCandidateFinder = new SplitCandidateFinder(jochreSession);
    splitCandidateFinder.setMinDistanceBetweenSplits(5);
    ShapeSplitter shapeSplitter = new RecursiveShapeSplitter(splitCandidateFinder, splitFeatures, splitModel.getDecisionMaker(), jochreSession);

    // Shape merging.
    ClassificationModel mergeModel = jochreSession.getMergeModel();
    if (mergeModel == null) {
        throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.merge-model");
    }
    List<String> mergeFeatureDescriptors = mergeModel.getFeatureDescriptors();
    MergeFeatureParser mergeFeatureParser = new MergeFeatureParser();
    Set<MergeFeature<?>> mergeFeatures = mergeFeatureParser.getMergeFeatureSet(mergeFeatureDescriptors);
    ShapeMerger shapeMerger = new ShapeMerger(mergeFeatures, mergeModel.getDecisionMaker());

    // Boundary detection, selected by configuration. Any detector type other than
    // the two handled below leaves boundaryDetector null.
    BoundaryDetector boundaryDetector = null;
    String boundaryDetectorTypeName = jochreSession.getConfig().getConfig("jochre.boundaries").getString("boundary-detector-type");
    BoundaryDetectorType boundaryDetectorType = BoundaryDetectorType.valueOf(boundaryDetectorTypeName);
    switch (boundaryDetectorType) {
        case LetterByLetter:
            boundaryDetector = new LetterByLetterBoundaryDetector(shapeSplitter, shapeMerger, jochreSession);
            break;
        case Deterministic:
            boundaryDetector = new DeterministicBoundaryDetector(shapeSplitter, shapeMerger, jochreSession);
            break;
    }

    ImageAnalyser imageAnalyser = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);
    LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
    OriginalShapeLetterAssigner shapeLetterAssigner = new OriginalShapeLetterAssigner();
    shapeLetterAssigner.setEvaluate(true);
    shapeLetterAssigner.setSave(save);
    shapeLetterAssigner.setLetterValidator(letterValidator);
    shapeLetterAssigner.setSingleLetterMethod(false);
    imageAnalyser.addObserver(shapeLetterAssigner);

    ErrorLogger errorLogger = new ErrorLogger(jochreSession);
    File errorFile = new File(outputDir, baseName + suffix + "errors.txt");
    errorFile.delete();
    // try-with-resources guarantees the buffered error output is flushed and the
    // file handle released, whether or not processing succeeds.
    try (Writer errorWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(errorFile, true), "UTF8"))) {
        errorLogger.setErrorWriter(errorWriter);
        imageAnalyser.addObserver(errorLogger);

        JochreCorpusImageProcessor imageProcessor = new JochreCorpusImageProcessor(criteria, jochreSession);
        imageProcessor.addObserver(imageAnalyser);
        for (DocumentObserver observer : observers) {
            imageProcessor.addObserver(observer);
        }
        imageProcessor.process();
    }

    LOG.debug("F-score for " + letterModelPath + ": " + shapeLetterAssigner.getFScoreCalculator().getTotalFScore());

    String modelFileName = baseName + suffix + "_full";
    File fscoreFile = new File(outputDir, modelFileName + "_fscores.csv");
    // A dedicated writer for the scores, closed (and therefore flushed) once written.
    try (Writer fscoreWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fscoreFile, true), jochreSession.getCsvEncoding()))) {
        shapeLetterAssigner.getFScoreCalculator().writeScoresToCSV(fscoreWriter);
    }
}
use of com.joliciel.talismane.machineLearning.ClassificationModel in project jochre by urieli.
the class Jochre method doCommandEvaluateMerge.
/**
 * Evaluates the letter merging model in isolation and logs the total f-score
 * at debug level.
 *
 * @param criteria
 *            selects the portion of the corpus to evaluate
 * @throws IOException
 *             declared for the underlying corpus and model access
 * @throws IllegalArgumentException
 *             if no merge model is configured
 */
public void doCommandEvaluateMerge(CorpusSelectionCriteria criteria) throws IOException {
    ClassificationModel model = jochreSession.getMergeModel();
    if (model == null) {
        throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.merge-model");
    }

    // Build the merge features from the descriptors stored with the model.
    List<String> descriptors = model.getFeatureDescriptors();
    Set<MergeFeature<?>> features = new MergeFeatureParser().getMergeFeatureSet(descriptors);

    // Restrict the corpus reader to the requested selection.
    JochreCorpusGroupReader reader = new JochreCorpusGroupReader(jochreSession);
    reader.setSelectionCriteria(criteria);

    ShapeMerger shapeMerger = new ShapeMerger(features, model.getDecisionMaker());
    FScoreCalculator<String> scores = new MergeEvaluator(jochreSession).evaluate(reader, shapeMerger);
    LOG.debug("" + scores.getTotalFScore());
}
use of com.joliciel.talismane.machineLearning.ClassificationModel in project jochre by urieli.
the class Jochre method doCommandAnalyse.
/**
 * Analyses a set of images using the session's letter-guessing model.
 *
 * <p>The image analyser is created without a boundary detector ({@code null} is
 * passed in its place).
 *
 * @param criteria
 *            the criteria used to select the documents to be analysed
 * @param wordChooser
 *            the word chooser to use
 * @param observers
 *            the observers, used to create analysis output
 * @throws IOException
 *             declared for the underlying corpus and model access
 */
public void doCommandAnalyse(CorpusSelectionCriteria criteria, MostLikelyWordChooser wordChooser, List<DocumentObserver> observers) throws IOException {
    // Build the letter guesser from the model's own feature descriptors.
    ClassificationModel model = jochreSession.getLetterModel();
    List<String> descriptors = model.getFeatureDescriptors();
    Set<LetterFeature<?>> features = new LetterFeatureParser().getLetterFeatureSet(descriptors);
    LetterGuesser guesser = new LetterGuesser(features, model.getDecisionMaker());

    ImageAnalyser imageAnalyser = new BeamSearchImageAnalyser(null, guesser, wordChooser, jochreSession);
    imageAnalyser.addObserver(new LetterAssigner());

    // The analyser is registered first, followed by the caller's observers.
    JochreCorpusImageProcessor processor = new JochreCorpusImageProcessor(criteria, jochreSession);
    processor.addObserver(imageAnalyser);
    for (DocumentObserver documentObserver : observers) {
        processor.addObserver(documentObserver);
    }
    processor.process();
}
use of com.joliciel.talismane.machineLearning.ClassificationModel in project talismane by joliciel-informatique.
the class ParserTrainer method train.
/**
 * Trains the parser model, attaches the session's external resources to it and
 * persists it to {@code modelFile}.
 *
 * <p>The trainer is constructed from the {@code train.machine-learning} section of
 * the parser configuration. The model file's parent directory, if it has one, is
 * created before the model is written.
 *
 * @return the trained and persisted model
 * @throws TalismaneException declared for training
 * @throws IOException declared for persisting the model
 */
public ClassificationModel train() throws TalismaneException, IOException {
    ClassificationModelTrainer modelTrainer = new ModelTrainerFactory().constructTrainer(parserConfig.getConfig("train.machine-learning"));
    ClassificationModel trainedModel = modelTrainer.trainModel(eventStream, descriptors);
    trainedModel.setExternalResources(TalismaneSession.get(sessionId).getExternalResourceFinder().getExternalResources());

    // Make sure the target directory exists before writing the model.
    File parentDir = modelFile.getParentFile();
    if (parentDir != null) {
        parentDir.mkdirs();
    }
    trainedModel.persist(modelFile);
    return trainedModel;
}
Aggregations