Use of com.joliciel.talismane.machineLearning.ClassificationModelTrainer in project jochre by urieli.
From the class Jochre, method doCommandTrain.
/**
 * Train a letter guessing model.
 * <p>
 * Parses the feature descriptors into letter features, builds a letter event
 * stream using the supplied corpus selection criteria, trains a
 * classification model with the trainer constructed from the session
 * configuration, and persists the result to the session's letter model path.
 *
 * @param featureDescriptors
 *            the feature descriptors for training
 * @param criteria
 *            criteria for selecting images to include when training
 * @param reconstructLetters
 *            whether or not complete letters should be reconstructed for
 *            training, from merged/split letters
 * @throws RuntimeException
 *             if the session has no letter model path
 * @throws JochreException
 *             if {@code featureDescriptors} is null
 */
public void doCommandTrain(List<String> featureDescriptors, CorpusSelectionCriteria criteria, boolean reconstructLetters) {
    if (jochreSession.getLetterModelPath() == null) {
        throw new RuntimeException("Missing argument: letterModel");
    }
    if (featureDescriptors == null) {
        throw new JochreException("features is required");
    }

    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> features = letterFeatureParser.getLetterFeatureSet(featureDescriptors);

    // Either rebuild letter boundaries via split/merge, or keep the boundaries as they are.
    final BoundaryDetector boundaryDetector;
    if (reconstructLetters) {
        ShapeSplitter splitter = new TrainingCorpusShapeSplitter(jochreSession);
        ShapeMerger merger = new TrainingCorpusShapeMerger();
        boundaryDetector = new LetterByLetterBoundaryDetector(splitter, merger, jochreSession);
    } else {
        boundaryDetector = new OriginalBoundaryDetector();
    }

    LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
    ClassificationEventStream corpusEventStream = new JochreLetterEventStream(features, boundaryDetector, letterValidator, criteria, jochreSession);

    File letterModelFile = new File(jochreSession.getLetterModelPath());
    // getParentFile() returns null when the path has no directory component
    // (e.g. "letter.zip"); in that case there is no directory to create.
    File letterModelDir = letterModelFile.getParentFile();
    if (letterModelDir != null) {
        letterModelDir.mkdirs();
    }

    ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
    ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
    ClassificationModel letterModel = trainer.trainModel(corpusEventStream, featureDescriptors);
    letterModel.persist(letterModelFile);
}
Use of com.joliciel.talismane.machineLearning.ClassificationModelTrainer in project jochre by urieli.
From the class Jochre, method doCommandTrainMerge.
/**
 * Train the letter merging model.
 * <p>
 * Parses the feature descriptors into merge features, builds a merge event
 * stream using the supplied corpus selection criteria (optionally wrapped in
 * an outcome equaliser), trains a classification model with the trainer
 * constructed from the session configuration, and persists the result to the
 * session's merge model path.
 *
 * @param featureDescriptors
 *            feature descriptors for training
 * @param multiplier
 *            if {@code > 0}, will be used to equalize the outcomes
 * @param criteria
 *            the criteria used to select the training corpus
 * @throws RuntimeException
 *             if the session has no merge model path
 * @throws JochreException
 *             if {@code featureDescriptors} is null
 */
public void doCommandTrainMerge(List<String> featureDescriptors, int multiplier, CorpusSelectionCriteria criteria) {
    if (jochreSession.getMergeModelPath() == null) {
        throw new RuntimeException("Missing argument: mergeModel");
    }
    if (featureDescriptors == null) {
        throw new JochreException("features is required");
    }

    File mergeModelFile = new File(jochreSession.getMergeModelPath());
    // getParentFile() returns null when the path has no directory component
    // (e.g. "merge.zip"); in that case there is no directory to create.
    File mergeModelDir = mergeModelFile.getParentFile();
    if (mergeModelDir != null) {
        mergeModelDir.mkdirs();
    }

    MergeFeatureParser mergeFeatureParser = new MergeFeatureParser();
    Set<MergeFeature<?>> mergeFeatures = mergeFeatureParser.getMergeFeatureSet(featureDescriptors);

    ClassificationEventStream corpusEventStream = new JochreMergeEventStream(criteria, mergeFeatures, jochreSession);
    if (multiplier > 0) {
        // Wrap the raw stream so that outcomes are equalised using the multiplier.
        corpusEventStream = new OutcomeEqualiserEventStream(corpusEventStream, multiplier);
    }

    ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
    ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
    ClassificationModel mergeModel = trainer.trainModel(corpusEventStream, featureDescriptors);
    mergeModel.persist(mergeModelFile);
}
Use of com.joliciel.talismane.machineLearning.ClassificationModelTrainer in project jochre by urieli.
From the class Jochre, method doCommandTrainSplits.
/**
 * Train the letter splitting model.
 * <p>
 * Parses the feature descriptors into split features, builds a split event
 * stream using the supplied corpus selection criteria, trains a
 * classification model with the trainer constructed from the session
 * configuration, and persists the result to the session's split model path.
 *
 * @param featureDescriptors
 *            the feature descriptors for training this model
 * @param criteria
 *            the criteria used to select the training corpus
 * @throws RuntimeException
 *             if the session has no split model path
 * @throws JochreException
 *             if {@code featureDescriptors} is null
 */
public void doCommandTrainSplits(List<String> featureDescriptors, CorpusSelectionCriteria criteria) {
    if (jochreSession.getSplitModelPath() == null) {
        throw new RuntimeException("Missing argument: splitModel");
    }
    if (featureDescriptors == null) {
        throw new JochreException("features is required");
    }

    File splitModelFile = new File(jochreSession.getSplitModelPath());
    // getParentFile() returns null when the path has no directory component
    // (e.g. "split.zip"); in that case there is no directory to create.
    File splitModelDir = splitModelFile.getParentFile();
    if (splitModelDir != null) {
        splitModelDir.mkdirs();
    }

    SplitFeatureParser splitFeatureParser = new SplitFeatureParser();
    Set<SplitFeature<?>> splitFeatures = splitFeatureParser.getSplitFeatureSet(featureDescriptors);
    ClassificationEventStream corpusEventStream = new JochreSplitEventStream(criteria, splitFeatures, jochreSession);

    ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
    ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
    ClassificationModel splitModel = trainer.trainModel(corpusEventStream, featureDescriptors);
    splitModel.persist(splitModelFile);
}
Aggregations