use of com.joliciel.jochre.boundaries.features.SplitFeature in project jochre by urieli.
the class RecursiveShapeSplitterTest method testSplitShape.
/**
 * With a 50/50 probability returned for every split decision, the splitter
 * is expected to return 5 shape sequences, each with the same score.
 */
@SuppressWarnings("unchecked")
@Test
public void testSplitShape() throws Exception {
    // Session built from the test configuration file.
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final JochreSession jochreSession = new JochreSession(ConfigFactory.load());
    final JochreImage jochreImage = new JochreImage(new BufferedImage(256, 256, BufferedImage.TYPE_INT_RGB), jochreSession);

    // One wide shape, plus the left and right sub-shapes covering 0-31 and 32-63.
    final Shape wholeShape = new Shape(jochreImage, 0, 0, 63, 15, jochreSession);
    wholeShape.setBaseLine(12);
    wholeShape.setMeanLine(4);
    final Shape leftShape = new Shape(jochreImage, 0, 0, 31, 15, jochreSession);
    leftShape.setBaseLine(12);
    leftShape.setMeanLine(4);
    final Shape rightShape = new Shape(jochreImage, 32, 0, 63, 15, jochreSession);
    rightShape.setBaseLine(12);
    rightShape.setMeanLine(4);

    final SplitCandidateFinder splitCandidateFinder = mock(SplitCandidateFinder.class);
    final DecisionMaker decisionMaker = mock(DecisionMaker.class);

    // Each shape offers exactly one split candidate.
    final Split wholeSplit = new Split(wholeShape, jochreSession);
    wholeSplit.setPosition(31);
    final List<Split> wholeCandidates = new ArrayList<>();
    wholeCandidates.add(wholeSplit);
    when(splitCandidateFinder.findSplitCandidates(wholeShape)).thenReturn(wholeCandidates);

    final Split leftSplit = new Split(leftShape, jochreSession);
    leftSplit.setPosition(15);
    final List<Split> leftCandidates = new ArrayList<>();
    leftCandidates.add(leftSplit);
    when(splitCandidateFinder.findSplitCandidates(leftShape)).thenReturn(leftCandidates);

    final Split rightSplit = new Split(rightShape, jochreSession);
    rightSplit.setPosition(15);
    final List<Split> rightCandidates = new ArrayList<>();
    rightCandidates.add(rightSplit);
    when(splitCandidateFinder.findSplitCandidates(rightShape)).thenReturn(rightCandidates);

    // The decision maker always answers split / don't split with equal probability.
    final List<Decision> evenDecisions = new ArrayList<>();
    evenDecisions.add(new Decision(SplitOutcome.DO_SPLIT.name(), 0.5));
    evenDecisions.add(new Decision(SplitOutcome.DO_NOT_SPLIT.name(), 0.5));
    when(decisionMaker.decide(anyList())).thenReturn(evenDecisions);

    final Set<SplitFeature<?>> splitFeatures = new TreeSet<>();
    final RecursiveShapeSplitter splitter = new RecursiveShapeSplitter(splitCandidateFinder, splitFeatures, decisionMaker, jochreSession);
    splitter.setBeamWidth(10);
    splitter.setMaxDepth(2);
    splitter.setMinWidthRatio(1.0);

    final List<ShapeSequence> shapeSequences = splitter.split(wholeShape);
    assertEquals(5, shapeSequences.size());
    for (int index = 0; index < shapeSequences.size(); index++) {
        assertEquals(1.0, shapeSequences.get(index).getScore(), 0.0001);
    }
}
use of com.joliciel.jochre.boundaries.features.SplitFeature in project jochre by urieli.
the class Jochre method doCommandEvaluateFull.
/**
 * Evaluate a suite of split/merge models and letter guessing model.
 * <p>
 * Builds the letter guesser, shape splitter, shape merger and boundary
 * detector from the models held by the session, runs the image analyser over
 * the selected corpus, writes an error log and appends the f-scores to a CSV
 * file in {@code outputDir}.
 *
 * @param criteria
 *          for selecting the evaluation corpus
 * @param save
 *          whether or not the letter guesses should be saved
 * @param outputDir
 *          directory in which the error file and the f-score CSV are written
 * @param wordChooser
 *          passed as-is to the image analyser
 * @param suffix
 *          inserted into the output file names, after the model base name
 * @param observers
 *          additional observers registered on the corpus image processor
 * @throws IllegalArgumentException
 *           if the session has no split model or no merge model
 * @throws IOException
 *           if an output file cannot be opened, written or closed
 */
public void doCommandEvaluateFull(CorpusSelectionCriteria criteria, boolean save, File outputDir, MostLikelyWordChooser wordChooser, String suffix, List<DocumentObserver> observers) throws IOException {
    // Base name of the letter model: the file-name component only, cut at its
    // first dot. Working on the file name (rather than the whole path) keeps
    // dots in directory names or a leading "../" from truncating the name, and
    // a model file without any extension no longer triggers an exception.
    String baseName = new File(jochreSession.getLetterModelPath()).getName();
    int firstDot = baseName.indexOf('.');
    if (firstDot >= 0) {
        baseName = baseName.substring(0, firstDot);
    }

    // Letter guesser, built from the features stored with the letter model.
    ClassificationModel letterModel = jochreSession.getLetterModel();
    List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
    LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
    Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
    LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());

    // Shape splitter, built from the split model.
    ClassificationModel splitModel = jochreSession.getSplitModel();
    if (splitModel == null) {
        throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.split-model");
    }
    List<String> splitFeatureDescriptors = splitModel.getFeatureDescriptors();
    SplitFeatureParser splitFeatureParser = new SplitFeatureParser();
    Set<SplitFeature<?>> splitFeatures = splitFeatureParser.getSplitFeatureSet(splitFeatureDescriptors);
    SplitCandidateFinder splitCandidateFinder = new SplitCandidateFinder(jochreSession);
    splitCandidateFinder.setMinDistanceBetweenSplits(5);
    ShapeSplitter shapeSplitter = new RecursiveShapeSplitter(splitCandidateFinder, splitFeatures, splitModel.getDecisionMaker(), jochreSession);

    // Shape merger, built from the merge model.
    ClassificationModel mergeModel = jochreSession.getMergeModel();
    if (mergeModel == null) {
        throw new IllegalArgumentException("Missing parameter: jochre.image-analyser.merge-model");
    }
    List<String> mergeFeatureDescriptors = mergeModel.getFeatureDescriptors();
    MergeFeatureParser mergeFeatureParser = new MergeFeatureParser();
    Set<MergeFeature<?>> mergeFeatures = mergeFeatureParser.getMergeFeatureSet(mergeFeatureDescriptors);
    ShapeMerger shapeMerger = new ShapeMerger(mergeFeatures, mergeModel.getDecisionMaker());

    // Boundary detector, chosen by configuration.
    // NOTE(review): there is no default branch, so any detector type other than
    // the two handled here leaves boundaryDetector null - confirm the enum has
    // no further values.
    BoundaryDetector boundaryDetector = null;
    String boundaryDetectorTypeName = jochreSession.getConfig().getConfig("jochre.boundaries").getString("boundary-detector-type");
    BoundaryDetectorType boundaryDetectorType = BoundaryDetectorType.valueOf(boundaryDetectorTypeName);
    switch (boundaryDetectorType) {
    case LetterByLetter:
        boundaryDetector = new LetterByLetterBoundaryDetector(shapeSplitter, shapeMerger, jochreSession);
        break;
    case Deterministic:
        boundaryDetector = new DeterministicBoundaryDetector(shapeSplitter, shapeMerger, jochreSession);
        break;
    }

    ImageAnalyser imageAnalyser = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);

    // Observer that assigns letters to the original shapes and tracks the f-score.
    LetterValidator letterValidator = new ComponentCharacterValidator(jochreSession);
    OriginalShapeLetterAssigner shapeLetterAssigner = new OriginalShapeLetterAssigner();
    shapeLetterAssigner.setEvaluate(true);
    shapeLetterAssigner.setSave(save);
    shapeLetterAssigner.setLetterValidator(letterValidator);
    shapeLetterAssigner.setSingleLetterMethod(false);
    imageAnalyser.addObserver(shapeLetterAssigner);

    // Start each run with a fresh error file.
    ErrorLogger errorLogger = new ErrorLogger(jochreSession);
    File errorFile = new File(outputDir, baseName + suffix + "errors.txt");
    errorFile.delete();

    // try-with-resources guarantees the error log is flushed and closed, even
    // if processing fails part-way through.
    try (FileOutputStream errorStream = new FileOutputStream(errorFile, true);
            Writer errorWriter = new BufferedWriter(new OutputStreamWriter(errorStream, "UTF8"))) {
        errorLogger.setErrorWriter(errorWriter);
        imageAnalyser.addObserver(errorLogger);

        JochreCorpusImageProcessor imageProcessor = new JochreCorpusImageProcessor(criteria, jochreSession);
        imageProcessor.addObserver(imageAnalyser);
        for (DocumentObserver observer : observers) {
            imageProcessor.addObserver(observer);
        }
        imageProcessor.process();
    }

    LOG.debug("F-score for " + jochreSession.getLetterModelPath() + ": " + shapeLetterAssigner.getFScoreCalculator().getTotalFScore());

    // The f-score file is opened in append mode, as before; the writer is now
    // closed so the scores actually reach the disk.
    String modelFileName = baseName + suffix + "_full";
    File fscoreFile = new File(outputDir, modelFileName + "_fscores.csv");
    try (FileOutputStream fscoreStream = new FileOutputStream(fscoreFile, true);
            Writer fscoreWriter = new BufferedWriter(new OutputStreamWriter(fscoreStream, jochreSession.getCsvEncoding()))) {
        shapeLetterAssigner.getFScoreCalculator().writeScoresToCSV(fscoreWriter);
    }
}
use of com.joliciel.jochre.boundaries.features.SplitFeature in project jochre by urieli.
the class RecursiveShapeSplitterTest method testSplitShapeNoSplitMoreLikely.
/**
 * With "do not split" returned at 60% and "split" at 40% for every decision,
 * checks the score and the number of shapes of each of the 5 sequences
 * returned, in order.
 */
@SuppressWarnings("unchecked")
@Test
public void testSplitShapeNoSplitMoreLikely() throws Exception {
    // Session built from the test configuration file.
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final JochreSession jochreSession = new JochreSession(ConfigFactory.load());
    final JochreImage jochreImage = new JochreImage(new BufferedImage(256, 256, BufferedImage.TYPE_INT_RGB), jochreSession);

    // One wide shape, plus the left and right sub-shapes covering 0-31 and 32-63.
    final Shape wholeShape = new Shape(jochreImage, 0, 0, 63, 15, jochreSession);
    wholeShape.setBaseLine(12);
    wholeShape.setMeanLine(4);
    final Shape leftShape = new Shape(jochreImage, 0, 0, 31, 15, jochreSession);
    leftShape.setBaseLine(12);
    leftShape.setMeanLine(4);
    final Shape rightShape = new Shape(jochreImage, 32, 0, 63, 15, jochreSession);
    rightShape.setBaseLine(12);
    rightShape.setMeanLine(4);

    final SplitCandidateFinder splitCandidateFinder = mock(SplitCandidateFinder.class);
    final DecisionMaker decisionMaker = mock(DecisionMaker.class);

    // Each shape offers exactly one split candidate.
    final Split wholeSplit = new Split(wholeShape, jochreSession);
    wholeSplit.setPosition(31);
    final List<Split> wholeCandidates = new ArrayList<>();
    wholeCandidates.add(wholeSplit);
    when(splitCandidateFinder.findSplitCandidates(wholeShape)).thenReturn(wholeCandidates);

    final Split leftSplit = new Split(leftShape, jochreSession);
    leftSplit.setPosition(15);
    final List<Split> leftCandidates = new ArrayList<>();
    leftCandidates.add(leftSplit);
    when(splitCandidateFinder.findSplitCandidates(leftShape)).thenReturn(leftCandidates);

    final Split rightSplit = new Split(rightShape, jochreSession);
    rightSplit.setPosition(15);
    final List<Split> rightCandidates = new ArrayList<>();
    rightCandidates.add(rightSplit);
    when(splitCandidateFinder.findSplitCandidates(rightShape)).thenReturn(rightCandidates);

    // The decision maker always favours "do not split" (0.6 vs 0.4).
    final List<Decision> decisions = new ArrayList<>();
    decisions.add(new Decision(SplitOutcome.DO_SPLIT.name(), 0.4));
    decisions.add(new Decision(SplitOutcome.DO_NOT_SPLIT.name(), 0.6));
    when(decisionMaker.decide(anyList())).thenReturn(decisions);

    final Set<SplitFeature<?>> splitFeatures = new TreeSet<>();
    final RecursiveShapeSplitter splitter = new RecursiveShapeSplitter(splitCandidateFinder, splitFeatures, decisionMaker, jochreSession);
    splitter.setBeamWidth(10);
    splitter.setMaxDepth(2);
    splitter.setMinWidthRatio(1.0);

    final List<ShapeSequence> shapeSequences = splitter.split(wholeShape);
    assertEquals(5, shapeSequences.size());

    final double ratio = 0.4 / 0.6;
    LOG.debug("twoThirds: " + ratio);

    // Expected score and shape count per sequence, in the order returned.
    final double[] expectedScores = { 1.0, ratio, ratio * ratio, ratio * ratio, ratio * ratio * ratio };
    final int[] expectedSizes = { 1, 2, 3, 3, 4 };

    for (int index = 0; index < shapeSequences.size(); index++) {
        final ShapeSequence sequence = shapeSequences.get(index);
        LOG.debug("sequence " + index + " decisions:");
        for (Decision decision : sequence.getDecisions()) {
            LOG.debug("" + decision.getProbability());
        }
        if (index < expectedScores.length) {
            assertEquals(expectedScores[index], sequence.getScore(), 0.0001);
            assertEquals(expectedSizes[index], sequence.size());
        }
    }
}
use of com.joliciel.jochre.boundaries.features.SplitFeature in project jochre by urieli.
the class RecursiveShapeSplitterTest method testSplitShapeSplitMoreLikely.
/**
 * When a split is always the more likely outcome (60% likelihood), checks
 * that the 5 shape sequences come back in the expected order, by score and by
 * number of shapes.
 */
@SuppressWarnings("unchecked")
@Test
public void testSplitShapeSplitMoreLikely() throws Exception {
    // Session built from the test configuration file.
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final JochreSession jochreSession = new JochreSession(ConfigFactory.load());
    final JochreImage jochreImage = new JochreImage(new BufferedImage(256, 256, BufferedImage.TYPE_INT_RGB), jochreSession);

    // One wide shape, plus the left and right sub-shapes covering 0-31 and 32-63.
    final Shape wholeShape = new Shape(jochreImage, 0, 0, 63, 15, jochreSession);
    wholeShape.setBaseLine(12);
    wholeShape.setMeanLine(4);
    final Shape leftShape = new Shape(jochreImage, 0, 0, 31, 15, jochreSession);
    leftShape.setBaseLine(12);
    leftShape.setMeanLine(4);
    final Shape rightShape = new Shape(jochreImage, 32, 0, 63, 15, jochreSession);
    rightShape.setBaseLine(12);
    rightShape.setMeanLine(4);

    final SplitCandidateFinder splitCandidateFinder = mock(SplitCandidateFinder.class);
    final DecisionMaker decisionMaker = mock(DecisionMaker.class);

    // Each shape offers exactly one split candidate.
    final Split wholeSplit = new Split(wholeShape, jochreSession);
    wholeSplit.setPosition(31);
    final List<Split> wholeCandidates = new ArrayList<>();
    wholeCandidates.add(wholeSplit);
    when(splitCandidateFinder.findSplitCandidates(wholeShape)).thenReturn(wholeCandidates);

    final Split leftSplit = new Split(leftShape, jochreSession);
    leftSplit.setPosition(15);
    final List<Split> leftCandidates = new ArrayList<>();
    leftCandidates.add(leftSplit);
    when(splitCandidateFinder.findSplitCandidates(leftShape)).thenReturn(leftCandidates);

    final Split rightSplit = new Split(rightShape, jochreSession);
    rightSplit.setPosition(15);
    final List<Split> rightCandidates = new ArrayList<>();
    rightCandidates.add(rightSplit);
    when(splitCandidateFinder.findSplitCandidates(rightShape)).thenReturn(rightCandidates);

    // The decision maker always favours "split" (0.6 vs 0.4).
    final List<Decision> decisions = new ArrayList<>();
    decisions.add(new Decision(SplitOutcome.DO_SPLIT.name(), 0.6));
    decisions.add(new Decision(SplitOutcome.DO_NOT_SPLIT.name(), 0.4));
    when(decisionMaker.decide(anyList())).thenReturn(decisions);

    final Set<SplitFeature<?>> splitFeatures = new TreeSet<>();
    final RecursiveShapeSplitter splitter = new RecursiveShapeSplitter(splitCandidateFinder, splitFeatures, decisionMaker, jochreSession);
    splitter.setBeamWidth(10);
    splitter.setMaxDepth(2);
    splitter.setMinWidthRatio(1.0);

    final List<ShapeSequence> shapeSequences = splitter.split(wholeShape);
    assertEquals(5, shapeSequences.size());

    // First pass: log the horizontal extent of every shape and each sequence's score.
    for (int index = 0; index < shapeSequences.size(); index++) {
        final ShapeSequence sequence = shapeSequences.get(index);
        LOG.debug("sequence " + index + " shapes:");
        for (ShapeInSequence member : sequence) {
            final Shape memberShape = member.getShape();
            LOG.debug("Shape: " + memberShape.getLeft() + "," + memberShape.getRight());
        }
        LOG.debug("" + sequence.getScore());
    }

    final double ratio = 0.4 / 0.6;
    LOG.debug("twoThirds: " + ratio);

    // Expected score and shape count per sequence, in the order returned.
    final double[] expectedScores = { 1.0, ratio, ratio, ratio * ratio, ratio * ratio * ratio };
    final int[] expectedSizes = { 4, 3, 3, 2, 1 };

    // Second pass: log the decisions and verify each sequence.
    for (int index = 0; index < shapeSequences.size(); index++) {
        final ShapeSequence sequence = shapeSequences.get(index);
        LOG.debug("sequence " + index + " decisions:");
        for (Decision decision : sequence.getDecisions()) {
            LOG.debug("" + decision.getProbability());
        }
        if (index < expectedScores.length) {
            assertEquals(expectedScores[index], sequence.getScore(), 0.0001);
            assertEquals(expectedSizes[index], sequence.size());
        }
    }
}
use of com.joliciel.jochre.boundaries.features.SplitFeature in project jochre by urieli.
the class Jochre method doCommandTrainSplits.
/**
 * Train the letter splitting model and persist it to the split model path
 * configured on the session.
 *
 * @param featureDescriptors
 *          the feature descriptors for training this model
 * @param criteria
 *          the criteria used to select the training corpus
 * @throws RuntimeException
 *           if the session has no split model path
 * @throws JochreException
 *           if {@code featureDescriptors} is null
 */
public void doCommandTrainSplits(List<String> featureDescriptors, CorpusSelectionCriteria criteria) {
    if (jochreSession.getSplitModelPath() == null) {
        throw new RuntimeException("Missing argument: splitModel");
    }
    if (featureDescriptors == null) {
        throw new JochreException("features is required");
    }

    File splitModelFile = new File(jochreSession.getSplitModelPath());
    // getParentFile() returns null when the path has no directory component
    // (e.g. "split.zip"); in that case there is nothing to create.
    File splitModelDir = splitModelFile.getParentFile();
    if (splitModelDir != null) {
        splitModelDir.mkdirs();
    }

    // Turn the descriptors into features and stream training events from the corpus.
    SplitFeatureParser splitFeatureParser = new SplitFeatureParser();
    Set<SplitFeature<?>> splitFeatures = splitFeatureParser.getSplitFeatureSet(featureDescriptors);
    ClassificationEventStream corpusEventStream = new JochreSplitEventStream(criteria, splitFeatures, jochreSession);

    // Train with the trainer selected by the session configuration, then save the model.
    ModelTrainerFactory modelTrainerFactory = new ModelTrainerFactory();
    ClassificationModelTrainer trainer = modelTrainerFactory.constructTrainer(jochreSession.getConfig());
    ClassificationModel splitModel = trainer.trainModel(corpusEventStream, featureDescriptors);
    splitModel.persist(splitModelFile);
}
Aggregations