use of com.joliciel.jochre.JochreSession in project jochre by urieli.
the class RecursiveShapeSplitterTest method testSplitShapeNoSplitMoreLikely.
/**
 * With a mocked decision maker that always rates "do not split" (0.6) above
 * "do split" (0.4), checks that splitting yields five shape sequences whose
 * scores and sizes appear in the expected order: the single-shape sequence
 * first with score 1.0, and each additional shape multiplying the expected
 * score by 0.4 / 0.6.
 */
@SuppressWarnings("unchecked")
@Test
public void testSplitShapeNoSplitMoreLikely() throws Exception {
  System.setProperty("config.file", "src/test/resources/test.conf");
  ConfigFactory.invalidateCaches();
  final JochreSession session = new JochreSession(ConfigFactory.load());

  final JochreImage image = new JochreImage(new BufferedImage(256, 256, BufferedImage.TYPE_INT_RGB), session);

  // The shape handed to the splitter, plus the two shapes for which the
  // candidate finder is stubbed further down.
  final Shape wholeShape = new Shape(image, 0, 0, 63, 15, session);
  wholeShape.setBaseLine(12);
  wholeShape.setMeanLine(4);
  final Shape firstPart = new Shape(image, 0, 0, 31, 15, session);
  firstPart.setBaseLine(12);
  firstPart.setMeanLine(4);
  final Shape secondPart = new Shape(image, 32, 0, 63, 15, session);
  secondPart.setBaseLine(12);
  secondPart.setMeanLine(4);

  final SplitCandidateFinder candidateFinder = mock(SplitCandidateFinder.class);
  final DecisionMaker decider = mock(DecisionMaker.class);

  // One split candidate for the whole shape.
  final Split wholeSplit = new Split(wholeShape, session);
  wholeSplit.setPosition(31);
  final List<Split> wholeSplits = new ArrayList<>();
  wholeSplits.add(wholeSplit);
  when(candidateFinder.findSplitCandidates(wholeShape)).thenReturn(wholeSplits);

  // Whatever the features, the decision maker answers 40% split / 60% no split.
  final List<Decision> fixedDecisions = new ArrayList<>();
  fixedDecisions.add(new Decision(SplitOutcome.DO_SPLIT.name(), 0.4));
  fixedDecisions.add(new Decision(SplitOutcome.DO_NOT_SPLIT.name(), 0.6));
  when(decider.decide(anyList())).thenReturn(fixedDecisions);

  // One split candidate for each of the two smaller shapes.
  final Split firstPartSplit = new Split(firstPart, session);
  firstPartSplit.setPosition(15);
  final List<Split> firstPartSplits = new ArrayList<>();
  firstPartSplits.add(firstPartSplit);
  when(candidateFinder.findSplitCandidates(firstPart)).thenReturn(firstPartSplits);

  final Split secondPartSplit = new Split(secondPart, session);
  secondPartSplit.setPosition(15);
  final List<Split> secondPartSplits = new ArrayList<>();
  secondPartSplits.add(secondPartSplit);
  when(candidateFinder.findSplitCandidates(secondPart)).thenReturn(secondPartSplits);

  final Set<SplitFeature<?>> noFeatures = new TreeSet<>();
  final RecursiveShapeSplitter shapeSplitter = new RecursiveShapeSplitter(candidateFinder, noFeatures, decider, session);
  shapeSplitter.setBeamWidth(10);
  shapeSplitter.setMaxDepth(2);
  shapeSplitter.setMinWidthRatio(1.0);

  final List<ShapeSequence> sequences = shapeSplitter.split(wholeShape);
  assertEquals(5, sequences.size());

  final double ratio = 0.4 / 0.6;
  LOG.debug("twoThirds: " + ratio);

  // Expected score and number of shapes for each sequence, in returned order.
  final double[] expectedScores = { 1.0, ratio, ratio * ratio, ratio * ratio, ratio * ratio * ratio };
  final int[] expectedSizes = { 1, 2, 3, 3, 4 };

  int index = 0;
  for (ShapeSequence sequence : sequences) {
    LOG.debug("sequence " + index + " decisions:");
    for (Decision decision : sequence.getDecisions()) {
      LOG.debug("" + decision.getProbability());
    }
    assertEquals(expectedScores[index], sequence.getScore(), 0.0001);
    assertEquals(expectedSizes[index], sequence.size());
    index++;
  }
}
use of com.joliciel.jochre.JochreSession in project jochre by urieli.
the class RecursiveShapeSplitterTest method testSplitShapeSplitMoreLikely.
/**
 * With a mocked decision maker that always rates "do split" (0.6) above
 * "do not split" (0.4), checks that splitting yields five shape sequences in
 * the expected order: the four-shape sequence first with score 1.0, down to
 * the single-shape sequence last, each step fewer shapes multiplying the
 * expected score by 0.4 / 0.6.
 */
@SuppressWarnings("unchecked")
@Test
public void testSplitShapeSplitMoreLikely() throws Exception {
  System.setProperty("config.file", "src/test/resources/test.conf");
  ConfigFactory.invalidateCaches();
  final JochreSession session = new JochreSession(ConfigFactory.load());

  final JochreImage image = new JochreImage(new BufferedImage(256, 256, BufferedImage.TYPE_INT_RGB), session);

  // The shape handed to the splitter, plus the two shapes for which the
  // candidate finder is stubbed further down.
  final Shape wholeShape = new Shape(image, 0, 0, 63, 15, session);
  wholeShape.setBaseLine(12);
  wholeShape.setMeanLine(4);
  final Shape firstPart = new Shape(image, 0, 0, 31, 15, session);
  firstPart.setBaseLine(12);
  firstPart.setMeanLine(4);
  final Shape secondPart = new Shape(image, 32, 0, 63, 15, session);
  secondPart.setBaseLine(12);
  secondPart.setMeanLine(4);

  final SplitCandidateFinder candidateFinder = mock(SplitCandidateFinder.class);
  final DecisionMaker decider = mock(DecisionMaker.class);

  // One split candidate for the whole shape.
  final Split wholeSplit = new Split(wholeShape, session);
  wholeSplit.setPosition(31);
  final List<Split> wholeSplits = new ArrayList<>();
  wholeSplits.add(wholeSplit);
  when(candidateFinder.findSplitCandidates(wholeShape)).thenReturn(wholeSplits);

  // Whatever the features, the decision maker answers 60% split / 40% no split.
  final List<Decision> fixedDecisions = new ArrayList<>();
  fixedDecisions.add(new Decision(SplitOutcome.DO_SPLIT.name(), 0.6));
  fixedDecisions.add(new Decision(SplitOutcome.DO_NOT_SPLIT.name(), 0.4));
  when(decider.decide(anyList())).thenReturn(fixedDecisions);

  // One split candidate for each of the two smaller shapes.
  final Split firstPartSplit = new Split(firstPart, session);
  firstPartSplit.setPosition(15);
  final List<Split> firstPartSplits = new ArrayList<>();
  firstPartSplits.add(firstPartSplit);
  when(candidateFinder.findSplitCandidates(firstPart)).thenReturn(firstPartSplits);

  final Split secondPartSplit = new Split(secondPart, session);
  secondPartSplit.setPosition(15);
  final List<Split> secondPartSplits = new ArrayList<>();
  secondPartSplits.add(secondPartSplit);
  when(candidateFinder.findSplitCandidates(secondPart)).thenReturn(secondPartSplits);

  final Set<SplitFeature<?>> noFeatures = new TreeSet<>();
  final RecursiveShapeSplitter shapeSplitter = new RecursiveShapeSplitter(candidateFinder, noFeatures, decider, session);
  shapeSplitter.setBeamWidth(10);
  shapeSplitter.setMaxDepth(2);
  shapeSplitter.setMinWidthRatio(1.0);

  final List<ShapeSequence> sequences = shapeSplitter.split(wholeShape);
  assertEquals(5, sequences.size());

  // First pass: log the horizontal extent of every shape and the score of each sequence.
  int index = 0;
  for (ShapeSequence sequence : sequences) {
    LOG.debug("sequence " + index + " shapes:");
    for (ShapeInSequence member : sequence) {
      final Shape memberShape = member.getShape();
      LOG.debug("Shape: " + memberShape.getLeft() + "," + memberShape.getRight());
    }
    LOG.debug("" + sequence.getScore());
    index++;
  }

  final double ratio = 0.4 / 0.6;
  LOG.debug("twoThirds: " + ratio);

  // Expected score and number of shapes for each sequence, in returned order.
  final double[] expectedScores = { 1.0, ratio, ratio, ratio * ratio, ratio * ratio * ratio };
  final int[] expectedSizes = { 4, 3, 3, 2, 1 };

  // Second pass: log the decisions and verify score and size.
  index = 0;
  for (ShapeSequence sequence : sequences) {
    LOG.debug("sequence " + index + " decisions:");
    for (Decision decision : sequence.getDecisions()) {
      LOG.debug("" + decision.getProbability());
    }
    assertEquals(expectedScores[index], sequence.getScore(), 0.0001);
    assertEquals(expectedSizes[index], sequence.size());
    index++;
  }
}
use of com.joliciel.jochre.JochreSession in project jochre by urieli.
the class SplitCandidateFinderImplTest method testFindSplitCanidates.
/**
 * Runs a real {@code SplitCandidateFinder} over the test image
 * {@code shape_370454.png} and checks that, for each of the known split
 * positions (38, 59 and 82), at least one candidate lies closer than
 * {@code getMinDistanceBetweenSplits()}.
 *
 * @throws Exception
 *           if the configuration or the test image cannot be loaded
 */
@Test
public void testFindSplitCanidates() throws Exception {
  System.setProperty("config.file", "src/test/resources/test.conf");
  ConfigFactory.invalidateCaches();
  Config config = ConfigFactory.load();
  JochreSession jochreSession = new JochreSession(config);

  final BufferedImage image;
  // ImageIO.read leaves the stream open, so it is closed here once decoding is done.
  try (InputStream imageFileStream = getClass().getResourceAsStream("shape_370454.png")) {
    assertNotNull(imageFileStream);
    image = ImageIO.read(imageFileStream);
  }
  // ImageIO.read returns null when no registered reader can decode the stream.
  assertNotNull(image);

  final JochrePage page = mock(JochrePage.class);
  JochreImage jochreImage = new SourceImage(page, "name", image, jochreSession);
  // A single shape covering the whole image.
  Shape shape = jochreImage.getShape(0, 0, jochreImage.getWidth() - 1, jochreImage.getHeight() - 1);

  SplitCandidateFinder splitCandidateFinder = new SplitCandidateFinder(jochreSession);
  List<Split> splits = splitCandidateFinder.findSplitCandidates(shape);

  int[] trueSplitPositions = new int[] { 38, 59, 82 };
  boolean[] foundSplit = new boolean[trueSplitPositions.length];
  for (Split splitCandidate : splits) {
    LOG.debug("Split candidate at " + splitCandidate.getPosition());
    for (int i = 0; i < trueSplitPositions.length; i++) {
      int truePos = trueSplitPositions[i];
      int distance = Math.abs(splitCandidate.getPosition() - truePos);
      if (distance < splitCandidateFinder.getMinDistanceBetweenSplits()) {
        foundSplit[i] = true;
        LOG.debug("Found split: " + truePos + ", distance " + distance);
      }
    }
  }

  for (int i = 0; i < trueSplitPositions.length; i++) {
    assertTrue("didn't find split " + trueSplitPositions[i], foundSplit[i]);
  }
}
use of com.joliciel.jochre.JochreSession in project jochre by urieli.
the class SegmentationTest method testAlsacien1.
/**
 * Segmentation errors reported for Alsacien.
 * <p>
 * Segments {@code Alsacien1.jpg}, walks the resulting paragraphs in order and
 * matches them against a list of expected rectangles: a paragraph matches the
 * next expected rectangle when their overlap covers more than 80% of each of
 * them. All expected rectangles must be matched, and the matched paragraphs
 * must have the expected row counts (and, where specified, first-row word
 * counts).
 *
 * @throws Exception
 *           if the configuration or the test image cannot be loaded, or
 *           segmentation fails
 */
@Test
public void testAlsacien1(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
  Map<String, Object> configMap = new HashMap<>();
  configMap.put("jochre.locale", "de");
  Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
  JochreSession jochreSession = new JochreSession(config);
  new Expectations() {
    {
      jochrePage.getDocument();
      result = jochreDoc;
      minTimes = 0;
      jochreDoc.isLeftToRight();
      result = true;
      minTimes = 0;
    }
  };
  String imageName = "Alsacien1.jpg";
  LOG.debug(imageName);
  final BufferedImage image;
  // ImageIO.read leaves the stream open, so it is closed here once decoding is done.
  try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
    assertNotNull(imageFileStream);
    image = ImageIO.read(imageFileStream);
  }
  // ImageIO.read returns null when no registered reader can decode the stream.
  assertNotNull(image);
  SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
  Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
  segmenter.segment();
  List<Rectangle> textPars = new ArrayList<>();
  Rectangle textPar1 = new Rectangle(715, 517, 462, 115);
  // TODO: for now it's splitting this paragraph by row, since it's assuming
  // paragraphs cannot be
  // both outdented and indented on the same page
  // Rectangle textPar2 = new Rectangle(50, 666, 1798, 1039);
  Rectangle textPar3 = new Rectangle(55, 1837, 1777, 335);
  Rectangle textPar4 = new Rectangle(50, 2211, 1765, 154);
  Rectangle textPar5 = new Rectangle(44, 2404, 1782, 511);
  Rectangle textPar6 = new Rectangle(50, 2948, 1776, 154);
  Rectangle textPar7 = new Rectangle(50, 3135, 1770, 77);
  // title paragraph
  textPars.add(textPar1);
  // textPars.add(textPar2);
  textPars.add(textPar3);
  textPars.add(textPar4);
  textPars.add(textPar5);
  textPars.add(textPar6);
  textPars.add(textPar7);
  // i indexes the next expected rectangle still to be matched; j counts the
  // paragraphs visited (used for logging only).
  int i = 0;
  int j = 0;
  List<Paragraph> textParagraphs = new ArrayList<>();
  for (Paragraph par : sourceImage.getParagraphs()) {
    Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
    Rectangle expected = textPars.get(i);
    Rectangle intersection = expected.intersection(real);
    double realArea = (double) real.width * real.height;
    double expectedArea = (double) expected.width * expected.height;
    // For rectangles that are disjoint on both axes, intersection() yields a
    // negative width AND a negative height, whose product would be a bogus
    // positive area; treat any empty intersection as zero overlap.
    double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
    double realRatio = intersectionArea / realArea;
    double expectedRatio = intersectionArea / expectedArea;
    LOG.debug("Paragraph " + j + ": " + par.toString());
    LOG.debug("realRatio: " + realRatio);
    LOG.debug("expectedRatio: " + expectedRatio);
    if (realRatio > 0.8 && expectedRatio > 0.8) {
      LOG.debug("Found");
      textParagraphs.add(par);
      i++;
      // Every expected rectangle has been matched: stop before textPars.get(i)
      // runs past the end of the list on a later paragraph.
      if (i >= textPars.size()) {
        break;
      }
    }
    j++;
  }
  assertEquals(textPars.size(), textParagraphs.size());
  int[] rowCounts = new int[] { 1, 4, 2, 6, 2, 1 };
  // A first-row word count of 0 means "not checked" for that paragraph.
  int[] wordCountsFirstRow = new int[] { 2, 0, 0, 0, 0, 0, 0 };
  for (i = 0; i < textParagraphs.size(); i++) {
    assertEquals("row count " + i, rowCounts[i], textParagraphs.get(i).getRows().size());
    RowOfShapes row = textParagraphs.get(i).getRows().get(0);
    if (wordCountsFirstRow[i] > 0) {
      assertEquals("word count " + i, wordCountsFirstRow[i], row.getGroups().size());
    }
  }
}
use of com.joliciel.jochre.JochreSession in project jochre by urieli.
the class SegmentationTest method testAlsacienPlay3.
/**
 * Segmentation errors reported for Alsacien play - challenging because of the
 * unusual indentation.
 * <p>
 * Segments {@code AlsacienPlay3.jpg}, walks the resulting paragraphs in order
 * and matches them against a list of expected rectangles: a paragraph matches
 * the next expected rectangle when their overlap covers more than 80% of each
 * of them. All expected rectangles must be matched, and the matched
 * paragraphs must have the expected row counts (and, where specified,
 * first-row word counts).
 *
 * @throws Exception
 *           if the configuration or the test image cannot be loaded, or
 *           segmentation fails
 */
@Test
public void testAlsacienPlay3(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
  Map<String, Object> configMap = new HashMap<>();
  configMap.put("jochre.locale", "de");
  Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
  JochreSession jochreSession = new JochreSession(config);
  new Expectations() {
    {
      jochrePage.getDocument();
      result = jochreDoc;
      minTimes = 0;
      jochreDoc.isLeftToRight();
      result = true;
      minTimes = 0;
    }
  };
  String imageName = "AlsacienPlay3.jpg";
  LOG.debug(imageName);
  final BufferedImage image;
  // ImageIO.read leaves the stream open, so it is closed here once decoding is done.
  try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
    assertNotNull(imageFileStream);
    image = ImageIO.read(imageFileStream);
  }
  // ImageIO.read returns null when no registered reader can decode the stream.
  assertNotNull(image);
  SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
  Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
  segmenter.segment();
  List<Rectangle> textPars = new ArrayList<>();
  Rectangle textPar1 = new Rectangle(712, 532, 556, 52);
  Rectangle textPar2 = new Rectangle(324, 600, 1324, 128);
  Rectangle textPar3 = new Rectangle(680, 730, 592, 50);
  Rectangle textPar4 = new Rectangle(404, 808, 684, 48);
  // title paragraph
  textPars.add(textPar1);
  textPars.add(textPar2);
  textPars.add(textPar3);
  textPars.add(textPar4);
  // i indexes the next expected rectangle still to be matched; j counts the
  // paragraphs visited (used for logging only).
  int i = 0;
  int j = 0;
  List<Paragraph> textParagraphs = new ArrayList<>();
  for (Paragraph par : sourceImage.getParagraphs()) {
    Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
    Rectangle expected = textPars.get(i);
    Rectangle intersection = expected.intersection(real);
    double realArea = (double) real.width * real.height;
    double expectedArea = (double) expected.width * expected.height;
    // For rectangles that are disjoint on both axes, intersection() yields a
    // negative width AND a negative height, whose product would be a bogus
    // positive area; treat any empty intersection as zero overlap.
    double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
    double realRatio = intersectionArea / realArea;
    double expectedRatio = intersectionArea / expectedArea;
    LOG.debug("Paragraph " + j + ": " + par.toString());
    LOG.debug("realRatio: " + realRatio);
    LOG.debug("expectedRatio: " + expectedRatio);
    if (realRatio > 0.8 && expectedRatio > 0.8) {
      LOG.debug("Found");
      textParagraphs.add(par);
      i++;
      // Every expected rectangle has been matched: stop before textPars.get(i)
      // runs past the end of the list on a later paragraph.
      if (i >= textPars.size()) {
        break;
      }
    }
    j++;
  }
  assertEquals(textPars.size(), textParagraphs.size());
  int[] rowCounts = new int[] { 1, 2, 1, 1 };
  // TODO: words in "spaced" rows (uses spacing to emphasize instead of bold
  // or italics) get split
  // should try to detect multiple single letter words
  // A first-row word count of 0 means "not checked" for that paragraph.
  int[] wordCountsFirstRow = new int[] { 0, 10, 0, 5 };
  for (i = 0; i < textParagraphs.size(); i++) {
    assertEquals("row count " + i, rowCounts[i], textParagraphs.get(i).getRows().size());
    RowOfShapes row = textParagraphs.get(i).getRows().get(0);
    if (wordCountsFirstRow[i] > 0) {
      assertEquals("word count " + i, wordCountsFirstRow[i], row.getGroups().size());
    }
  }
}
Aggregations