Use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.
Class SegmentationTest, method testAlsacienPlay3.
/**
 * Segmentation errors reported for Alsacien play - challenging because of the
 * unusual indentation.
 * <p>
 * Loads {@code AlsacienPlay3.jpg} from the classpath, segments it, and then
 * walks the detected paragraphs in order, looking for each of four expected
 * paragraph rectangles in turn. A detected paragraph matches the current
 * expected rectangle when their overlap covers more than 80% of both the
 * detected and the expected area. Finally the row count of every matched
 * paragraph, and the word-group count of the first row where one is specified,
 * are checked.
 *
 * @param jochrePage
 *            mocked page handed to the {@link SourceImage}; returns
 *            {@code jochreDoc} as its document
 * @param jochreDoc
 *            mocked document, reported as left-to-right
 * @throws Exception
 *             if the session cannot be configured, the image cannot be read,
 *             or segmentation fails
 */
@Test
public void testAlsacienPlay3(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "de");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);

    new Expectations() {
        {
            jochrePage.getDocument();
            result = jochreDoc;
            minTimes = 0;
            jochreDoc.isLeftToRight();
            result = true;
            minTimes = 0;
        }
    };

    String imageName = "AlsacienPlay3.jpg";
    LOG.debug(imageName);

    // ImageIO.read does not close the stream it is given, so close it here.
    final BufferedImage image;
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    // ImageIO.read returns null when no registered reader can decode the
    // stream; fail here rather than somewhere inside the segmenter.
    assertNotNull(image);

    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();

    // Expected paragraph bounds as (x, y, width, height), in the order they
    // must be encountered among the detected paragraphs.
    List<Rectangle> textPars = new ArrayList<>();
    // NOTE(review): the original "title paragraph" comment sat next to the
    // first entry - presumably this is the title; confirm against the image.
    textPars.add(new Rectangle(712, 532, 556, 52));
    textPars.add(new Rectangle(324, 600, 1324, 128));
    textPars.add(new Rectangle(680, 730, 592, 50));
    textPars.add(new Rectangle(404, 808, 684, 48));

    int expectedIndex = 0;
    int paragraphIndex = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(expectedIndex);
        Rectangle intersection = expected.intersection(real);

        // Multiply as doubles so the areas cannot overflow int.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Rectangle.intersection yields negative dimensions for rectangles
        // that do not overlap; when both are negative their product is
        // positive and would look like a large overlap, so treat an empty
        // intersection as zero area.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;

        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;

        LOG.debug("Paragraph " + paragraphIndex + ": " + par.toString());
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);

        if (realRatio > 0.8 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            expectedIndex++;
            if (expectedIndex >= textPars.size()) {
                // every expected paragraph has been located
                break;
            }
        }
        paragraphIndex++;
    }

    assertEquals(textPars.size(), textParagraphs.size());

    int[] rowCounts = new int[] { 1, 2, 1, 1 };
    // TODO: words in "spaced" rows (uses spacing to emphasize instead of bold
    // or italics) get split
    // should try to detect multiple single letter words
    // A value of 0 means "do not check the word count for this paragraph".
    int[] wordCountsFirstRow = new int[] { 0, 10, 0, 5 };
    for (int p = 0; p < textParagraphs.size(); p++) {
        assertEquals("row count " + p, rowCounts[p], textParagraphs.get(p).getRows().size());
        if (wordCountsFirstRow[p] > 0) {
            RowOfShapes row = textParagraphs.get(p).getRows().get(0);
            assertEquals("word count " + p, wordCountsFirstRow[p], row.getGroups().size());
        }
    }
}
Aggregations