Search in sources :

Example 6 with Segmenter

use of com.joliciel.jochre.graphics.Segmenter in project jochre by urieli.

the class SegmentationTest method testAlsacien2.

/**
 * Regression test for segmentation errors reported on the Alsacien sample page
 * {@code Alsacien2.jpeg}.
 * <p>
 * The image is segmented, then the detected paragraphs are walked in order and
 * each one is compared with the next expected bounding box. A detected
 * paragraph is accepted when the overlap covers more than 80% of both the
 * detected and the expected rectangle. For every accepted paragraph the row
 * count is checked and, where a positive value is given, the number of groups
 * in its first row.
 *
 * @param jochrePage mocked page, stubbed to return {@code jochreDoc}
 * @param jochreDoc mocked document, stubbed to report left-to-right text
 * @throws Exception if loading the test image or segmenting it throws
 */
@Test
public void testAlsacien2(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "de");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    new Expectations() {

        {
            jochrePage.getDocument();
            result = jochreDoc;
            minTimes = 0;
            jochreDoc.isLeftToRight();
            result = true;
            minTimes = 0;
        }
    };
    String imageName = "Alsacien2.jpeg";
    LOG.debug(imageName);
    BufferedImage image;
    // ImageIO.read does not close the stream it is handed, so close it here.
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();

    // Expected paragraph bounding boxes (x, y, width, height), top to bottom.
    List<Rectangle> textPars = new ArrayList<>();
    // title paragraph
    textPars.add(new Rectangle(63, 81, 1059, 108));
    textPars.add(new Rectangle(66, 204, 1065, 294));
    textPars.add(new Rectangle(63, 516, 1068, 348));
    textPars.add(new Rectangle(63, 879, 1071, 537));
    textPars.add(new Rectangle(63, 1428, 1068, 354));

    // i indexes the next expected box, j counts the detected paragraphs seen.
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Widen before multiplying so large pages cannot overflow int.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Disjoint rectangles produce negative width and/or height; multiplying
        // two negatives would yield a bogus positive overlap, so treat an empty
        // intersection as zero area.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j + ": " + par.toString());
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        if (realRatio > 0.8 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // Stop once every expected box is matched; otherwise the next
            // textPars.get(i) would be out of bounds.
            if (i >= textPars.size()) {
                break;
            }
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());

    int[] rowCounts = new int[] { 2, 5, 6, 9, 6 };
    // A value of 0 means the first-row word count is not checked.
    int[] wordCountsFirstRow = new int[] { 10, 8, 9, 8, 8 };
    for (i = 0; i < textParagraphs.size(); i++) {
        assertEquals("row count " + i, rowCounts[i], textParagraphs.get(i).getRows().size());
        RowOfShapes row = textParagraphs.get(i).getRows().get(0);
        if (wordCountsFirstRow[i] > 0) {
            assertEquals("word count " + i, wordCountsFirstRow[i], row.getGroups().size());
        }
    }
}
Also used : Expectations(mockit.Expectations) SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 7 with Segmenter

use of com.joliciel.jochre.graphics.Segmenter in project jochre by urieli.

the class SegmentationTest method testPerets_KhsidisheMayselekh.

/**
 * This page is challenging because of the large blotch of dirt in the lower
 * right. Also, there are several short, indented, one-line paragraphs.
 * <p>
 * The image is segmented, then the detected paragraphs are walked in order and
 * each one is compared with the next expected bounding box. A detected
 * paragraph is accepted when the overlap covers more than 80% of both the
 * detected and the expected rectangle. For every accepted paragraph the row
 * count is checked and, where a positive value is given, the number of groups
 * in its first row.
 *
 * @param jochrePage mocked page passed to the {@code SourceImage}
 * @throws Exception if loading the test image or segmenting it throws
 */
@Test
public void testPerets_KhsidisheMayselekh(@Mocked JochrePage jochrePage) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "yi");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    String imageName = "Peretz_KhsidisheMayselekh_5_0.png";
    LOG.debug(imageName);
    BufferedImage image;
    // ImageIO.read does not close the stream it is handed, so close it here.
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();

    // Expected paragraph bounding boxes (x, y, width, height), top to bottom.
    List<Rectangle> textPars = new ArrayList<>();
    // title paragraph
    textPars.add(new Rectangle(1670, 920, 624, 182));
    textPars.add(new Rectangle(740, 1524, 2368, 96));
    textPars.add(new Rectangle(680, 1652, 2620, 192));
    textPars.add(new Rectangle(652, 1872, 2648, 800));
    textPars.add(new Rectangle(1564, 2696, 1524, 96));
    textPars.add(new Rectangle(660, 2820, 2632, 328));
    textPars.add(new Rectangle(660, 3176, 2628, 212));
    textPars.add(new Rectangle(664, 3404, 2636, 428));
    // short paragraph
    textPars.add(new Rectangle(1992, 3868, 1088, 100));
    // paragraph with blotch
    textPars.add(new Rectangle(664, 4000, 2624, 436));
    textPars.add(new Rectangle(664, 4468, 2628, 204));

    // i indexes the next expected box, j counts the detected paragraphs seen.
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Widen before multiplying so large pages cannot overflow int.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Disjoint rectangles produce negative width and/or height; multiplying
        // two negatives would yield a bogus positive overlap, so treat an empty
        // intersection as zero area.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j);
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        if (realRatio > 0.8 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // Stop once every expected box is matched; without this guard a
            // trailing detected paragraph makes textPars.get(i) throw
            // IndexOutOfBoundsException instead of letting the asserts run.
            if (i >= textPars.size()) {
                break;
            }
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());

    int[] rowCounts = new int[] { 1, 1, 2, 7, 1, 3, 2, 4, 1, 4, 2 };
    // A value of 0 means the first-row word count is not checked.
    int[] wordCountsFirstRow = new int[] { 1, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0 };
    for (i = 0; i < textParagraphs.size(); i++) {
        assertEquals("row count " + i, rowCounts[i], textParagraphs.get(i).getRows().size());
        RowOfShapes row = textParagraphs.get(i).getRows().get(0);
        if (wordCountsFirstRow[i] > 0) {
            assertEquals("word count " + i, wordCountsFirstRow[i], row.getGroups().size());
        }
    }
}
Also used : SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 8 with Segmenter

use of com.joliciel.jochre.graphics.Segmenter in project jochre by urieli.

the class SegmentationTest method testAlsacien1.

/**
 * Regression test for segmentation errors reported on the Alsacien sample page
 * {@code Alsacien1.jpg}.
 * <p>
 * The image is segmented, then the detected paragraphs are walked in order and
 * each one is compared with the next expected bounding box. A detected
 * paragraph is accepted when the overlap covers more than 80% of both the
 * detected and the expected rectangle. For every accepted paragraph the row
 * count is checked and, where a positive value is given, the number of groups
 * in its first row. The second paragraph of the page is currently left out of
 * the expectations (see the TODO in the body).
 *
 * @param jochrePage mocked page, stubbed to return {@code jochreDoc}
 * @param jochreDoc mocked document, stubbed to report left-to-right text
 * @throws Exception if loading the test image or segmenting it throws
 */
@Test
public void testAlsacien1(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "de");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    new Expectations() {

        {
            jochrePage.getDocument();
            result = jochreDoc;
            minTimes = 0;
            jochreDoc.isLeftToRight();
            result = true;
            minTimes = 0;
        }
    };
    String imageName = "Alsacien1.jpg";
    LOG.debug(imageName);
    BufferedImage image;
    // ImageIO.read does not close the stream it is handed, so close it here.
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();

    // Expected paragraph bounding boxes (x, y, width, height), top to bottom.
    List<Rectangle> textPars = new ArrayList<>();
    // title paragraph
    textPars.add(new Rectangle(715, 517, 462, 115));
    // TODO: for now it's splitting this paragraph by row, since it's assuming
    // paragraphs cannot be both outdented and indented on the same page
    // textPars.add(new Rectangle(50, 666, 1798, 1039));
    textPars.add(new Rectangle(55, 1837, 1777, 335));
    textPars.add(new Rectangle(50, 2211, 1765, 154));
    textPars.add(new Rectangle(44, 2404, 1782, 511));
    textPars.add(new Rectangle(50, 2948, 1776, 154));
    textPars.add(new Rectangle(50, 3135, 1770, 77));

    // i indexes the next expected box, j counts the detected paragraphs seen.
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Widen before multiplying so large pages cannot overflow int.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Disjoint rectangles produce negative width and/or height; multiplying
        // two negatives would yield a bogus positive overlap, so treat an empty
        // intersection as zero area.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j + ": " + par.toString());
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        if (realRatio > 0.8 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // Stop once every expected box is matched; without this guard a
            // trailing detected paragraph makes textPars.get(i) throw
            // IndexOutOfBoundsException instead of letting the asserts run.
            if (i >= textPars.size()) {
                break;
            }
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());

    int[] rowCounts = new int[] { 1, 4, 2, 6, 2, 1 };
    // A value of 0 means the first-row word count is not checked. One entry
    // per expected paragraph (the original carried a stray seventh zero).
    int[] wordCountsFirstRow = new int[] { 2, 0, 0, 0, 0, 0 };
    for (i = 0; i < textParagraphs.size(); i++) {
        assertEquals("row count " + i, rowCounts[i], textParagraphs.get(i).getRows().size());
        RowOfShapes row = textParagraphs.get(i).getRows().get(0);
        if (wordCountsFirstRow[i] > 0) {
            assertEquals("word count " + i, wordCountsFirstRow[i], row.getGroups().size());
        }
    }
}
Also used : Expectations(mockit.Expectations) SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 9 with Segmenter

use of com.joliciel.jochre.graphics.Segmenter in project jochre by urieli.

the class SegmentationTest method testAlsacienPlay3.

/**
 * Segmentation errors reported for Alsacien play - challenging because of the
 * unusual indentation.
 * <p>
 * The image {@code AlsacienPlay3.jpg} is segmented, then the detected
 * paragraphs are walked in order and each one is compared with the next
 * expected bounding box. A detected paragraph is accepted when the overlap
 * covers more than 80% of both the detected and the expected rectangle. For
 * every accepted paragraph the row count is checked and, where a positive
 * value is given, the number of groups in its first row.
 *
 * @param jochrePage mocked page, stubbed to return {@code jochreDoc}
 * @param jochreDoc mocked document, stubbed to report left-to-right text
 * @throws Exception if loading the test image or segmenting it throws
 */
@Test
public void testAlsacienPlay3(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "de");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    new Expectations() {

        {
            jochrePage.getDocument();
            result = jochreDoc;
            minTimes = 0;
            jochreDoc.isLeftToRight();
            result = true;
            minTimes = 0;
        }
    };
    String imageName = "AlsacienPlay3.jpg";
    LOG.debug(imageName);
    BufferedImage image;
    // ImageIO.read does not close the stream it is handed, so close it here.
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();

    // Expected paragraph bounding boxes (x, y, width, height), top to bottom.
    List<Rectangle> textPars = new ArrayList<>();
    // title paragraph
    textPars.add(new Rectangle(712, 532, 556, 52));
    textPars.add(new Rectangle(324, 600, 1324, 128));
    textPars.add(new Rectangle(680, 730, 592, 50));
    textPars.add(new Rectangle(404, 808, 684, 48));

    // i indexes the next expected box, j counts the detected paragraphs seen.
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Widen before multiplying so large pages cannot overflow int.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Disjoint rectangles produce negative width and/or height; multiplying
        // two negatives would yield a bogus positive overlap, so treat an empty
        // intersection as zero area.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j + ": " + par.toString());
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        if (realRatio > 0.8 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // Stop once every expected box is matched; otherwise the next
            // textPars.get(i) would be out of bounds.
            if (i >= textPars.size()) {
                break;
            }
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());

    int[] rowCounts = new int[] { 1, 2, 1, 1 };
    // TODO: words in "spaced" rows (uses spacing to emphasize instead of bold
    // or italics) get split; should try to detect multiple single letter words.
    // A value of 0 means the first-row word count is not checked.
    int[] wordCountsFirstRow = new int[] { 0, 10, 0, 5 };
    for (i = 0; i < textParagraphs.size(); i++) {
        assertEquals("row count " + i, rowCounts[i], textParagraphs.get(i).getRows().size());
        RowOfShapes row = textParagraphs.get(i).getRows().get(0);
        if (wordCountsFirstRow[i] > 0) {
            assertEquals("word count " + i, wordCountsFirstRow[i], row.getGroups().size());
        }
    }
}
Also used : Expectations(mockit.Expectations) SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Aggregations

Segmenter (com.joliciel.jochre.graphics.Segmenter)9 SourceImage (com.joliciel.jochre.graphics.SourceImage)9 BufferedImage (java.awt.image.BufferedImage)8 JochreSession (com.joliciel.jochre.JochreSession)7 Paragraph (com.joliciel.jochre.graphics.Paragraph)7 RowOfShapes (com.joliciel.jochre.graphics.RowOfShapes)7 Config (com.typesafe.config.Config)7 InputStream (java.io.InputStream)7 ArrayList (java.util.ArrayList)7 Test (org.junit.Test)7 Rectangle (java.awt.Rectangle)6 HashMap (java.util.HashMap)6 JochreImage (com.joliciel.jochre.graphics.JochreImage)3 Expectations (mockit.Expectations)3 File (java.io.File)2 JochreDocument (com.joliciel.jochre.doc.JochreDocument)1 JochrePage (com.joliciel.jochre.doc.JochrePage)1 GroupOfShapes (com.joliciel.jochre.graphics.GroupOfShapes)1 Shape (com.joliciel.jochre.graphics.Shape)1 EmptyCentreFeature (com.joliciel.jochre.graphics.features.EmptyCentreFeature)1