Search in sources :

Example 6 with SourceImage

use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.

the class SegmentationTest method testMotlPessiDemKhazns.

/**
 * A very simple, basic page.
 * <p>
 * Segments the test image, matches the paragraphs returned by the source image
 * (in iteration order) against four expected bounding boxes, then checks the
 * row count and the first-row group count of every matched paragraph.
 *
 * @param jochrePage mocked page handed to the {@link SourceImage} constructor
 * @throws Exception on any failure while loading the image or segmenting it
 */
@Test
public void testMotlPessiDemKhazns(@Mocked JochrePage jochrePage) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "yi");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    String imageName = "SholemAleykhem_MotelPeysiDemKhazns_12_0.png";
    LOG.debug(imageName);
    BufferedImage image;
    // ImageIO.read leaves the stream open, so it is closed explicitly here.
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    // Expected paragraph bounding boxes as (x, y, width, height).
    List<Rectangle> textPars = new ArrayList<>();
    textPars.add(new Rectangle(544, 824, 2432, 344));
    textPars.add(new Rectangle(552, 1176, 2512, 2112));
    textPars.add(new Rectangle(584, 3320, 2448, 344));
    textPars.add(new Rectangle(568, 3688, 2464, 592));
    // i indexes the next expected box still to be matched, j the current real paragraph.
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Widen before multiplying so the areas are never computed in int arithmetic.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Disjoint rectangles give non-positive dimensions; two negative ones
        // would multiply into a bogus positive area, so treat them as zero overlap.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j);
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        if (realRatio > 0.95 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // All expected boxes matched: stop before textPars.get(i) runs out of bounds.
            if (i >= textPars.size()) {
                break;
            }
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());
    int[] rowCounts = new int[] { 3, 18, 3, 5 };
    int[] wordCountsFirstRow = new int[] { 6, 8, 8, 8 };
    for (int k = 0; k < textParagraphs.size(); k++) {
        Paragraph matched = textParagraphs.get(k);
        assertEquals("row count " + k, rowCounts[k], matched.getRows().size());
        RowOfShapes row = matched.getRows().get(0);
        assertEquals("word count " + k, wordCountsFirstRow[k], row.getGroups().size());
    }
}
Also used : SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 7 with SourceImage

use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.

the class SegmentationTest method testAlsacien2.

/**
 * Segmentation errors reported for Alsacien.
 * <p>
 * Segments the test image, matches the paragraphs returned by the source image
 * (in iteration order) against five expected bounding boxes, then checks the
 * row count and, where a positive count is given, the first-row group count of
 * every matched paragraph.
 *
 * @param jochrePage mocked page handed to the {@link SourceImage} constructor
 * @param jochreDoc mocked document returned by {@code jochrePage.getDocument()},
 *        stubbed to report left-to-right text
 * @throws Exception on any failure while loading the image or segmenting it
 */
@Test
public void testAlsacien2(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "de");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    // Stub the page/document chain; minTimes = 0 makes both calls optional.
    new Expectations() {

        {
            jochrePage.getDocument();
            result = jochreDoc;
            minTimes = 0;
            jochreDoc.isLeftToRight();
            result = true;
            minTimes = 0;
        }
    };
    String imageName = "Alsacien2.jpeg";
    LOG.debug(imageName);
    BufferedImage image;
    // ImageIO.read leaves the stream open, so it is closed explicitly here.
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    // Expected paragraph bounding boxes as (x, y, width, height).
    List<Rectangle> textPars = new ArrayList<>();
    // title paragraph
    textPars.add(new Rectangle(63, 81, 1059, 108));
    textPars.add(new Rectangle(66, 204, 1065, 294));
    textPars.add(new Rectangle(63, 516, 1068, 348));
    textPars.add(new Rectangle(63, 879, 1071, 537));
    textPars.add(new Rectangle(63, 1428, 1068, 354));
    // i indexes the next expected box still to be matched, j the current real paragraph.
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Widen before multiplying so the areas are never computed in int arithmetic.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Disjoint rectangles give non-positive dimensions; two negative ones
        // would multiply into a bogus positive area, so treat them as zero overlap.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j + ": " + par.toString());
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        if (realRatio > 0.8 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // All expected boxes matched: stop before textPars.get(i) runs out of bounds.
            if (i >= textPars.size()) {
                break;
            }
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());
    int[] rowCounts = new int[] { 2, 5, 6, 9, 6 };
    int[] wordCountsFirstRow = new int[] { 10, 8, 9, 8, 8 };
    for (int k = 0; k < textParagraphs.size(); k++) {
        Paragraph matched = textParagraphs.get(k);
        assertEquals("row count " + k, rowCounts[k], matched.getRows().size());
        RowOfShapes row = matched.getRows().get(0);
        // A non-positive expected count means "do not check this paragraph's first row".
        if (wordCountsFirstRow[k] > 0) {
            assertEquals("word count " + k, wordCountsFirstRow[k], row.getGroups().size());
        }
    }
}
Also used : Expectations(mockit.Expectations) SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 8 with SourceImage

use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.

the class SegmentationTest method testPerets_KhsidisheMayselekh.

/**
 * This page is challenging because of the large blotch of dirt in the lower
 * right. Also, there are several short, indented, one-line paragraphs.
 * <p>
 * Segments the test image, matches the paragraphs returned by the source image
 * (in iteration order) against eleven expected bounding boxes, then checks the
 * row count and, where a positive count is given, the first-row group count of
 * every matched paragraph.
 *
 * @param jochrePage mocked page handed to the {@link SourceImage} constructor
 * @throws Exception on any failure while loading the image or segmenting it
 */
@Test
public void testPerets_KhsidisheMayselekh(@Mocked JochrePage jochrePage) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "yi");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    String imageName = "Peretz_KhsidisheMayselekh_5_0.png";
    LOG.debug(imageName);
    BufferedImage image;
    // ImageIO.read leaves the stream open, so it is closed explicitly here.
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    // Expected paragraph bounding boxes as (x, y, width, height).
    List<Rectangle> textPars = new ArrayList<>();
    // title paragraph
    textPars.add(new Rectangle(1670, 920, 624, 182));
    textPars.add(new Rectangle(740, 1524, 2368, 96));
    textPars.add(new Rectangle(680, 1652, 2620, 192));
    textPars.add(new Rectangle(652, 1872, 2648, 800));
    textPars.add(new Rectangle(1564, 2696, 1524, 96));
    textPars.add(new Rectangle(660, 2820, 2632, 328));
    textPars.add(new Rectangle(660, 3176, 2628, 212));
    textPars.add(new Rectangle(664, 3404, 2636, 428));
    // short paragraph
    textPars.add(new Rectangle(1992, 3868, 1088, 100));
    // paragraph with blotch
    textPars.add(new Rectangle(664, 4000, 2624, 436));
    textPars.add(new Rectangle(664, 4468, 2628, 204));
    // i indexes the next expected box still to be matched, j the current real paragraph.
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Widen before multiplying so the areas are never computed in int arithmetic.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Disjoint rectangles give non-positive dimensions; two negative ones
        // would multiply into a bogus positive area, so treat them as zero overlap.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j);
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        if (realRatio > 0.8 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // All expected boxes matched: stop before textPars.get(i) runs out of bounds.
            if (i >= textPars.size()) {
                break;
            }
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());
    int[] rowCounts = new int[] { 1, 1, 2, 7, 1, 3, 2, 4, 1, 4, 2 };
    int[] wordCountsFirstRow = new int[] { 1, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0 };
    for (int k = 0; k < textParagraphs.size(); k++) {
        Paragraph matched = textParagraphs.get(k);
        assertEquals("row count " + k, rowCounts[k], matched.getRows().size());
        RowOfShapes row = matched.getRows().get(0);
        // A zero expected count means "do not check this paragraph's first row".
        if (wordCountsFirstRow[k] > 0) {
            assertEquals("word count " + k, wordCountsFirstRow[k], row.getGroups().size());
        }
    }
}
Also used : SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 9 with SourceImage

use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.

the class SplitCandidateFinderImplTest method testFindSplitCanidates.

/**
 * Checks that the split candidates found for a single test shape include one
 * within the finder's minimum split distance of each of the three known true
 * split positions.
 *
 * @throws Exception on any failure while loading the configuration or the image
 */
@Test
public void testFindSplitCanidates() throws Exception {
    // Point the config loader at the test configuration and drop any cached config.
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    Config config = ConfigFactory.load();
    JochreSession jochreSession = new JochreSession(config);
    BufferedImage image;
    // ImageIO.read leaves the stream open, so it is closed explicitly here.
    try (InputStream imageFileStream = getClass().getResourceAsStream("shape_370454.png")) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    final JochrePage page = mock(JochrePage.class);
    JochreImage jochreImage = new SourceImage(page, "name", image, jochreSession);
    // Build one shape from (0, 0) to (width - 1, height - 1).
    Shape shape = jochreImage.getShape(0, 0, jochreImage.getWidth() - 1, jochreImage.getHeight() - 1);
    SplitCandidateFinder splitCandidateFinder = new SplitCandidateFinder(jochreSession);
    List<Split> splits = splitCandidateFinder.findSplitCandidates(shape);
    int[] trueSplitPositions = new int[] { 38, 59, 82 };
    boolean[] foundSplit = new boolean[trueSplitPositions.length];
    for (Split splitCandidate : splits) {
        LOG.debug("Split candidate at " + splitCandidate.getPosition());
        for (int i = 0; i < trueSplitPositions.length; i++) {
            int truePos = trueSplitPositions[i];
            int distance = Math.abs(splitCandidate.getPosition() - truePos);
            // A candidate counts as a hit when it is closer to the true position
            // than the finder's minimum distance between splits.
            if (distance < splitCandidateFinder.getMinDistanceBetweenSplits()) {
                foundSplit[i] = true;
                LOG.debug("Found split: " + truePos + ", distance " + distance);
            }
        }
    }
    for (int i = 0; i < trueSplitPositions.length; i++) {
        assertTrue("didn't find split " + trueSplitPositions[i], foundSplit[i]);
    }
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape) SourceImage(com.joliciel.jochre.graphics.SourceImage) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) JochrePage(com.joliciel.jochre.doc.JochrePage) BufferedImage(java.awt.image.BufferedImage) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 10 with SourceImage

use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.

the class SegmentationTest method testAlsacien1.

/**
 * Segmentation errors reported for Alsacien.
 * <p>
 * Segments the test image, matches the paragraphs returned by the source image
 * (in iteration order) against six expected bounding boxes, then checks the
 * row count and, where a positive count is given, the first-row group count of
 * every matched paragraph.
 *
 * @param jochrePage mocked page handed to the {@link SourceImage} constructor
 * @param jochreDoc mocked document returned by {@code jochrePage.getDocument()},
 *        stubbed to report left-to-right text
 * @throws Exception on any failure while loading the image or segmenting it
 */
@Test
public void testAlsacien1(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "de");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    // Stub the page/document chain; minTimes = 0 makes both calls optional.
    new Expectations() {

        {
            jochrePage.getDocument();
            result = jochreDoc;
            minTimes = 0;
            jochreDoc.isLeftToRight();
            result = true;
            minTimes = 0;
        }
    };
    String imageName = "Alsacien1.jpg";
    LOG.debug(imageName);
    BufferedImage image;
    // ImageIO.read leaves the stream open, so it is closed explicitly here.
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    // Expected paragraph bounding boxes as (x, y, width, height).
    List<Rectangle> textPars = new ArrayList<>();
    // title paragraph
    textPars.add(new Rectangle(715, 517, 462, 115));
    // TODO: for now it's splitting this paragraph by row, since it's assuming
    // paragraphs cannot be
    // both outdented and indented on the same page
    // textPars.add(new Rectangle(50, 666, 1798, 1039));
    textPars.add(new Rectangle(55, 1837, 1777, 335));
    textPars.add(new Rectangle(50, 2211, 1765, 154));
    textPars.add(new Rectangle(44, 2404, 1782, 511));
    textPars.add(new Rectangle(50, 2948, 1776, 154));
    textPars.add(new Rectangle(50, 3135, 1770, 77));
    // i indexes the next expected box still to be matched, j the current real paragraph.
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Widen before multiplying so the areas are never computed in int arithmetic.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Disjoint rectangles give non-positive dimensions; two negative ones
        // would multiply into a bogus positive area, so treat them as zero overlap.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j + ": " + par.toString());
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        if (realRatio > 0.8 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // All expected boxes matched: stop before textPars.get(i) runs out of bounds.
            if (i >= textPars.size()) {
                break;
            }
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());
    // One entry per expected paragraph (six, since the outdented paragraph is disabled above).
    int[] rowCounts = new int[] { 1, 4, 2, 6, 2, 1 };
    int[] wordCountsFirstRow = new int[] { 2, 0, 0, 0, 0, 0 };
    for (int k = 0; k < textParagraphs.size(); k++) {
        Paragraph matched = textParagraphs.get(k);
        assertEquals("row count " + k, rowCounts[k], matched.getRows().size());
        RowOfShapes row = matched.getRows().get(0);
        // A zero expected count means "do not check this paragraph's first row".
        if (wordCountsFirstRow[k] > 0) {
            assertEquals("word count " + k, wordCountsFirstRow[k], row.getGroups().size());
        }
    }
}
Also used : Expectations(mockit.Expectations) SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Aggregations

SourceImage (com.joliciel.jochre.graphics.SourceImage)11 Segmenter (com.joliciel.jochre.graphics.Segmenter)9 BufferedImage (java.awt.image.BufferedImage)9 JochreSession (com.joliciel.jochre.JochreSession)8 Config (com.typesafe.config.Config)8 InputStream (java.io.InputStream)8 Test (org.junit.Test)8 Paragraph (com.joliciel.jochre.graphics.Paragraph)7 RowOfShapes (com.joliciel.jochre.graphics.RowOfShapes)7 ArrayList (java.util.ArrayList)7 Rectangle (java.awt.Rectangle)6 HashMap (java.util.HashMap)6 JochreImage (com.joliciel.jochre.graphics.JochreImage)4 Expectations (mockit.Expectations)3 JochrePage (com.joliciel.jochre.doc.JochrePage)2 Shape (com.joliciel.jochre.graphics.Shape)2 File (java.io.File)2 JochreDocument (com.joliciel.jochre.doc.JochreDocument)1 GroupOfShapes (com.joliciel.jochre.graphics.GroupOfShapes)1 EmptyCentreFeature (com.joliciel.jochre.graphics.features.EmptyCentreFeature)1