Search in sources :

Example 6 with Paragraph

use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.

the class ImageController method reloadRowGrid.

/**
 * Rebuilds the row grid's model from the current image: the rows of every
 * paragraph are flattened, in paragraph order, into a single list. The
 * row-to-textbox map is replaced by a fresh, empty one before the model is set.
 */
void reloadRowGrid() {
    LOG.trace("reloadRowGrid");
    List<RowOfShapes> allRows = new ArrayList<RowOfShapes>();
    for (Paragraph par : currentImage.getParagraphs()) {
        // Append this paragraph's rows in their existing order.
        allRows.addAll(par.getRows());
    }
    currentTextBoxes = new HashMap<RowOfShapes, Textbox>();
    SimpleListModel<RowOfShapes> rowModel = new SimpleListModel<RowOfShapes>(allRows);
    rowGrid.setModel(rowModel);
}
Also used : ArrayList(java.util.ArrayList) Textbox(org.zkoss.zul.Textbox) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Paragraph(com.joliciel.jochre.graphics.Paragraph)

Example 7 with Paragraph

use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.

the class SegmentationTest method testPietrushka.

/**
 * Pietrushka is a bit unusual in that it contains a column separator in the
 * middle. Given that it's in Yiddish, the columns need to be aligned from right
 * to left.
 * <p>
 * The test segments the page image, matches the detected paragraphs (in
 * detection order) against five expected bounding boxes, and then checks the
 * row count and the first-row word-group count of each matched paragraph.
 *
 * @param jochrePage mocked page handed to the {@link SourceImage}
 * @throws Exception if the test image cannot be read or segmentation raises an error
 */
@Test
public void testPietrushka(@Mocked JochrePage jochrePage) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "yi");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    String imageName = "Pietrushka_FolksEntsiklopedyeVol1_17_0.png";
    LOG.debug(imageName);
    BufferedImage image;
    // ImageIO.read does not close the stream, so close it here once decoding is done.
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    // Expected paragraph boxes as (x, y, width, height), in the order they should be found.
    List<Rectangle> textPars = new ArrayList<>();
    Rectangle textPar1 = new Rectangle(1832, 688, 1336, 1864);
    Rectangle textPar2 = new Rectangle(1848, 2608, 1312, 2200);
    Rectangle textPar3 = new Rectangle(1848, 4856, 1296, 94);
    Rectangle textPar4 = new Rectangle(448, 696, 1320, 2080);
    Rectangle textPar5 = new Rectangle(448, 2816, 1328, 2128);
    textPars.add(textPar1);
    textPars.add(textPar2);
    textPars.add(textPar3);
    textPars.add(textPar4);
    textPars.add(textPar5);
    // i indexes the next expected box still to be matched; j counts detected paragraphs (logging only).
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Widen to double before multiplying so the area cannot overflow int.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Disjoint rectangles yield negative width and/or height; two negatives
        // would multiply to a bogus positive area, so treat them as zero overlap.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j);
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        // Match when more than 95% of the detected box lies inside the expected box
        // and the overlap covers more than 80% of the expected box.
        if (realRatio > 0.95 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // All expected boxes matched: stop, otherwise textPars.get(i) would go out of bounds.
            if (i >= textPars.size()) {
                break;
            }
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());
    // Expected number of rows, and of word groups in the first row, per matched paragraph.
    int[] rowCounts = new int[] { 18, 21, 1, 20, 21 };
    int[] wordCountsFirstRow = new int[] { 6, 7, 7, 8, 7 };
    for (i = 0; i < textParagraphs.size(); i++) {
        assertEquals(rowCounts[i], textParagraphs.get(i).getRows().size());
        RowOfShapes row = textParagraphs.get(i).getRows().get(0);
        assertEquals(wordCountsFirstRow[i], row.getGroups().size());
    }
}
Also used : SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 8 with Paragraph

use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.

the class SegmentationTest method testMotlPessiDemKhazns.

/**
 * A very simple, basic page.
 * <p>
 * The test segments the page image, matches the detected paragraphs (in
 * detection order) against four expected bounding boxes, and then checks the
 * row count and the first-row word-group count of each matched paragraph.
 *
 * @param jochrePage mocked page handed to the {@link SourceImage}
 * @throws Exception if the test image cannot be read or segmentation raises an error
 */
@Test
public void testMotlPessiDemKhazns(@Mocked JochrePage jochrePage) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "yi");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    String imageName = "SholemAleykhem_MotelPeysiDemKhazns_12_0.png";
    LOG.debug(imageName);
    BufferedImage image;
    // ImageIO.read does not close the stream, so close it here once decoding is done.
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    // Expected paragraph boxes as (x, y, width, height), in the order they should be found.
    List<Rectangle> textPars = new ArrayList<>();
    Rectangle textPar1 = new Rectangle(544, 824, 2432, 344);
    Rectangle textPar2 = new Rectangle(552, 1176, 2512, 2112);
    Rectangle textPar3 = new Rectangle(584, 3320, 2448, 344);
    Rectangle textPar4 = new Rectangle(568, 3688, 2464, 592);
    textPars.add(textPar1);
    textPars.add(textPar2);
    textPars.add(textPar3);
    textPars.add(textPar4);
    // i indexes the next expected box still to be matched; j counts detected paragraphs (logging only).
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Widen to double before multiplying so the area cannot overflow int.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Disjoint rectangles yield negative width and/or height; two negatives
        // would multiply to a bogus positive area, so treat them as zero overlap.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j);
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        // Match when more than 95% of the detected box lies inside the expected box
        // and the overlap covers more than 80% of the expected box.
        if (realRatio > 0.95 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // All expected boxes matched: stop, otherwise textPars.get(i) would go out of bounds.
            if (i >= textPars.size()) {
                break;
            }
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());
    // Expected number of rows, and of word groups in the first row, per matched paragraph.
    int[] rowCounts = new int[] { 3, 18, 3, 5 };
    int[] wordCountsFirstRow = new int[] { 6, 8, 8, 8 };
    for (i = 0; i < textParagraphs.size(); i++) {
        assertEquals(rowCounts[i], textParagraphs.get(i).getRows().size());
        RowOfShapes row = textParagraphs.get(i).getRows().get(0);
        assertEquals(wordCountsFirstRow[i], row.getGroups().size());
    }
}
Also used : SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 9 with Paragraph

use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.

the class SegmentationTest method testAlsacien2.

/**
 * Segmentation errors reported for Alsacien.
 * <p>
 * The test segments the page image with a mocked left-to-right document,
 * matches the detected paragraphs (in detection order) against five expected
 * bounding boxes, and then checks the row count and the first-row word-group
 * count of each matched paragraph.
 *
 * @param jochrePage mocked page handed to the {@link SourceImage}
 * @param jochreDoc mocked document returned by the page; reports left-to-right text
 * @throws Exception if the test image cannot be read or segmentation raises an error
 */
@Test
public void testAlsacien2(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "de");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    new Expectations() {

        {
            // minTimes = 0 makes both expectations optional stubs rather than required calls.
            jochrePage.getDocument();
            result = jochreDoc;
            minTimes = 0;
            jochreDoc.isLeftToRight();
            result = true;
            minTimes = 0;
        }
    };
    String imageName = "Alsacien2.jpeg";
    LOG.debug(imageName);
    BufferedImage image;
    // ImageIO.read does not close the stream, so close it here once decoding is done.
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    // Expected paragraph boxes as (x, y, width, height), in the order they should be found.
    List<Rectangle> textPars = new ArrayList<>();
    Rectangle textPar1 = new Rectangle(63, 81, 1059, 108);
    Rectangle textPar2 = new Rectangle(66, 204, 1065, 294);
    Rectangle textPar3 = new Rectangle(63, 516, 1068, 348);
    Rectangle textPar4 = new Rectangle(63, 879, 1071, 537);
    Rectangle textPar5 = new Rectangle(63, 1428, 1068, 354);
    // title paragraph
    textPars.add(textPar1);
    textPars.add(textPar2);
    textPars.add(textPar3);
    textPars.add(textPar4);
    textPars.add(textPar5);
    // i indexes the next expected box still to be matched; j counts detected paragraphs (logging only).
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Widen to double before multiplying so the area cannot overflow int.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Disjoint rectangles yield negative width and/or height; two negatives
        // would multiply to a bogus positive area, so treat them as zero overlap.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j + ": " + par.toString());
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        // Match when the overlap covers more than 80% of both the detected and the expected box.
        if (realRatio > 0.8 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // All expected boxes matched: stop, otherwise textPars.get(i) would go out of bounds.
            if (i >= textPars.size()) {
                break;
            }
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());
    // Expected number of rows, and of word groups in the first row, per matched paragraph.
    int[] rowCounts = new int[] { 2, 5, 6, 9, 6 };
    int[] wordCountsFirstRow = new int[] { 10, 8, 9, 8, 8 };
    for (i = 0; i < textParagraphs.size(); i++) {
        assertEquals("row count " + i, rowCounts[i], textParagraphs.get(i).getRows().size());
        RowOfShapes row = textParagraphs.get(i).getRows().get(0);
        // A non-positive expected word count means "do not check this paragraph's first row".
        if (wordCountsFirstRow[i] > 0) {
            assertEquals("word count " + i, wordCountsFirstRow[i], row.getGroups().size());
        }
    }
}
Also used : Expectations(mockit.Expectations) SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 10 with Paragraph

use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.

the class SegmentationTest method testPerets_KhsidisheMayselekh.

/**
 * This page is challenging because of the large blotch of dirt in the lower
 * right. Also, there are several short, indented, one-line paragraphs.
 * <p>
 * The test segments the page image, matches the detected paragraphs (in
 * detection order) against eleven expected bounding boxes, and then checks the
 * row count of each matched paragraph and, where an expected value is given,
 * the first-row word-group count.
 *
 * @param jochrePage mocked page handed to the {@link SourceImage}
 * @throws Exception if the test image cannot be read or segmentation raises an error
 */
@Test
public void testPerets_KhsidisheMayselekh(@Mocked JochrePage jochrePage) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "yi");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    String imageName = "Peretz_KhsidisheMayselekh_5_0.png";
    LOG.debug(imageName);
    BufferedImage image;
    // ImageIO.read does not close the stream, so close it here once decoding is done.
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    // Expected paragraph boxes as (x, y, width, height), in the order they should be found.
    List<Rectangle> textPars = new ArrayList<>();
    Rectangle textPar1 = new Rectangle(1670, 920, 624, 182);
    Rectangle textPar2 = new Rectangle(740, 1524, 2368, 96);
    Rectangle textPar3 = new Rectangle(680, 1652, 2620, 192);
    Rectangle textPar4 = new Rectangle(652, 1872, 2648, 800);
    Rectangle textPar5 = new Rectangle(1564, 2696, 1524, 96);
    Rectangle textPar6 = new Rectangle(660, 2820, 2632, 328);
    Rectangle textPar7 = new Rectangle(660, 3176, 2628, 212);
    Rectangle textPar8 = new Rectangle(664, 3404, 2636, 428);
    Rectangle textPar9 = new Rectangle(1992, 3868, 1088, 100);
    Rectangle textPar10 = new Rectangle(664, 4000, 2624, 436);
    Rectangle textPar11 = new Rectangle(664, 4468, 2628, 204);
    // title paragraph
    textPars.add(textPar1);
    textPars.add(textPar2);
    textPars.add(textPar3);
    textPars.add(textPar4);
    textPars.add(textPar5);
    textPars.add(textPar6);
    textPars.add(textPar7);
    textPars.add(textPar8);
    // short paragraph
    textPars.add(textPar9);
    // paragraph with blotch
    textPars.add(textPar10);
    textPars.add(textPar11);
    // i indexes the next expected box still to be matched; j counts detected paragraphs (logging only).
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Widen to double before multiplying so the area cannot overflow int.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // Disjoint rectangles yield negative width and/or height; two negatives
        // would multiply to a bogus positive area, so treat them as zero overlap.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j);
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        // Match when the overlap covers more than 80% of both the detected and the expected box.
        if (realRatio > 0.8 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // All expected boxes matched: stop, otherwise textPars.get(i) would go out of bounds.
            if (i >= textPars.size()) {
                break;
            }
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());
    // Expected number of rows per matched paragraph.
    int[] rowCounts = new int[] { 1, 1, 2, 7, 1, 3, 2, 4, 1, 4, 2 };
    // Expected word groups in the first row; 0 means "not checked" (see the guard below).
    int[] wordCountsFirstRow = new int[] { 1, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0 };
    for (i = 0; i < textParagraphs.size(); i++) {
        assertEquals("row count " + i, rowCounts[i], textParagraphs.get(i).getRows().size());
        RowOfShapes row = textParagraphs.get(i).getRows().get(0);
        if (wordCountsFirstRow[i] > 0) {
            assertEquals("word count " + i, wordCountsFirstRow[i], row.getGroups().size());
        }
    }
}
Also used : SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Aggregations

Paragraph (com.joliciel.jochre.graphics.Paragraph)17 RowOfShapes (com.joliciel.jochre.graphics.RowOfShapes)17 ArrayList (java.util.ArrayList)12 GroupOfShapes (com.joliciel.jochre.graphics.GroupOfShapes)10 Test (org.junit.Test)10 Shape (com.joliciel.jochre.graphics.Shape)9 JochreSession (com.joliciel.jochre.JochreSession)8 Config (com.typesafe.config.Config)8 Segmenter (com.joliciel.jochre.graphics.Segmenter)7 SourceImage (com.joliciel.jochre.graphics.SourceImage)7 BufferedImage (java.awt.image.BufferedImage)7 InputStream (java.io.InputStream)7 JochreImage (com.joliciel.jochre.graphics.JochreImage)6 Rectangle (java.awt.Rectangle)6 HashMap (java.util.HashMap)6 JochreDocument (com.joliciel.jochre.doc.JochreDocument)4 JochrePage (com.joliciel.jochre.doc.JochrePage)4 StringWriter (java.io.StringWriter)3 Expectations (mockit.Expectations)3 LetterSequence (com.joliciel.jochre.letterGuesser.LetterSequence)2