Search in sources:

Example 1 with SourceImage

use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.

the class SegmentationTest method testSegmentation.

/**
 * Segments a sample image and checks the resulting structure.
 * <p>
 * For the selected sample the test: loads the image resource, wraps it in a
 * {@link SourceImage} on a fresh document/page, runs the {@link Segmenter},
 * assigns the expected letters to the shapes in reading order, logs pixel,
 * brightness and feature diagnostics, and finally asserts the number of rows,
 * the number of groups and shapes per row, and the number of shapes in the
 * first group of the first two rows.
 *
 * @throws Exception
 *             if the image resource cannot be read or segmentation fails
 */
@Test
public void testSegmentation() throws Exception {
    // TODO: Note currently this requires high thresholds to work
    // Need to decide if this is valid in general, or only for these samples
    // NOTE(review): the "config.file" system property is not reset anywhere in
    // this method, so it stays set for whatever runs later in the same JVM -
    // confirm this is intended.
    System.setProperty("config.file", "src/test/resources/testHighThresholds.conf");
    ConfigFactory.invalidateCaches();
    Config config = ConfigFactory.load();
    JochreSession jochreSession = new JochreSession(config);
    boolean writePixelsToLog = true;
    for (int imageNumber = 1; imageNumber <= 4; imageNumber++) {
        // Only sample 1 is exercised; the other sample definitions below are
        // skipped by this guard.
        if (imageNumber != 1) {
            continue;
        }
        String imageName = "";
        String suffix = "";
        String text = "";
        String fileName = "";
        String userFileName;
        int rowCount = 2;
        int shapeCountRow1;
        int shapeCountRow2;
        int groupCountRow1;
        int groupCountRow2;
        int groupCountRow3 = 0;
        int shapeCountRow1Group1;
        int shapeCountRow2Group1;
        if (imageNumber == 1) {
            imageName = "MotlPeysiDemKhazns2RowsShort2";
            suffix = "jpg";
            text = "איך געה מיט אייך קיינער אין דער וועלט";
            fileName = "MotlPeysiDemKhazns2RowsShort2.pdf";
            userFileName = "Motl Peysi Dem Khazns";
            shapeCountRow1 = 13;
            shapeCountRow2 = 17;
            groupCountRow1 = 4;
            groupCountRow2 = 4;
            shapeCountRow1Group1 = 3;
            shapeCountRow2Group1 = 6;
        } else if (imageNumber == 2) {
            imageName = "MegileLiderZeresh";
            suffix = "png";
            text = "זרש, די מכשפה, װאָס שעלט ווי אַ מגפה";
            fileName = "MegileLiderManger.pdf";
            userFileName = "Megile Lider";
            shapeCountRow1 = 12;
            shapeCountRow2 = 17;
            groupCountRow1 = 3;
            groupCountRow2 = 5;
            shapeCountRow1Group1 = 4;
            shapeCountRow2Group1 = 4;
        } else if (imageNumber == 3) {
            imageName = "MendeleMoykherSforimVol1_41_0Excerpt";
            suffix = "png";
            text = "ער הייסט יאַנקיל, בעריל,";
            fileName = "MendeleMoykherSforimVol1_41_0.png";
            userFileName = "MendeleMoykherSforimVol1_41_0";
            shapeCountRow1 = 20;
            shapeCountRow2 = 0;
            groupCountRow1 = 4;
            groupCountRow2 = 0;
            shapeCountRow1Group1 = 2;
            shapeCountRow2Group1 = 0;
        } else {
            imageName = "JoinedLetterTest";
            suffix = "png";
            text = "Joined Letter Test";
            fileName = "JoinedLetterTest.png";
            userFileName = "JoinedLetterTest";
            rowCount = 2;
            shapeCountRow1 = 23;
            shapeCountRow2 = 23;
            groupCountRow1 = 4;
            groupCountRow2 = 4;
            groupCountRow3 = 5;
            shapeCountRow1Group1 = 6;
            shapeCountRow2Group1 = 5;
        }
        LOG.debug("######### imageName: " + imageName);
        // String fileName = "data/Zelmenyaners3Words.gif";
        // ImageIO.read does not close the stream it is given, so close it here
        // once the image has been decoded.
        BufferedImage image;
        try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/test/resources/" + imageName + "." + suffix)) {
            assertNotNull(imageFileStream);
            image = ImageIO.read(imageFileStream);
        }
        JochreDocument doc = new JochreDocument(jochreSession);
        doc.setFileName(fileName);
        doc.setName(userFileName);
        JochrePage page = doc.newPage();
        SourceImage sourceImage = page.newJochreImage(image, imageName);
        sourceImage.setWhiteGapFillFactor(5);
        sourceImage.setImageStatus(ImageStatus.AUTO_NEW);
        if (writePixelsToLog) {
            // Column ruler followed by one line per pixel row: 'x' = black, 'o' = not black.
            LOG.debug("i012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789");
            for (int y = 0; y < sourceImage.getHeight(); y++) {
                StringBuilder line = new StringBuilder().append(y);
                for (int x = 0; x < sourceImage.getWidth(); x++) {
                    if (sourceImage.isPixelBlack(x, y, sourceImage.getBlackThreshold())) {
                        line.append("x");
                    } else {
                        line.append("o");
                    }
                }
                LOG.debug(line.toString());
            }
        }
        Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
        segmenter.segment();
        if (segmenter.isDrawSegmentation()) {
            // Keep a copy of the drawn segmentation in the temp directory for inspection.
            BufferedImage segmentedImage = segmenter.getSegmentedImage();
            File tempDir = new File(System.getProperty("java.io.tmpdir"));
            ImageIO.write(segmentedImage, "PNG", new File(tempDir, imageName + "_seg.png"));
        }
        JochreImage jochreImage = sourceImage;

        // Pass 1: log every shape row by row and remember the pixel at (3, 3) of
        // the very first shape so it can be compared again in pass 3.
        int i = 0;
        boolean firstShape = true;
        int midPixelFirstShape = 0;
        int midPixelFirstShapeRaw = 0;
        for (Paragraph paragraph : jochreImage.getParagraphs()) {
            for (RowOfShapes row : paragraph.getRows()) {
                int j = 0;
                LOG.debug("============= Row " + i + " ================");
                for (Shape shape : row.getShapes()) {
                    LOG.debug("Shape (" + i + "," + j + "). Left = " + shape.getLeft() + ". Top = " + shape.getTop() + ". Right = " + shape.getRight() + ". Bottom = " + shape.getBottom() + ". Group: " + shape.getGroup().getIndex());
                    if (firstShape) {
                        midPixelFirstShape = shape.getPixel(3, 3);
                        midPixelFirstShapeRaw = shape.getRawPixel(3, 3);
                        firstShape = false;
                    }
                    if (writePixelsToLog) {
                        for (int y = 0; y < shape.getHeight(); y++) {
                            // Row prefix: "M" on the mean line, "B" on the base line, otherwise the row number.
                            StringBuilder line = new StringBuilder();
                            if (y == shape.getMeanLine()) {
                                line.append("M");
                            } else if (y == shape.getBaseLine()) {
                                line.append("B");
                            } else {
                                line.append(y);
                            }
                            for (int x = 0; x < shape.getWidth(); x++) {
                                if (shape.isPixelBlack(x, y, sourceImage.getBlackThreshold())) {
                                    line.append("x");
                                } else {
                                    line.append("o");
                                }
                            }
                            LOG.debug(line.toString());
                        }
                    }
                    j++;
                } // next shape
                i++;
            } // next row
        } // next paragraph

        // Pass 2: walk the shapes group by group and assign the expected text to
        // them, one letter per shape; i is the current position in the text.
        i = 0;
        for (Paragraph paragraph : jochreImage.getParagraphs()) {
            for (RowOfShapes row : paragraph.getRows()) {
                for (GroupOfShapes group : row.getGroups()) {
                    for (Shape shape : group.getShapes()) {
                        if (i < text.length()) {
                            String letter = text.substring(i, i + 1);
                            String nextLetter = "";
                            if (i + 1 < text.length()) {
                                nextLetter = text.substring(i + 1, i + 2);
                            }
                            // A following diacritic is attached to the current letter.
                            // NOTE(review): nextLetter holds exactly one char, so a literal
                            // longer than one char can never match - confirm the literals
                            // below are each a single combining mark.
                            if (nextLetter.equals("ָֹ") || nextLetter.equals("ַ")) {
                                letter += nextLetter;
                                i++;
                            }
                            LOG.debug("Letter: " + letter);
                            shape.setLetter(letter);
                        }
                        i++;
                    } // next shape
                    // to skip the space
                    i++;
                    LOG.debug("Space");
                } // next group
            } // next row
        } // next paragraph

        List<ShapeFeature<?>> features = new ArrayList<ShapeFeature<?>>();
        features.add(new VerticalElongationFeature());
        features.add(new VerticalSizeFeature());
        features.add(new TouchesBaseLineFeature());
        features.add(new TouchesMeanLineFeature());
        features.add(new EmptyCentreFeature());

        // Pass 3: log diagnostics for every shape and assert the expected counts;
        // i is the row index across all paragraphs, j the group index within the row.
        i = 0;
        DecimalFormat df = new DecimalFormat("0.00");
        firstShape = true;
        int totalRowCount = 0;
        for (Paragraph paragraph : jochreImage.getParagraphs()) {
            for (RowOfShapes row : paragraph.getRows()) {
                totalRowCount++;
                LOG.debug("============= Row " + i + " ================");
                int j = 0;
                for (GroupOfShapes group : row.getGroups()) {
                    for (Shape shape : group.getShapes()) {
                        LOG.debug("============= Shape (" + i + "," + j + ") ================");
                        LOG.debug("Left = " + shape.getLeft() + ". Top = " + shape.getTop() + ". Right = " + shape.getRight() + ". Bottom = " + shape.getBottom());
                        LOG.debug("Letter " + shape.getLetter());
                        if (firstShape) {
                            // The first shape must still report the pixel values recorded in pass 1.
                            LOG.debug("mid pixel: " + midPixelFirstShape);
                            assertEquals(midPixelFirstShape, shape.getPixel(3, 3));
                            LOG.debug("mid pixel raw: " + midPixelFirstShapeRaw);
                            assertEquals(midPixelFirstShapeRaw, shape.getRawPixel(3, 3));
                            firstShape = false;
                        }
                        if (writePixelsToLog) {
                            for (int y = 0; y < shape.getHeight(); y++) {
                                StringBuilder line = new StringBuilder();
                                if (y == shape.getMeanLine()) {
                                    line.append("M");
                                } else if (y == shape.getBaseLine()) {
                                    line.append("B");
                                } else {
                                    line.append(y);
                                }
                                for (int x = 0; x < shape.getWidth(); x++) {
                                    if (shape.isPixelBlack(x, y, sourceImage.getBlackThreshold())) {
                                        line.append("x");
                                    } else {
                                        line.append("o");
                                    }
                                }
                                LOG.debug(line.toString());
                            }
                        }
                        double[][] totals = shape.getBrightnessBySection(5, 5, 1, SectionBrightnessMeasurementMethod.RAW);
                        LOG.debug("Brightness counts");
                        // totals is indexed [x][y]; print it one y-line at a time.
                        for (int y = 0; y < totals[0].length; y++) {
                            StringBuilder line = new StringBuilder();
                            for (int x = 0; x < totals.length; x++) {
                                line.append(df.format(totals[x][y])).append("\t");
                            }
                            LOG.debug(line.toString());
                        }
                        for (ShapeFeature<?> feature : features) {
                            RuntimeEnvironment env = new RuntimeEnvironment();
                            FeatureResult<?> outcome = feature.check(shape, env);
                            LOG.debug(outcome.toString());
                        }
                    } // next shape
                    // Only the first group of rows 0 and 1 has an expected shape count.
                    if (i == 0) {
                        if (j == 0) {
                            assertEquals(shapeCountRow1Group1, group.getShapes().size());
                        }
                    } else if (i == 1) {
                        if (j == 0) {
                            assertEquals(shapeCountRow2Group1, group.getShapes().size());
                        }
                    }
                    j++;
                } // next group
                if (i == 0) {
                    assertEquals(groupCountRow1, row.getGroups().size());
                } else if (i == 1) {
                    assertEquals(groupCountRow2, row.getGroups().size());
                } else if (i == 2) {
                    assertEquals(groupCountRow3, row.getGroups().size());
                }
                if (i == 0) {
                    assertEquals(shapeCountRow1, row.getShapes().size());
                } else if (i == 1) {
                    assertEquals(shapeCountRow2, row.getShapes().size());
                }
                i++;
            } // next row
        } // next paragraph
        assertEquals(rowCount, totalRowCount);
    } // next test image
    LOG.debug("************** Finished ***********");
}
Also used : ShapeFeature(com.joliciel.jochre.graphics.features.ShapeFeature) Shape(com.joliciel.jochre.graphics.Shape) SourceImage(com.joliciel.jochre.graphics.SourceImage) Config(com.typesafe.config.Config) DecimalFormat(java.text.DecimalFormat) ArrayList(java.util.ArrayList) VerticalSizeFeature(com.joliciel.jochre.graphics.features.VerticalSizeFeature) JochreDocument(com.joliciel.jochre.doc.JochreDocument) TouchesBaseLineFeature(com.joliciel.jochre.graphics.features.TouchesBaseLineFeature) BufferedImage(java.awt.image.BufferedImage) JochreSession(com.joliciel.jochre.JochreSession) EmptyCentreFeature(com.joliciel.jochre.graphics.features.EmptyCentreFeature) JochreImage(com.joliciel.jochre.graphics.JochreImage) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) TouchesMeanLineFeature(com.joliciel.jochre.graphics.features.TouchesMeanLineFeature) InputStream(java.io.InputStream) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) VerticalElongationFeature(com.joliciel.jochre.graphics.features.VerticalElongationFeature) Segmenter(com.joliciel.jochre.graphics.Segmenter) JochrePage(com.joliciel.jochre.doc.JochrePage) Paragraph(com.joliciel.jochre.graphics.Paragraph) GroupOfShapes(com.joliciel.jochre.graphics.GroupOfShapes) File(java.io.File) Test(org.junit.Test)

Example 2 with SourceImage

use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.

the class JochrePage method segment.

/**
 * Runs a {@link Segmenter} over every image held by this page, in iteration
 * order, and logs the position and name of each image once it has been
 * segmented. Each image is cast to {@link SourceImage} before being handed
 * to the segmenter.
 */
public void segment() {
    int imageIndex = 0;
    for (JochreImage pageImage : this.getImages()) {
        SourceImage source = (SourceImage) pageImage;
        new Segmenter(source, jochreSession).segment();
        LOG.debug("Image " + imageIndex + " segmented: " + source.getName());
        imageIndex += 1;
    }
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) SourceImage(com.joliciel.jochre.graphics.SourceImage) Segmenter(com.joliciel.jochre.graphics.Segmenter)

Example 3 with SourceImage

use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.

the class JochrePage method newJochreImage.

/**
 * Creates a {@link SourceImage} for this page from the given bitmap, adds it
 * to this page's images and sets its index to the number of images on the
 * page after the addition.
 *
 * @param image
 *            the bitmap to wrap
 * @param imageName
 *            the name given to the new image
 * @return the newly created image, already registered on this page
 */
public SourceImage newJochreImage(BufferedImage image, String imageName) {
    final SourceImage created = new SourceImage(this, imageName, image, jochreSession);
    this.getImages().add(created);
    // The index is taken from the collection size after the add.
    final int position = this.getImages().size();
    created.setIndex(position);
    return created;
}
Also used : SourceImage(com.joliciel.jochre.graphics.SourceImage)

Example 4 with SourceImage

use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.

the class JochrePage method segmentAndShow.

/**
 * Segments every image on this page with segmentation drawing switched on and
 * writes each drawn result to a PNG file so that the user can view it.
 * <p>
 * The file for an image is {@code outputDirectory + "/" + name + "_seg.png"},
 * where {@code name} is the image's name.
 *
 * @param outputDirectory
 *            path of the directory the PNG files are written to
 * @throws JochreException
 *             wrapping the {@link IOException} if a PNG cannot be written
 */
public void segmentAndShow(String outputDirectory) {
    int imageIndex = 0;
    for (JochreImage pageImage : this.getImages()) {
        SourceImage source = (SourceImage) pageImage;
        Segmenter drawingSegmenter = new Segmenter(source, jochreSession);
        drawingSegmenter.setDrawSegmentation(true);
        drawingSegmenter.segment();
        BufferedImage rendered = drawingSegmenter.getSegmentedImage();
        File target = new File(outputDirectory + "/" + pageImage.getName() + "_seg.png");
        try {
            ImageIO.write(rendered, "PNG", target);
        } catch (IOException ioe) {
            throw new JochreException(ioe);
        }
        LOG.debug("Image " + imageIndex + " segmented: " + source.getName());
        imageIndex += 1;
    }
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) JochreException(com.joliciel.jochre.utils.JochreException) SourceImage(com.joliciel.jochre.graphics.SourceImage) Segmenter(com.joliciel.jochre.graphics.Segmenter) IOException(java.io.IOException) File(java.io.File) BufferedImage(java.awt.image.BufferedImage)

Example 5 with SourceImage

use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.

the class SegmentationTest method testPietrushka.

/**
 * Pietrushka is a bit unusual in that it contains a column separator in the
 * middle. Given that it's in Yiddish, the columns need to be aligned from right
 * to left.
 * <p>
 * The test segments the page, then walks the detected paragraphs in order and
 * matches them against five expected text rectangles (also in order). A
 * detected paragraph matches the next expected rectangle when more than 95% of
 * its own area and more than 80% of the expected area lie in their
 * intersection. Finally the row count and the word (group) count of the first
 * row are asserted for each matched paragraph.
 *
 * @param jochrePage
 *            mocked page passed to the {@link SourceImage} constructor
 * @throws Exception
 *             if the image resource cannot be read or segmentation fails
 */
@Test
public void testPietrushka(@Mocked JochrePage jochrePage) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "yi");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    String imageName = "Pietrushka_FolksEntsiklopedyeVol1_17_0.png";
    LOG.debug(imageName);
    // ImageIO.read does not close the stream it is given, so close it here
    // once the image has been decoded.
    BufferedImage image;
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();

    // Expected text paragraphs as (x, y, width, height), in the order in which
    // they must be encountered among the detected paragraphs.
    List<Rectangle> textPars = new ArrayList<>();
    textPars.add(new Rectangle(1832, 688, 1336, 1864));
    textPars.add(new Rectangle(1848, 2608, 1312, 2200));
    textPars.add(new Rectangle(1848, 4856, 1296, 94));
    textPars.add(new Rectangle(448, 696, 1320, 2080));
    textPars.add(new Rectangle(448, 2816, 1328, 2128));

    int i = 0; // index of the next expected rectangle still to be matched
    int j = 0; // index of the detected paragraph, for logging only
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        if (i >= textPars.size()) {
            // Every expected rectangle has been matched; any further detected
            // paragraph has nothing to be compared with.
            break;
        }
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        // Multiply as doubles so that large dimensions cannot overflow int.
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // For disjoint rectangles intersection() yields non-positive dimensions;
        // two negative dimensions would multiply to a bogus positive area.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j);
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        if (realRatio > 0.95 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());

    // Expected number of rows, and of groups in the first row, per matched paragraph.
    int[] rowCounts = new int[] { 18, 21, 1, 20, 21 };
    int[] wordCountsFirstRow = new int[] { 6, 7, 7, 8, 7 };
    for (int p = 0; p < textParagraphs.size(); p++) {
        Paragraph textParagraph = textParagraphs.get(p);
        assertEquals(rowCounts[p], textParagraph.getRows().size());
        RowOfShapes row = textParagraph.getRows().get(0);
        assertEquals(wordCountsFirstRow[p], row.getGroups().size());
    }
}
Also used : SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Aggregations

SourceImage (com.joliciel.jochre.graphics.SourceImage)11 Segmenter (com.joliciel.jochre.graphics.Segmenter)9 BufferedImage (java.awt.image.BufferedImage)9 JochreSession (com.joliciel.jochre.JochreSession)8 Config (com.typesafe.config.Config)8 InputStream (java.io.InputStream)8 Test (org.junit.Test)8 Paragraph (com.joliciel.jochre.graphics.Paragraph)7 RowOfShapes (com.joliciel.jochre.graphics.RowOfShapes)7 ArrayList (java.util.ArrayList)7 Rectangle (java.awt.Rectangle)6 HashMap (java.util.HashMap)6 JochreImage (com.joliciel.jochre.graphics.JochreImage)4 Expectations (mockit.Expectations)3 JochrePage (com.joliciel.jochre.doc.JochrePage)2 Shape (com.joliciel.jochre.graphics.Shape)2 File (java.io.File)2 JochreDocument (com.joliciel.jochre.doc.JochreDocument)1 GroupOfShapes (com.joliciel.jochre.graphics.GroupOfShapes)1 EmptyCentreFeature (com.joliciel.jochre.graphics.features.EmptyCentreFeature)1