Search in sources:

Example 1 with Paragraph

use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.

Class SegmentationTest, method testSegmentation.

/**
 * Segments a sample image and checks the structure the segmenter produced.
 *
 * <p>Four sample images are configured, but only the first one is currently
 * run (see the guard at the top of the loop). For the image that runs, the
 * test asserts the total number of rows, the number of groups and shapes in
 * the first rows, and the number of shapes in the first group of rows 1 and 2.
 * Pixels, brightness grids and feature outcomes are only written to the debug
 * log; they are not asserted.
 *
 * @throws Exception if the configuration, the image resource or the
 *         segmentation fails
 */
@Test
public void testSegmentation() throws Exception {
    // TODO: Note currently this requires high thresholds to work
    // Need to decide if this is valid in general, or only for these samples
    System.setProperty("config.file", "src/test/resources/testHighThresholds.conf");
    ConfigFactory.invalidateCaches();
    Config config = ConfigFactory.load();
    JochreSession jochreSession = new JochreSession(config);
    boolean writePixelsToLog = true;
    for (int imageNumber = 1; imageNumber <= 4; imageNumber++) {
        // Only the first sample is exercised; the other configurations are
        // kept so they can be re-enabled by removing this guard.
        if (imageNumber != 1) {
            continue;
        }
        String imageName = "";
        String suffix = "";
        String text = "";
        String fileName = "";
        String userFileName;
        int rowCount = 2;
        int shapeCountRow1;
        int shapeCountRow2;
        int groupCountRow1;
        int groupCountRow2;
        int groupCountRow3 = 0;
        int shapeCountRow1Group1;
        int shapeCountRow2Group1;
        if (imageNumber == 1) {
            imageName = "MotlPeysiDemKhazns2RowsShort2";
            suffix = "jpg";
            text = "איך געה מיט אייך קיינער אין דער וועלט";
            fileName = "MotlPeysiDemKhazns2RowsShort2.pdf";
            userFileName = "Motl Peysi Dem Khazns";
            shapeCountRow1 = 13;
            shapeCountRow2 = 17;
            groupCountRow1 = 4;
            groupCountRow2 = 4;
            shapeCountRow1Group1 = 3;
            shapeCountRow2Group1 = 6;
        } else if (imageNumber == 2) {
            imageName = "MegileLiderZeresh";
            suffix = "png";
            text = "זרש, די מכשפה, װאָס שעלט ווי אַ מגפה";
            fileName = "MegileLiderManger.pdf";
            userFileName = "Megile Lider";
            shapeCountRow1 = 12;
            shapeCountRow2 = 17;
            groupCountRow1 = 3;
            groupCountRow2 = 5;
            shapeCountRow1Group1 = 4;
            shapeCountRow2Group1 = 4;
        } else if (imageNumber == 3) {
            imageName = "MendeleMoykherSforimVol1_41_0Excerpt";
            suffix = "png";
            text = "ער הייסט יאַנקיל, בעריל,";
            fileName = "MendeleMoykherSforimVol1_41_0.png";
            userFileName = "MendeleMoykherSforimVol1_41_0";
            shapeCountRow1 = 20;
            shapeCountRow2 = 0;
            groupCountRow1 = 4;
            groupCountRow2 = 0;
            shapeCountRow1Group1 = 2;
            shapeCountRow2Group1 = 0;
        } else {
            imageName = "JoinedLetterTest";
            suffix = "png";
            text = "Joined Letter Test";
            fileName = "JoinedLetterTest.png";
            userFileName = "JoinedLetterTest";
            rowCount = 2;
            shapeCountRow1 = 23;
            shapeCountRow2 = 23;
            groupCountRow1 = 4;
            groupCountRow2 = 4;
            groupCountRow3 = 5;
            shapeCountRow1Group1 = 6;
            shapeCountRow2Group1 = 5;
        }
        LOG.debug("######### imageName: " + imageName);
        // String fileName = "data/Zelmenyaners3Words.gif";
        // ImageIO.read does not close the stream it is given, so close it here.
        BufferedImage image;
        try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/test/resources/" + imageName + "." + suffix)) {
            assertNotNull(imageFileStream);
            image = ImageIO.read(imageFileStream);
        }
        JochreDocument doc = new JochreDocument(jochreSession);
        doc.setFileName(fileName);
        doc.setName(userFileName);
        JochrePage page = doc.newPage();
        SourceImage sourceImage = page.newJochreImage(image, imageName);
        sourceImage.setWhiteGapFillFactor(5);
        sourceImage.setImageStatus(ImageStatus.AUTO_NEW);
        if (writePixelsToLog) {
            // column ruler, then one line of x/o per pixel row of the whole image
            LOG.debug("i012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789");
            for (int y = 0; y < sourceImage.getHeight(); y++) {
                StringBuilder line = new StringBuilder();
                line.append(y);
                for (int x = 0; x < sourceImage.getWidth(); x++) {
                    line.append(sourceImage.isPixelBlack(x, y, sourceImage.getBlackThreshold()) ? "x" : "o");
                }
                LOG.debug(line.toString());
            }
        }
        Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
        segmenter.segment();
        if (segmenter.isDrawSegmentation()) {
            BufferedImage segmentedImage = segmenter.getSegmentedImage();
            File tempDir = new File(System.getProperty("java.io.tmpdir"));
            ImageIO.write(segmentedImage, "PNG", new File(tempDir, imageName + "_seg.png"));
        }
        JochreImage jochreImage = sourceImage;

        // Pass 1: log every shape row by row and remember pixel (3, 3) of the
        // very first shape for the comparison made in pass 3.
        int i = 0;
        boolean firstShape = true;
        int midPixelFirstShape = 0;
        int midPixelFirstShapeRaw = 0;
        for (Paragraph paragraph : jochreImage.getParagraphs()) {
            for (RowOfShapes row : paragraph.getRows()) {
                int j = 0;
                LOG.debug("============= Row " + i + " ================");
                for (Shape shape : row.getShapes()) {
                    LOG.debug("Shape (" + i + "," + j + "). Left = " + shape.getLeft() + ". Top = " + shape.getTop() + ". Right = " + shape.getRight() + ". Bottom = " + shape.getBottom() + ". Group: " + shape.getGroup().getIndex());
                    if (firstShape) {
                        midPixelFirstShape = shape.getPixel(3, 3);
                        midPixelFirstShapeRaw = shape.getRawPixel(3, 3);
                        firstShape = false;
                    }
                    if (writePixelsToLog) {
                        logShapePixels(shape, sourceImage);
                    }
                    j++;
                } // next shape
                i++;
            } // next row
        } // next paragraph

        // Pass 2: assign one character of the expected text to each shape,
        // skipping one character (the space) after each group.
        i = 0;
        for (Paragraph paragraph : jochreImage.getParagraphs()) {
            for (RowOfShapes row : paragraph.getRows()) {
                for (GroupOfShapes group : row.getGroups()) {
                    for (Shape shape : group.getShapes()) {
                        if (i < text.length()) {
                            String letter = text.substring(i, i + 1);
                            String nextLetter = "";
                            if (i + 1 < text.length()) {
                                nextLetter = text.substring(i + 1, i + 2);
                            }
                            // a following diacritic is attached to the current letter
                            if (nextLetter.equals("ָֹ") || nextLetter.equals("ַ")) {
                                letter += nextLetter;
                                i++;
                            }
                            LOG.debug("Letter: " + letter);
                            shape.setLetter(letter);
                        }
                        i++;
                    }
                    // to skip the space
                    i++;
                    LOG.debug("Space");
                } // next group
            } // next row
        } // next paragraph

        List<ShapeFeature<?>> features = new ArrayList<ShapeFeature<?>>();
        features.add(new VerticalElongationFeature());
        features.add(new VerticalSizeFeature());
        features.add(new TouchesBaseLineFeature());
        features.add(new TouchesMeanLineFeature());
        features.add(new EmptyCentreFeature());

        // Pass 3: walk rows -> groups -> shapes, log details and assert counts.
        i = 0;
        DecimalFormat df = new DecimalFormat("0.00");
        firstShape = true;
        int totalRowCount = 0;
        for (Paragraph paragraph : jochreImage.getParagraphs()) {
            for (RowOfShapes row : paragraph.getRows()) {
                totalRowCount++;
                LOG.debug("============= Row " + i + " ================");
                int j = 0;
                for (GroupOfShapes group : row.getGroups()) {
                    for (Shape shape : group.getShapes()) {
                        LOG.debug("============= Shape (" + i + "," + j + ") ================");
                        LOG.debug("Left = " + shape.getLeft() + ". Top = " + shape.getTop() + ". Right = " + shape.getRight() + ". Bottom = " + shape.getBottom());
                        LOG.debug("Letter " + shape.getLetter());
                        if (firstShape) {
                            LOG.debug("mid pixel: " + midPixelFirstShape);
                            assertEquals(midPixelFirstShape, shape.getPixel(3, 3));
                            LOG.debug("mid pixel raw: " + midPixelFirstShapeRaw);
                            assertEquals(midPixelFirstShapeRaw, shape.getRawPixel(3, 3));
                            firstShape = false;
                        }
                        if (writePixelsToLog) {
                            logShapePixels(shape, sourceImage);
                        }
                        double[][] totals = shape.getBrightnessBySection(5, 5, 1, SectionBrightnessMeasurementMethod.RAW);
                        LOG.debug("Brightness counts");
                        for (int y = 0; y < totals[0].length; y++) {
                            StringBuilder line = new StringBuilder();
                            for (int x = 0; x < totals.length; x++) {
                                line.append(df.format(totals[x][y])).append("\t");
                            }
                            LOG.debug(line.toString());
                        }
                        for (ShapeFeature<?> feature : features) {
                            RuntimeEnvironment env = new RuntimeEnvironment();
                            FeatureResult<?> outcome = feature.check(shape, env);
                            LOG.debug(outcome.toString());
                        }
                    } // next shape
                    // only the first group of the first two rows has an expected size
                    if (j == 0) {
                        if (i == 0) {
                            assertEquals(shapeCountRow1Group1, group.getShapes().size());
                        } else if (i == 1) {
                            assertEquals(shapeCountRow2Group1, group.getShapes().size());
                        }
                    }
                    j++;
                } // next group
                if (i == 0) {
                    assertEquals(groupCountRow1, row.getGroups().size());
                } else if (i == 1) {
                    assertEquals(groupCountRow2, row.getGroups().size());
                } else if (i == 2) {
                    assertEquals(groupCountRow3, row.getGroups().size());
                }
                if (i == 0) {
                    assertEquals(shapeCountRow1, row.getShapes().size());
                } else if (i == 1) {
                    assertEquals(shapeCountRow2, row.getShapes().size());
                }
                i++;
            } // next row
        } // next paragraph
        assertEquals(rowCount, totalRowCount);
    } // next test image
    LOG.debug("************** Finished ***********");
}

/**
 * Writes the shape to the debug log, one line per pixel row. Each line starts
 * with "M" on the shape's mean line, "B" on its base line and the row number
 * otherwise, followed by "x" for every black pixel and "o" for every other
 * pixel, using the source image's black threshold.
 */
private void logShapePixels(Shape shape, SourceImage sourceImage) {
    for (int y = 0; y < shape.getHeight(); y++) {
        StringBuilder line = new StringBuilder();
        if (y == shape.getMeanLine()) {
            line.append("M");
        } else if (y == shape.getBaseLine()) {
            line.append("B");
        } else {
            line.append(y);
        }
        for (int x = 0; x < shape.getWidth(); x++) {
            line.append(shape.isPixelBlack(x, y, sourceImage.getBlackThreshold()) ? "x" : "o");
        }
        LOG.debug(line.toString());
    }
}
Also used : ShapeFeature(com.joliciel.jochre.graphics.features.ShapeFeature) Shape(com.joliciel.jochre.graphics.Shape) SourceImage(com.joliciel.jochre.graphics.SourceImage) Config(com.typesafe.config.Config) DecimalFormat(java.text.DecimalFormat) ArrayList(java.util.ArrayList) VerticalSizeFeature(com.joliciel.jochre.graphics.features.VerticalSizeFeature) JochreDocument(com.joliciel.jochre.doc.JochreDocument) TouchesBaseLineFeature(com.joliciel.jochre.graphics.features.TouchesBaseLineFeature) BufferedImage(java.awt.image.BufferedImage) JochreSession(com.joliciel.jochre.JochreSession) EmptyCentreFeature(com.joliciel.jochre.graphics.features.EmptyCentreFeature) JochreImage(com.joliciel.jochre.graphics.JochreImage) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) TouchesMeanLineFeature(com.joliciel.jochre.graphics.features.TouchesMeanLineFeature) InputStream(java.io.InputStream) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) VerticalElongationFeature(com.joliciel.jochre.graphics.features.VerticalElongationFeature) Segmenter(com.joliciel.jochre.graphics.Segmenter) JochrePage(com.joliciel.jochre.doc.JochrePage) Paragraph(com.joliciel.jochre.graphics.Paragraph) GroupOfShapes(com.joliciel.jochre.graphics.GroupOfShapes) File(java.io.File) Test(org.junit.Test)

Example 2 with Paragraph

use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.

Class TextGetterImplTest, method testGetTextFontSizes.

/**
 * Feeds {@link TextGetter} a single row of four one-letter groups with
 * x-heights 10, 20, 10 and 5 and checks that the XHTML output marks the
 * second group as big and the fourth as small.
 */
@Test
public void testGetTextFontSizes() {
    // group k holds exactly one shape with letter letters[k] and x-height xHeights[k]
    final String[] letters = { "A", "B", "C", "D" };
    final int[] xHeights = { 10, 20, 10, 5 };

    final List<GroupOfShapes> groupList = new ArrayList<>();
    for (int k = 0; k < letters.length; k++) {
        final Shape shapeMock = mock(Shape.class);
        when(shapeMock.getLetter()).thenReturn(letters[k]);
        when(shapeMock.getXHeight()).thenReturn(xHeights[k]);

        final List<Shape> groupShapes = new ArrayList<>();
        groupShapes.add(shapeMock);

        final GroupOfShapes groupMock = mock(GroupOfShapes.class);
        when(groupMock.getShapes()).thenReturn(groupShapes);
        when(groupMock.getXHeight()).thenReturn(xHeights[k]);
        groupList.add(groupMock);
    }

    // single row inside a single paragraph
    final RowOfShapes rowMock = mock(RowOfShapes.class);
    when(rowMock.getGroups()).thenReturn(groupList);
    final List<RowOfShapes> rowList = new ArrayList<>();
    rowList.add(rowMock);

    final Paragraph paragraphMock = mock(Paragraph.class);
    when(paragraphMock.getRows()).thenReturn(rowList);
    final List<Paragraph> paragraphList = new ArrayList<>();
    paragraphList.add(paragraphMock);

    // image -> page -> document chain; only the document is stubbed as left-to-right
    final JochreDocument docMock = mock(JochreDocument.class);
    when(docMock.isLeftToRight()).thenReturn(true);
    final JochrePage pageMock = mock(JochrePage.class);
    when(pageMock.getDocument()).thenReturn(docMock);
    final JochreImage imageMock = mock(JochreImage.class);
    when(imageMock.getPage()).thenReturn(pageMock);
    when(imageMock.getParagraphs()).thenReturn(paragraphList);

    final StringWriter out = new StringWriter();
    final TextGetter getter = new TextGetter(out, TextFormat.XHTML);
    getter.onImageComplete(imageMock);

    final String xhtml = out.toString();
    LOG.debug(xhtml);
    assertEquals("<p dir=\"rtl\">A <big>B </big>C <small>D </small></p>", xhtml);
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) JochreDocument(com.joliciel.jochre.doc.JochreDocument) JochrePage(com.joliciel.jochre.doc.JochrePage) Paragraph(com.joliciel.jochre.graphics.Paragraph) StringWriter(java.io.StringWriter) GroupOfShapes(com.joliciel.jochre.graphics.GroupOfShapes) Test(org.junit.Test)

Example 3 with Paragraph

use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.

Class TextGetterImplTest, method testGetText.

/**
 * Feeds {@link TextGetter} one group of nine shapes in plain-text mode and
 * checks the assembled string: two commas become a low quotation mark,
 * matching halves of a split letter are merged into one letter, and
 * non-matching halves are emitted with their "|" markers.
 */
@Test
public void testGetText() {
    // letters of the nine shapes, in shape order
    final String[] letters = { ",", ",", "|אַ", "אַ|", "|m", "m|", "|ש", "ע|", "," };

    final List<Shape> shapeList = new ArrayList<>();
    for (String letter : letters) {
        final Shape shapeMock = mock(Shape.class);
        when(shapeMock.getLetter()).thenReturn(letter);
        shapeList.add(shapeMock);
    }

    // one group in one row in one paragraph
    final GroupOfShapes groupMock = mock(GroupOfShapes.class);
    when(groupMock.getShapes()).thenReturn(shapeList);
    when(groupMock.getXHeight()).thenReturn(10);
    final List<GroupOfShapes> groupList = new ArrayList<>();
    groupList.add(groupMock);

    final RowOfShapes rowMock = mock(RowOfShapes.class);
    when(rowMock.getGroups()).thenReturn(groupList);
    final List<RowOfShapes> rowList = new ArrayList<>();
    rowList.add(rowMock);

    final Paragraph paragraphMock = mock(Paragraph.class);
    when(paragraphMock.getRows()).thenReturn(rowList);
    final List<Paragraph> paragraphList = new ArrayList<>();
    paragraphList.add(paragraphMock);

    // image -> page -> document chain, with a right-to-left document
    final JochreDocument docMock = mock(JochreDocument.class);
    when(docMock.isLeftToRight()).thenReturn(false);
    final JochrePage pageMock = mock(JochrePage.class);
    when(pageMock.getDocument()).thenReturn(docMock);
    final JochreImage imageMock = mock(JochreImage.class);
    when(imageMock.getPage()).thenReturn(pageMock);
    when(imageMock.getParagraphs()).thenReturn(paragraphList);

    final StringWriter out = new StringWriter();
    final TextGetter getter = new TextGetter(out, TextFormat.PLAIN);
    getter.onImageComplete(imageMock);

    final String plainText = out.toString();
    LOG.debug(plainText);
    assertEquals("„אַm|שע|, \n", plainText);
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) JochreDocument(com.joliciel.jochre.doc.JochreDocument) JochrePage(com.joliciel.jochre.doc.JochrePage) Paragraph(com.joliciel.jochre.graphics.Paragraph) StringWriter(java.io.StringWriter) GroupOfShapes(com.joliciel.jochre.graphics.GroupOfShapes) Test(org.junit.Test)

Example 4 with Paragraph

use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.

Class TextGetter, method onImageComplete.

/**
 * Writes the text of every non-junk paragraph of the image to the writer.
 *
 * <p>In XHTML format each paragraph is wrapped in a {@code <p>} element and
 * groups whose x-height is at least 1.15 times (at most 0.85 times) the mean
 * shape x-height of the image are wrapped in {@code <big>}
 * ({@code <small>}). In any other format each paragraph is terminated by a
 * newline.
 *
 * <p>While assembling a group's text: two consecutive "," shapes become "„",
 * two consecutive "'" shapes become "“", a shape whose letter starts with
 * "|" followed by one whose letter ends with "|" is merged into a single
 * letter when both halves match, and a "-" that is the last shape of a
 * non-final row is dropped (it is re-inserted only if the lexicon knows the
 * hyphenated word).
 *
 * @param image the image whose paragraphs are written
 * @throws RuntimeException wrapping any {@link IOException} raised by the writer
 */
@Override
public void onImageComplete(JochreImage image) {
    try {
        double minRatioBiggerFont = 1.15;
        double maxRatioSmallerFont = 0.85;
        double meanXHeight = 0;
        if (textFormat.equals(TextFormat.XHTML)) {
            // mean x-height over all shapes of all non-junk paragraphs
            Mean xHeightMean = new Mean();
            for (Paragraph paragraph : image.getParagraphs()) {
                if (!paragraph.isJunk()) {
                    for (RowOfShapes row : paragraph.getRows()) {
                        for (GroupOfShapes group : row.getGroups()) {
                            for (Shape shape : group.getShapes()) {
                                xHeightMean.increment(shape.getXHeight());
                            }
                        }
                    }
                }
            }
            meanXHeight = xHeightMean.getResult();
        }
        // NOTE(review): the paragraph direction is read from the image here,
        // while the bidi decision below is read from the document - confirm
        // that both are meant to agree.
        String paragraphString = "<p>";
        if (!image.isLeftToRight())
            paragraphString = "<p dir=\"rtl\">";
        for (Paragraph paragraph : image.getParagraphs()) {
            if (!paragraph.isJunk()) {
                if (textFormat.equals(TextFormat.XHTML))
                    writer.append(paragraphString);
                // Offset in paragraphText -> font size that applies from that
                // offset onwards (-1 small, 0 normal, 1 big). Storing the
                // target size rather than a bigger/smaller flag keeps direct
                // big<->small transitions, and several changes at the same
                // offset, well-formed.
                Map<Integer, Integer> fontSizeChanges = new TreeMap<Integer, Integer>();
                int currentFontSize = 0;
                StringBuilder paragraphText = new StringBuilder();
                String lastWord = "";
                boolean lastRowEndedWithHyphen = false;
                for (RowOfShapes row : paragraph.getRows()) {
                    for (GroupOfShapes group : row.getGroups()) {
                        boolean endOfRowHyphen = false;
                        if (textFormat.equals(TextFormat.XHTML)) {
                            double ratio = group.getXHeight() / meanXHeight;
                            int groupFontSize = 0;
                            if (ratio >= minRatioBiggerFont) {
                                groupFontSize = 1;
                            } else if (ratio <= maxRatioSmallerFont) {
                                groupFontSize = -1;
                            }
                            if (groupFontSize != currentFontSize) {
                                fontSizeChanges.put(paragraphText.length(), groupFontSize);
                                currentFontSize = groupFontSize;
                            }
                        }
                        StringBuilder sb = new StringBuilder();
                        // holds a pending first half: "|x", a single "," or a single "'"
                        StringBuilder currentSequence = new StringBuilder();
                        for (Shape shape : group.getShapes()) {
                            String letter = shape.getLetter();
                            if (letter.startsWith("|")) {
                                // beginning of a gehakte letter
                                currentSequence.append(shape.getLetter());
                                continue;
                            } else if (letter.endsWith("|")) {
                                // end of a gehakte letter
                                if (currentSequence.length() > 0 && currentSequence.charAt(0) == '|') {
                                    String letter1 = currentSequence.toString().substring(1);
                                    String letter2 = letter.substring(0, letter.length() - 1);
                                    if (letter1.equals(letter2)) {
                                        letter = letter1;
                                    } else {
                                        letter = currentSequence.toString() + letter;
                                    }
                                    currentSequence = new StringBuilder();
                                }
                            }
                            if (letter.equals(",")) {
                                // could be ",," = "„"
                                if (currentSequence.length() > 0 && currentSequence.charAt(0) == ',') {
                                    sb.append("„");
                                    currentSequence = new StringBuilder();
                                } else {
                                    currentSequence.append(shape.getLetter());
                                }
                            } else if (letter.equals("'")) {
                                // could be "''" = "“"
                                if (currentSequence.length() > 0 && currentSequence.charAt(0) == '\'') {
                                    sb.append("“");
                                    currentSequence = new StringBuilder();
                                } else {
                                    currentSequence.append(shape.getLetter());
                                }
                            } else if (letter.equals("-")) {
                                if (shape.getIndex() == group.getShapes().size() - 1 && group.getIndex() == row.getGroups().size() - 1 && row.getIndex() != paragraph.getRows().size() - 1) {
                                    // do nothing - dash at the end of the
                                    // line
                                    // we'll assume for now these dashes are
                                    // always supposed to disappear
                                    // though of course they could be used
                                    // in the place of a real mid-word dash
                                    endOfRowHyphen = true;
                                } else {
                                    sb.append(shape.getLetter());
                                }
                            } else {
                                // flush any pending half before the ordinary letter
                                sb.append(currentSequence);
                                currentSequence = new StringBuilder();
                                // generalise this
                                if (letter.equals(",,")) {
                                    sb.append("„");
                                } else if (letter.equals("''")) {
                                    sb.append("“");
                                } else {
                                    sb.append(letter);
                                }
                            }
                        }
                        // next shape
                        sb.append(currentSequence);
                        String word = sb.toString();
                        if (endOfRowHyphen) {
                            lastRowEndedWithHyphen = true;
                        } else if (lastRowEndedWithHyphen) {
                            // first word after a dropped end-of-row dash: keep the
                            // dash only if the lexicon knows the hyphenated form
                            if (lexicon != null) {
                                String hyphenatedWord = lastWord + "-" + word;
                                int frequency = lexicon.getFrequency(hyphenatedWord);
                                LOG.debug("hyphenatedWord: " + hyphenatedWord + ", Frequency: " + frequency);
                                if (frequency > 0) {
                                    paragraphText.append("-");
                                }
                            }
                            lastRowEndedWithHyphen = false;
                        }
                        lastWord = word;
                        paragraphText.append(word);
                        // no space after a word whose end-of-row dash was dropped
                        if (!lastRowEndedWithHyphen)
                            paragraphText.append(' ');
                    }
                    // next group
                }
                // next row
                String paragraphStr = paragraphText.toString();
                Writer currentWriter = writer;
                boolean haveFontSizes = fontSizeChanges.size() > 0;
                if (haveFontSizes) {
                    // buffer the text so the font tags can be spliced in below
                    currentWriter = new StringWriter();
                }
                if (image.getPage().getDocument().isLeftToRight()) {
                    currentWriter.append(paragraphText);
                } else {
                    this.appendBidiText(paragraphStr, currentWriter);
                }
                if (haveFontSizes) {
                    currentFontSize = 0;
                    String text = currentWriter.toString();
                    int currentIndex = 0;
                    for (Map.Entry<Integer, Integer> fontSizeChange : fontSizeChanges.entrySet()) {
                        int changeIndex = fontSizeChange.getKey();
                        int newFontSize = fontSizeChange.getValue();
                        writer.append(text.substring(currentIndex, changeIndex));
                        if (newFontSize != currentFontSize) {
                            // close whatever is open, then open the new size
                            if (currentFontSize > 0) {
                                writer.append("</big>");
                            } else if (currentFontSize < 0) {
                                writer.append("</small>");
                            }
                            if (newFontSize > 0) {
                                writer.append("<big>");
                            } else if (newFontSize < 0) {
                                writer.append("<small>");
                            }
                            currentFontSize = newFontSize;
                        }
                        currentIndex = changeIndex;
                    }
                    writer.append(text.substring(currentIndex));
                    if (currentFontSize > 0) {
                        writer.append("</big>");
                    } else if (currentFontSize < 0) {
                        writer.append("</small>");
                    }
                }
                if (textFormat.equals(TextFormat.XHTML))
                    writer.append("</p>");
                else
                    writer.append('\n');
                writer.flush();
            }
            // paragraph.isJunk()?
        }
        // next paragraph
    } catch (IOException e) {
        LOG.error("Failed writing to " + this.getClass().getSimpleName(), e);
        throw new RuntimeException(e);
    }
}
Also used : Mean(org.apache.commons.math.stat.descriptive.moment.Mean) Shape(com.joliciel.jochre.graphics.Shape) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) IOException(java.io.IOException) TreeMap(java.util.TreeMap) Paragraph(com.joliciel.jochre.graphics.Paragraph) StringWriter(java.io.StringWriter) GroupOfShapes(com.joliciel.jochre.graphics.GroupOfShapes) StringWriter(java.io.StringWriter) Writer(java.io.Writer)

Example 5 with Paragraph

use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.

Class TrainingCorpusShapeSplitterTest, method testSplit.

/**
 * Splits a mocked shape carrying a four-letter word at three split positions
 * and verifies that each of the four resulting shapes is assigned its letter.
 *
 * @throws Exception if the configuration cannot be loaded or splitting fails
 */
@Test
public void testSplit() throws Exception {
    System.setProperty("config.file", "src/test/resources/testDualCharacters.conf");
    ConfigFactory.invalidateCaches();
    final Config conf = ConfigFactory.load();
    final JochreSession session = new JochreSession(conf);

    // the shape to split: 100..200 on both axes
    final Shape wholeShape = mock(Shape.class);
    when(wholeShape.getLetter()).thenReturn("אָבּער");
    when(wholeShape.getLeft()).thenReturn(100);
    when(wholeShape.getRight()).thenReturn(200);
    when(wholeShape.getTop()).thenReturn(100);
    when(wholeShape.getBottom()).thenReturn(200);

    // containment chain: shape -> group -> row -> paragraph -> image -> page -> document
    final GroupOfShapes groupMock = mock(GroupOfShapes.class);
    final RowOfShapes rowMock = mock(RowOfShapes.class);
    final Paragraph paragraphMock = mock(Paragraph.class);
    final JochreImage imageMock = mock(JochreImage.class);
    final JochrePage pageMock = mock(JochrePage.class);
    final JochreDocument documentMock = mock(JochreDocument.class);
    when(wholeShape.getGroup()).thenReturn(groupMock);
    when(wholeShape.getJochreImage()).thenReturn(imageMock);
    when(groupMock.getRow()).thenReturn(rowMock);
    when(rowMock.getParagraph()).thenReturn(paragraphMock);
    when(paragraphMock.getImage()).thenReturn(imageMock);
    when(imageMock.getPage()).thenReturn(pageMock);
    when(pageMock.getDocument()).thenReturn(documentMock);
    when(documentMock.getLocale()).thenReturn(session.getLocale());

    // three splits, offered through a mocked list and iterator
    final Split firstSplit = mock(Split.class);
    final Split secondSplit = mock(Split.class);
    final Split thirdSplit = mock(Split.class);
    when(firstSplit.getPosition()).thenReturn(35);
    when(secondSplit.getPosition()).thenReturn(59);
    when(thirdSplit.getPosition()).thenReturn(82);
    final Iterator<Split> splitIterator = (Iterator<Split>) mock(Iterator.class);
    final List<Split> splitList = (List<Split>) mock(List.class);
    when(wholeShape.getSplits()).thenReturn(splitList);
    when(splitList.iterator()).thenReturn(splitIterator);
    when(splitIterator.hasNext()).thenReturn(true, true, true, false);
    when(splitIterator.next()).thenReturn(firstSplit, secondSplit, thirdSplit);

    // the image hands back one sub-shape per rectangle between the splits
    final Shape part1 = mock(Shape.class);
    final Shape part2 = mock(Shape.class);
    final Shape part3 = mock(Shape.class);
    final Shape part4 = mock(Shape.class);
    when(imageMock.getShape(100, 100, 135, 200)).thenReturn(part1);
    when(imageMock.getShape(136, 100, 159, 200)).thenReturn(part2);
    when(imageMock.getShape(160, 100, 182, 200)).thenReturn(part3);
    when(imageMock.getShape(183, 100, 200, 200)).thenReturn(part4);

    LOG.debug(wholeShape.toString());
    LOG.debug(wholeShape.getLetter());

    final TrainingCorpusShapeSplitter shapeSplitter = new TrainingCorpusShapeSplitter(session);
    final List<ShapeSequence> sequences = shapeSplitter.split(wholeShape);
    final ShapeSequence firstSequence = sequences.get(0);
    assertEquals(4, firstSequence.size());
    LOG.debug("Split into: " + firstSequence.toString());

    verify(part1).setLetter("אָ");
    verify(part2).setLetter("בּ");
    verify(part3).setLetter("ע");
    verify(part4).setLetter("ר");
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape) Config(com.typesafe.config.Config) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) JochreDocument(com.joliciel.jochre.doc.JochreDocument) JochrePage(com.joliciel.jochre.doc.JochrePage) Paragraph(com.joliciel.jochre.graphics.Paragraph) GroupOfShapes(com.joliciel.jochre.graphics.GroupOfShapes) Iterator(java.util.Iterator) List(java.util.List) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Aggregations

Paragraph (com.joliciel.jochre.graphics.Paragraph)17 RowOfShapes (com.joliciel.jochre.graphics.RowOfShapes)17 ArrayList (java.util.ArrayList)12 GroupOfShapes (com.joliciel.jochre.graphics.GroupOfShapes)10 Test (org.junit.Test)10 Shape (com.joliciel.jochre.graphics.Shape)9 JochreSession (com.joliciel.jochre.JochreSession)8 Config (com.typesafe.config.Config)8 Segmenter (com.joliciel.jochre.graphics.Segmenter)7 SourceImage (com.joliciel.jochre.graphics.SourceImage)7 BufferedImage (java.awt.image.BufferedImage)7 InputStream (java.io.InputStream)7 JochreImage (com.joliciel.jochre.graphics.JochreImage)6 Rectangle (java.awt.Rectangle)6 HashMap (java.util.HashMap)6 JochreDocument (com.joliciel.jochre.doc.JochreDocument)4 JochrePage (com.joliciel.jochre.doc.JochrePage)4 StringWriter (java.io.StringWriter)3 Expectations (mockit.Expectations)3 LetterSequence (com.joliciel.jochre.letterGuesser.LetterSequence)2