Search in sources:

Example 36 with JochreSession

use of com.joliciel.jochre.JochreSession in project jochre by urieli.

the class RecursiveShapeSplitterTest method testSplitShapeNoSplitMoreLikely.

/**
 * If not splitting is always more likely (60% against 40% for a split), the
 * shape sequences are expected to come back with the unsplit shape first and
 * the most heavily split sequence last.
 */
@SuppressWarnings("unchecked")
@Test
public void testSplitShapeNoSplitMoreLikely() throws Exception {
    // Reload the test configuration from scratch.
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final JochreSession session = new JochreSession(ConfigFactory.load());

    final JochreImage image = new JochreImage(new BufferedImage(256, 256, BufferedImage.TYPE_INT_RGB), session);

    // The shape handed to the splitter, plus the two shapes used to stub the
    // second level of split candidates.
    final Shape wholeShape = new Shape(image, 0, 0, 63, 15, session);
    wholeShape.setBaseLine(12);
    wholeShape.setMeanLine(4);
    final Shape firstPart = new Shape(image, 0, 0, 31, 15, session);
    firstPart.setBaseLine(12);
    firstPart.setMeanLine(4);
    final Shape secondPart = new Shape(image, 32, 0, 63, 15, session);
    secondPart.setBaseLine(12);
    secondPart.setMeanLine(4);

    final SplitCandidateFinder candidateFinder = mock(SplitCandidateFinder.class);
    final DecisionMaker decider = mock(DecisionMaker.class);

    // One split candidate for the whole shape.
    final Split wholeSplit = new Split(wholeShape, session);
    wholeSplit.setPosition(31);
    final List<Split> wholeSplits = new ArrayList<>();
    wholeSplits.add(wholeSplit);
    when(candidateFinder.findSplitCandidates(wholeShape)).thenReturn(wholeSplits);

    // Every decision request gets the same answer: 40% split, 60% no split.
    final List<Decision> outcomes = new ArrayList<>();
    outcomes.add(new Decision(SplitOutcome.DO_SPLIT.name(), 0.4));
    outcomes.add(new Decision(SplitOutcome.DO_NOT_SPLIT.name(), 0.6));
    when(decider.decide(anyList())).thenReturn(outcomes);

    // One split candidate for each of the two parts.
    final Split firstPartSplit = new Split(firstPart, session);
    firstPartSplit.setPosition(15);
    final List<Split> firstPartSplits = new ArrayList<>();
    firstPartSplits.add(firstPartSplit);
    when(candidateFinder.findSplitCandidates(firstPart)).thenReturn(firstPartSplits);

    final Split secondPartSplit = new Split(secondPart, session);
    secondPartSplit.setPosition(15);
    final List<Split> secondPartSplits = new ArrayList<>();
    secondPartSplits.add(secondPartSplit);
    when(candidateFinder.findSplitCandidates(secondPart)).thenReturn(secondPartSplits);

    final Set<SplitFeature<?>> features = new TreeSet<>();
    final RecursiveShapeSplitter shapeSplitter = new RecursiveShapeSplitter(candidateFinder, features, decider, session);
    shapeSplitter.setBeamWidth(10);
    shapeSplitter.setMaxDepth(2);
    shapeSplitter.setMinWidthRatio(1.0);

    final List<ShapeSequence> sequences = shapeSplitter.split(wholeShape);
    assertEquals(5, sequences.size());

    // Expected score and shape count for each sequence, in result order.
    final double ratio = 0.4 / 0.6;
    LOG.debug("twoThirds: " + ratio);
    final double[] expectedScores = { 1.0, ratio, ratio * ratio, ratio * ratio, ratio * ratio * ratio };
    final int[] expectedSizes = { 1, 2, 3, 3, 4 };

    for (int index = 0; index < sequences.size(); index++) {
        final ShapeSequence sequence = sequences.get(index);
        LOG.debug("sequence " + index + " decisions:");
        for (Decision decision : sequence.getDecisions()) {
            LOG.debug("" + decision.getProbability());
        }
        assertEquals(expectedScores[index], sequence.getScore(), 0.0001);
        assertEquals(expectedSizes[index], sequence.size());
    }
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) DecisionMaker(com.joliciel.talismane.machineLearning.DecisionMaker) SplitFeature(com.joliciel.jochre.boundaries.features.SplitFeature) BufferedImage(java.awt.image.BufferedImage) Decision(com.joliciel.talismane.machineLearning.Decision) TreeSet(java.util.TreeSet) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 37 with JochreSession

use of com.joliciel.jochre.JochreSession in project jochre by urieli.

the class RecursiveShapeSplitterTest method testSplitShapeSplitMoreLikely.

/**
 * When splitting is always the likelier outcome (60% against 40% for no
 * split), the shape sequences must come back with the most heavily split
 * sequence first and the unsplit shape last.
 */
@SuppressWarnings("unchecked")
@Test
public void testSplitShapeSplitMoreLikely() throws Exception {
    // Reload the test configuration from scratch.
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final JochreSession session = new JochreSession(ConfigFactory.load());

    final JochreImage image = new JochreImage(new BufferedImage(256, 256, BufferedImage.TYPE_INT_RGB), session);

    // The shape handed to the splitter, plus the two shapes used to stub the
    // second level of split candidates.
    final Shape wholeShape = new Shape(image, 0, 0, 63, 15, session);
    wholeShape.setBaseLine(12);
    wholeShape.setMeanLine(4);
    final Shape firstPart = new Shape(image, 0, 0, 31, 15, session);
    firstPart.setBaseLine(12);
    firstPart.setMeanLine(4);
    final Shape secondPart = new Shape(image, 32, 0, 63, 15, session);
    secondPart.setBaseLine(12);
    secondPart.setMeanLine(4);

    final SplitCandidateFinder candidateFinder = mock(SplitCandidateFinder.class);
    final DecisionMaker decider = mock(DecisionMaker.class);

    // One split candidate for the whole shape.
    final Split wholeSplit = new Split(wholeShape, session);
    wholeSplit.setPosition(31);
    final List<Split> wholeSplits = new ArrayList<>();
    wholeSplits.add(wholeSplit);
    when(candidateFinder.findSplitCandidates(wholeShape)).thenReturn(wholeSplits);

    // Every decision request gets the same answer: 60% split, 40% no split.
    final List<Decision> outcomes = new ArrayList<>();
    outcomes.add(new Decision(SplitOutcome.DO_SPLIT.name(), 0.6));
    outcomes.add(new Decision(SplitOutcome.DO_NOT_SPLIT.name(), 0.4));
    when(decider.decide(anyList())).thenReturn(outcomes);

    // One split candidate for each of the two parts.
    final Split firstPartSplit = new Split(firstPart, session);
    firstPartSplit.setPosition(15);
    final List<Split> firstPartSplits = new ArrayList<>();
    firstPartSplits.add(firstPartSplit);
    when(candidateFinder.findSplitCandidates(firstPart)).thenReturn(firstPartSplits);

    final Split secondPartSplit = new Split(secondPart, session);
    secondPartSplit.setPosition(15);
    final List<Split> secondPartSplits = new ArrayList<>();
    secondPartSplits.add(secondPartSplit);
    when(candidateFinder.findSplitCandidates(secondPart)).thenReturn(secondPartSplits);

    final Set<SplitFeature<?>> features = new TreeSet<>();
    final RecursiveShapeSplitter shapeSplitter = new RecursiveShapeSplitter(candidateFinder, features, decider, session);
    shapeSplitter.setBeamWidth(10);
    shapeSplitter.setMaxDepth(2);
    shapeSplitter.setMinWidthRatio(1.0);

    final List<ShapeSequence> sequences = shapeSplitter.split(wholeShape);
    assertEquals(5, sequences.size());

    // First pass: log the extent of every shape and the score of each sequence.
    for (int index = 0; index < sequences.size(); index++) {
        final ShapeSequence sequence = sequences.get(index);
        LOG.debug("sequence " + index + " shapes:");
        for (ShapeInSequence member : sequence) {
            final Shape memberShape = member.getShape();
            LOG.debug("Shape: " + memberShape.getLeft() + "," + memberShape.getRight());
        }
        LOG.debug("" + sequence.getScore());
    }

    // Expected score and shape count for each sequence, in result order.
    final double ratio = 0.4 / 0.6;
    LOG.debug("twoThirds: " + ratio);
    final double[] expectedScores = { 1.0, ratio, ratio, ratio * ratio, ratio * ratio * ratio };
    final int[] expectedSizes = { 4, 3, 3, 2, 1 };

    // Second pass: log the decisions and check score and size.
    for (int index = 0; index < sequences.size(); index++) {
        final ShapeSequence sequence = sequences.get(index);
        LOG.debug("sequence " + index + " decisions:");
        for (Decision decision : sequence.getDecisions()) {
            LOG.debug("" + decision.getProbability());
        }
        assertEquals(expectedScores[index], sequence.getScore(), 0.0001);
        assertEquals(expectedSizes[index], sequence.size());
    }
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) DecisionMaker(com.joliciel.talismane.machineLearning.DecisionMaker) SplitFeature(com.joliciel.jochre.boundaries.features.SplitFeature) BufferedImage(java.awt.image.BufferedImage) Decision(com.joliciel.talismane.machineLearning.Decision) TreeSet(java.util.TreeSet) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 38 with JochreSession

use of com.joliciel.jochre.JochreSession in project jochre by urieli.

the class SplitCandidateFinderImplTest method testFindSplitCanidates.

/**
 * Checks that the split candidate finder proposes, for the sample shape image
 * {@code shape_370454.png}, at least one candidate close to each of the known
 * split positions. "Close" means nearer than the finder's own minimum distance
 * between splits.
 *
 * @throws Exception if the setup or image loading fails
 */
@Test
public void testFindSplitCanidates() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    Config config = ConfigFactory.load();
    JochreSession jochreSession = new JochreSession(config);

    // ImageIO.read leaves the stream open, so close it ourselves once the
    // image has been decoded.
    final BufferedImage image;
    try (InputStream imageFileStream = getClass().getResourceAsStream("shape_370454.png")) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }

    final JochrePage page = mock(JochrePage.class);
    JochreImage jochreImage = new SourceImage(page, "name", image, jochreSession);
    // A single shape covering the whole image.
    Shape shape = jochreImage.getShape(0, 0, jochreImage.getWidth() - 1, jochreImage.getHeight() - 1);
    SplitCandidateFinder splitCandidateFinder = new SplitCandidateFinder(jochreSession);
    List<Split> splits = splitCandidateFinder.findSplitCandidates(shape);

    int[] trueSplitPositions = new int[] { 38, 59, 82 };
    // One flag per expected position; all start out false.
    boolean[] foundSplit = new boolean[trueSplitPositions.length];
    for (Split splitCandidate : splits) {
        LOG.debug("Split candidate at " + splitCandidate.getPosition());
        for (int i = 0; i < trueSplitPositions.length; i++) {
            int truePos = trueSplitPositions[i];
            int distance = Math.abs(splitCandidate.getPosition() - truePos);
            if (distance < splitCandidateFinder.getMinDistanceBetweenSplits()) {
                foundSplit[i] = true;
                LOG.debug("Found split: " + truePos + ", distance " + distance);
            }
        }
    }
    for (int i = 0; i < trueSplitPositions.length; i++) {
        assertTrue("didn't find split " + trueSplitPositions[i], foundSplit[i]);
    }
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape) SourceImage(com.joliciel.jochre.graphics.SourceImage) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) JochrePage(com.joliciel.jochre.doc.JochrePage) BufferedImage(java.awt.image.BufferedImage) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 39 with JochreSession

use of com.joliciel.jochre.JochreSession in project jochre by urieli.

the class SegmentationTest method testAlsacien1.

/**
 * Segmentation errors reported for Alsacien.
 * <p>
 * Segments {@code Alsacien1.jpg} and walks the resulting paragraphs in order,
 * matching each against the next expected text rectangle. A match requires
 * the intersection to cover more than 80% of both rectangles. Every expected
 * rectangle must be matched; the row count of each matched paragraph is then
 * checked, as is the word count of its first row where one is specified.
 *
 * @param jochrePage
 *          mocked page, set up to return {@code jochreDoc} as its document
 * @param jochreDoc
 *          mocked document, set up to report left-to-right text
 * @throws Exception
 *           if the setup, image loading or segmentation fails
 */
@Test
public void testAlsacien1(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "de");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    new Expectations() {

        {
            jochrePage.getDocument();
            result = jochreDoc;
            minTimes = 0;
            jochreDoc.isLeftToRight();
            result = true;
            minTimes = 0;
        }
    };
    String imageName = "Alsacien1.jpg";
    LOG.debug(imageName);
    // ImageIO.read leaves the stream open, so close it ourselves once the
    // image has been decoded.
    final BufferedImage image;
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    List<Rectangle> textPars = new ArrayList<>();
    Rectangle textPar1 = new Rectangle(715, 517, 462, 115);
    // TODO: for now it's splitting this paragraph by row, since it's assuming
    // paragraphs cannot be
    // both outdented and indented on the same page
    // Rectangle textPar2 = new Rectangle(50, 666, 1798, 1039);
    Rectangle textPar3 = new Rectangle(55, 1837, 1777, 335);
    Rectangle textPar4 = new Rectangle(50, 2211, 1765, 154);
    Rectangle textPar5 = new Rectangle(44, 2404, 1782, 511);
    Rectangle textPar6 = new Rectangle(50, 2948, 1776, 154);
    Rectangle textPar7 = new Rectangle(50, 3135, 1770, 77);
    // title paragraph
    textPars.add(textPar1);
    // textPars.add(textPar2);
    textPars.add(textPar3);
    textPars.add(textPar4);
    textPars.add(textPar5);
    textPars.add(textPar6);
    textPars.add(textPar7);
    // i indexes the next expected rectangle, j counts segmented paragraphs
    // (used for logging only).
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // For disjoint rectangles the intersection may have negative width AND
        // height, whose product is positive; count an empty intersection as zero.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j + ": " + par.toString());
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        if (realRatio > 0.8 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // All expected paragraphs matched: stop before textPars.get(i)
            // runs past the end of the list on a trailing paragraph.
            if (i >= textPars.size())
                break;
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());
    // Expected values per matched paragraph, in the same order as textPars.
    int[] rowCounts = new int[] { 1, 4, 2, 6, 2, 1 };
    // A word count of 0 means "not checked" for that paragraph.
    int[] wordCountsFirstRow = new int[] { 2, 0, 0, 0, 0, 0 };
    for (i = 0; i < textParagraphs.size(); i++) {
        assertEquals("row count " + i, rowCounts[i], textParagraphs.get(i).getRows().size());
        RowOfShapes row = textParagraphs.get(i).getRows().get(0);
        if (wordCountsFirstRow[i] > 0)
            assertEquals("word count " + i, wordCountsFirstRow[i], row.getGroups().size());
    }
}
Also used : Expectations(mockit.Expectations) SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 40 with JochreSession

use of com.joliciel.jochre.JochreSession in project jochre by urieli.

the class SegmentationTest method testAlsacienPlay3.

/**
 * Segmentation errors reported for Alsacien play - challenging because of the
 * unusual indentation.
 * <p>
 * Segments {@code AlsacienPlay3.jpg} and walks the resulting paragraphs in
 * order, matching each against the next expected text rectangle. A match
 * requires the intersection to cover more than 80% of both rectangles. Every
 * expected rectangle must be matched; the row count of each matched paragraph
 * is then checked, as is the word count of its first row where one is
 * specified.
 *
 * @param jochrePage
 *          mocked page, set up to return {@code jochreDoc} as its document
 * @param jochreDoc
 *          mocked document, set up to report left-to-right text
 * @throws Exception
 *           if the setup, image loading or segmentation fails
 */
@Test
public void testAlsacienPlay3(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("jochre.locale", "de");
    Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
    JochreSession jochreSession = new JochreSession(config);
    new Expectations() {

        {
            jochrePage.getDocument();
            result = jochreDoc;
            minTimes = 0;
            jochreDoc.isLeftToRight();
            result = true;
            minTimes = 0;
        }
    };
    String imageName = "AlsacienPlay3.jpg";
    LOG.debug(imageName);
    // ImageIO.read leaves the stream open, so close it ourselves once the
    // image has been decoded.
    final BufferedImage image;
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
        assertNotNull(imageFileStream);
        image = ImageIO.read(imageFileStream);
    }
    SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    List<Rectangle> textPars = new ArrayList<>();
    Rectangle textPar1 = new Rectangle(712, 532, 556, 52);
    Rectangle textPar2 = new Rectangle(324, 600, 1324, 128);
    Rectangle textPar3 = new Rectangle(680, 730, 592, 50);
    Rectangle textPar4 = new Rectangle(404, 808, 684, 48);
    // title paragraph
    textPars.add(textPar1);
    textPars.add(textPar2);
    textPars.add(textPar3);
    textPars.add(textPar4);
    // i indexes the next expected rectangle, j counts segmented paragraphs
    // (used for logging only).
    int i = 0;
    int j = 0;
    List<Paragraph> textParagraphs = new ArrayList<>();
    for (Paragraph par : sourceImage.getParagraphs()) {
        Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
        Rectangle expected = textPars.get(i);
        Rectangle intersection = expected.intersection(real);
        double realArea = (double) real.width * real.height;
        double expectedArea = (double) expected.width * expected.height;
        // For disjoint rectangles the intersection may have negative width AND
        // height, whose product is positive; count an empty intersection as zero.
        double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
        double realRatio = intersectionArea / realArea;
        double expectedRatio = intersectionArea / expectedArea;
        LOG.debug("Paragraph " + j + ": " + par.toString());
        LOG.debug("realRatio: " + realRatio);
        LOG.debug("expectedRatio: " + expectedRatio);
        if (realRatio > 0.8 && expectedRatio > 0.8) {
            LOG.debug("Found");
            textParagraphs.add(par);
            i++;
            // All expected paragraphs matched: stop before textPars.get(i)
            // runs past the end of the list.
            if (i >= textPars.size())
                break;
        }
        j++;
    }
    assertEquals(textPars.size(), textParagraphs.size());
    // Expected values per matched paragraph, in the same order as textPars.
    int[] rowCounts = new int[] { 1, 2, 1, 1 };
    // TODO: words in "spaced" rows (uses spacing to emphasize instead of bold
    // or italics) get split
    // should try to detect multiple single letter words
    // A word count of 0 means "not checked" for that paragraph.
    int[] wordCountsFirstRow = new int[] { 0, 10, 0, 5 };
    for (i = 0; i < textParagraphs.size(); i++) {
        assertEquals("row count " + i, rowCounts[i], textParagraphs.get(i).getRows().size());
        RowOfShapes row = textParagraphs.get(i).getRows().get(0);
        if (wordCountsFirstRow[i] > 0)
            assertEquals("word count " + i, wordCountsFirstRow[i], row.getGroups().size());
    }
}
Also used : Expectations(mockit.Expectations) SourceImage(com.joliciel.jochre.graphics.SourceImage) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) InputStream(java.io.InputStream) Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) RowOfShapes(com.joliciel.jochre.graphics.RowOfShapes) Segmenter(com.joliciel.jochre.graphics.Segmenter) BufferedImage(java.awt.image.BufferedImage) Paragraph(com.joliciel.jochre.graphics.Paragraph) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Aggregations

JochreSession (com.joliciel.jochre.JochreSession) 40, Config (com.typesafe.config.Config) 34, Test (org.junit.Test) 34, BufferedImage (java.awt.image.BufferedImage) 20, ArrayList (java.util.ArrayList) 13, InputStream (java.io.InputStream) 10, Paragraph (com.joliciel.jochre.graphics.Paragraph) 8, RowOfShapes (com.joliciel.jochre.graphics.RowOfShapes) 8, SourceImage (com.joliciel.jochre.graphics.SourceImage) 8, Segmenter (com.joliciel.jochre.graphics.Segmenter) 7, Shape (com.joliciel.jochre.graphics.Shape) 7, ImagePixelGrabber (com.joliciel.jochre.utils.graphics.ImagePixelGrabber) 7, JochreImage (com.joliciel.jochre.graphics.JochreImage) 6, Rectangle (java.awt.Rectangle) 6, HashMap (java.util.HashMap) 6, Session (org.zkoss.zk.ui.Session) 6, BitSet (java.util.BitSet) 5, JochrePage (com.joliciel.jochre.doc.JochrePage) 4, SplitFeature (com.joliciel.jochre.boundaries.features.SplitFeature) 3, JochreDocument (com.joliciel.jochre.doc.JochreDocument) 3