use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.
the class SegmentationTest method testSegmentation.
/**
 * Segments a sample image and checks the number of rows, groups and shapes
 * found, and that the pixel values read from the first shape stay the same
 * between two passes over the segmented image.
 * <p>
 * Four sample images are described below, but the guard at the top of the loop
 * currently restricts the run to image number 1.
 *
 * @throws Exception on any failure while loading the configuration or the sample image
 */
@Test
public void testSegmentation() throws Exception {
  // TODO: Note currently this requires high thresholds to work
  // Need to decide if this is valid in general, or only for these samples
  System.setProperty("config.file", "src/test/resources/testHighThresholds.conf");
  ConfigFactory.invalidateCaches();
  Config config = ConfigFactory.load();
  JochreSession jochreSession = new JochreSession(config);
  boolean writePixelsToLog = true;
  for (int imageNumber = 1; imageNumber <= 4; imageNumber++) {
    // Only the first sample is exercised at the moment.
    if (imageNumber != 1) {
      continue;
    }
    String imageName = "";
    String suffix = "";
    String text = "";
    String fileName = "";
    String userFileName;
    int rowCount = 2;
    int shapeCountRow1;
    int shapeCountRow2;
    int groupCountRow1;
    int groupCountRow2;
    int groupCountRow3 = 0;
    int shapeCountRow1Group1;
    int shapeCountRow2Group1;
    if (imageNumber == 1) {
      imageName = "MotlPeysiDemKhazns2RowsShort2";
      suffix = "jpg";
      text = "איך געה מיט אייך קיינער אין דער וועלט";
      fileName = "MotlPeysiDemKhazns2RowsShort2.pdf";
      userFileName = "Motl Peysi Dem Khazns";
      shapeCountRow1 = 13;
      shapeCountRow2 = 17;
      groupCountRow1 = 4;
      groupCountRow2 = 4;
      shapeCountRow1Group1 = 3;
      shapeCountRow2Group1 = 6;
    } else if (imageNumber == 2) {
      imageName = "MegileLiderZeresh";
      suffix = "png";
      text = "זרש, די מכשפה, װאָס שעלט ווי אַ מגפה";
      fileName = "MegileLiderManger.pdf";
      userFileName = "Megile Lider";
      shapeCountRow1 = 12;
      shapeCountRow2 = 17;
      groupCountRow1 = 3;
      groupCountRow2 = 5;
      shapeCountRow1Group1 = 4;
      shapeCountRow2Group1 = 4;
    } else if (imageNumber == 3) {
      imageName = "MendeleMoykherSforimVol1_41_0Excerpt";
      suffix = "png";
      text = "ער הייסט יאַנקיל, בעריל,";
      fileName = "MendeleMoykherSforimVol1_41_0.png";
      userFileName = "MendeleMoykherSforimVol1_41_0";
      shapeCountRow1 = 20;
      shapeCountRow2 = 0;
      groupCountRow1 = 4;
      groupCountRow2 = 0;
      shapeCountRow1Group1 = 2;
      shapeCountRow2Group1 = 0;
    } else {
      imageName = "JoinedLetterTest";
      suffix = "png";
      text = "Joined Letter Test";
      fileName = "JoinedLetterTest.png";
      userFileName = "JoinedLetterTest";
      rowCount = 2;
      shapeCountRow1 = 23;
      shapeCountRow2 = 23;
      groupCountRow1 = 4;
      groupCountRow2 = 4;
      groupCountRow3 = 5;
      shapeCountRow1Group1 = 6;
      shapeCountRow2Group1 = 5;
    }
    LOG.debug("######### imageName: " + imageName);

    // ImageIO.read does not close the stream it is given, so close it here.
    BufferedImage image;
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/test/resources/" + imageName + "." + suffix)) {
      assertNotNull(imageFileStream);
      image = ImageIO.read(imageFileStream);
    }
    // ImageIO.read returns null when no registered reader can decode the data.
    assertNotNull(image);

    JochreDocument doc = new JochreDocument(jochreSession);
    doc.setFileName(fileName);
    doc.setName(userFileName);
    JochrePage page = doc.newPage();
    SourceImage sourceImage = page.newJochreImage(image, imageName);
    sourceImage.setWhiteGapFillFactor(5);
    sourceImage.setImageStatus(ImageStatus.AUTO_NEW);
    if (writePixelsToLog) {
      logImagePixels(sourceImage);
    }

    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    if (segmenter.isDrawSegmentation()) {
      BufferedImage segmentedImage = segmenter.getSegmentedImage();
      File tempDir = new File(System.getProperty("java.io.tmpdir"));
      ImageIO.write(segmentedImage, "PNG", new File(tempDir, imageName + "_seg.png"));
    }
    JochreImage jochreImage = sourceImage;

    // First pass: log every shape row by row and remember the pixel values
    // of the very first shape for comparison in the final pass.
    boolean firstShape = true;
    int midPixelFirstShape = 0;
    int midPixelFirstShapeRaw = 0;
    int loggedRowIndex = 0;
    for (Paragraph paragraph : jochreImage.getParagraphs()) {
      for (RowOfShapes row : paragraph.getRows()) {
        int shapeIndex = 0;
        LOG.debug("============= Row " + loggedRowIndex + " ================");
        for (Shape shape : row.getShapes()) {
          LOG.debug("Shape (" + loggedRowIndex + "," + shapeIndex + "). Left = " + shape.getLeft() + ". Top = " + shape.getTop() + ". Right = " + shape.getRight() + ". Bottom = " + shape.getBottom() + ". Group: " + shape.getGroup().getIndex());
          if (firstShape) {
            midPixelFirstShape = shape.getPixel(3, 3);
            midPixelFirstShapeRaw = shape.getRawPixel(3, 3);
            firstShape = false;
          }
          if (writePixelsToLog) {
            logShapePixels(shape, sourceImage);
          }
          shapeIndex++;
        }
        loggedRowIndex++;
      }
    }

    // Second pass: assign the expected text to the shapes, one character per
    // shape, skipping one character of the text after each group.
    int textPos = 0;
    for (Paragraph paragraph : jochreImage.getParagraphs()) {
      for (RowOfShapes row : paragraph.getRows()) {
        for (GroupOfShapes group : row.getGroups()) {
          for (Shape shape : group.getShapes()) {
            if (textPos < text.length()) {
              String letter = text.substring(textPos, textPos + 1);
              String nextLetter = "";
              if (textPos + 1 < text.length()) {
                nextLetter = text.substring(textPos + 1, textPos + 2);
              }
              // A following diacritic is attached to the same shape.
              if (nextLetter.equals("ָֹ") || nextLetter.equals("ַ")) {
                letter += nextLetter;
                textPos++;
              }
              LOG.debug("Letter: " + letter);
              shape.setLetter(letter);
            }
            textPos++;
          }
          // to skip the space
          textPos++;
          LOG.debug("Space");
        }
      }
    }

    List<ShapeFeature<?>> features = new ArrayList<ShapeFeature<?>>();
    features.add(new VerticalElongationFeature());
    features.add(new VerticalSizeFeature());
    features.add(new TouchesBaseLineFeature());
    features.add(new TouchesMeanLineFeature());
    features.add(new EmptyCentreFeature());

    // Third pass: log brightness and feature results per shape and check the
    // expected group and shape counts for the first rows.
    DecimalFormat df = new DecimalFormat("0.00");
    firstShape = true;
    int rowIndex = 0;
    int totalRowCount = 0;
    for (Paragraph paragraph : jochreImage.getParagraphs()) {
      for (RowOfShapes row : paragraph.getRows()) {
        totalRowCount++;
        LOG.debug("============= Row " + rowIndex + " ================");
        int groupIndex = 0;
        for (GroupOfShapes group : row.getGroups()) {
          for (Shape shape : group.getShapes()) {
            LOG.debug("============= Shape (" + rowIndex + "," + groupIndex + ") ================");
            LOG.debug("Left = " + shape.getLeft() + ". Top = " + shape.getTop() + ". Right = " + shape.getRight() + ". Bottom = " + shape.getBottom());
            LOG.debug("Letter " + shape.getLetter());
            if (firstShape) {
              LOG.debug("mid pixel: " + midPixelFirstShape);
              assertEquals(midPixelFirstShape, shape.getPixel(3, 3));
              LOG.debug("mid pixel raw: " + midPixelFirstShapeRaw);
              assertEquals(midPixelFirstShapeRaw, shape.getRawPixel(3, 3));
              firstShape = false;
            }
            if (writePixelsToLog) {
              logShapePixels(shape, sourceImage);
            }
            double[][] totals = shape.getBrightnessBySection(5, 5, 1, SectionBrightnessMeasurementMethod.RAW);
            LOG.debug("Brightness counts");
            for (int y = 0; y < totals[0].length; y++) {
              StringBuilder line = new StringBuilder();
              for (int x = 0; x < totals.length; x++) {
                line.append(df.format(totals[x][y])).append("\t");
              }
              LOG.debug(line.toString());
            }
            for (ShapeFeature<?> feature : features) {
              RuntimeEnvironment env = new RuntimeEnvironment();
              FeatureResult<?> outcome = feature.check(shape, env);
              LOG.debug(outcome.toString());
            }
          }
          // Only the first group of each of the first two rows is checked.
          if (groupIndex == 0) {
            if (rowIndex == 0) {
              assertEquals(shapeCountRow1Group1, group.getShapes().size());
            } else if (rowIndex == 1) {
              assertEquals(shapeCountRow2Group1, group.getShapes().size());
            }
          }
          groupIndex++;
        }
        if (rowIndex == 0) {
          assertEquals(groupCountRow1, row.getGroups().size());
        } else if (rowIndex == 1) {
          assertEquals(groupCountRow2, row.getGroups().size());
        } else if (rowIndex == 2) {
          assertEquals(groupCountRow3, row.getGroups().size());
        }
        if (rowIndex == 0) {
          assertEquals(shapeCountRow1, row.getShapes().size());
        } else if (rowIndex == 1) {
          assertEquals(shapeCountRow2, row.getShapes().size());
        }
        rowIndex++;
      }
    }
    assertEquals(rowCount, totalRowCount);
  }
  LOG.debug("************** Finished ***********");
}

/**
 * Writes the whole source image to the debug log, one line per pixel row,
 * using 'x' for pixels reported black at the image's black threshold and 'o'
 * otherwise. Each line starts with its row number.
 */
private void logImagePixels(SourceImage sourceImage) {
  LOG.debug("i012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789");
  for (int y = 0; y < sourceImage.getHeight(); y++) {
    StringBuilder line = new StringBuilder().append(y);
    for (int x = 0; x < sourceImage.getWidth(); x++) {
      line.append(sourceImage.isPixelBlack(x, y, sourceImage.getBlackThreshold()) ? "x" : "o");
    }
    LOG.debug(line.toString());
  }
}

/**
 * Writes a single shape to the debug log, one line per pixel row. The line
 * prefix is "M" on the shape's mean line, "B" on its base line, and the row
 * number otherwise; pixels are rendered as 'x' (black at the source image's
 * black threshold) or 'o'.
 */
private void logShapePixels(Shape shape, SourceImage sourceImage) {
  for (int y = 0; y < shape.getHeight(); y++) {
    StringBuilder line = new StringBuilder();
    if (y == shape.getMeanLine()) {
      line.append("M");
    } else if (y == shape.getBaseLine()) {
      line.append("B");
    } else {
      line.append(y);
    }
    for (int x = 0; x < shape.getWidth(); x++) {
      line.append(shape.isPixelBlack(x, y, sourceImage.getBlackThreshold()) ? "x" : "o");
    }
    LOG.debug(line.toString());
  }
}
use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.
the class JochrePage method segment.
/**
 * Segments every image on this page. Each image is cast to
 * {@link SourceImage} and handed to a fresh {@link Segmenter}; a debug line
 * is logged per image once it has been segmented.
 */
public void segment() {
  int imageIndex = 0;
  for (JochreImage pageImage : this.getImages()) {
    SourceImage source = (SourceImage) pageImage;
    new Segmenter(source, jochreSession).segment();
    String message = "Image " + imageIndex + " segmented: " + source.getName();
    LOG.debug(message);
    imageIndex += 1;
  }
}
use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.
the class JochrePage method newJochreImage.
/**
 * Creates a {@link SourceImage} for this page from the given bitmap, appends
 * it to this page's images, and sets its index to the number of images on the
 * page after the addition (so the first image added gets index 1).
 *
 * @param image the bitmap to wrap
 * @param imageName the name passed to the new source image
 * @return the newly created source image
 */
public SourceImage newJochreImage(BufferedImage image, String imageName) {
  final SourceImage created = new SourceImage(this, imageName, image, jochreSession);
  this.getImages().add(created);
  int position = this.getImages().size();
  created.setIndex(position);
  return created;
}
use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.
the class JochrePage method segmentAndShow.
/**
 * Segment any image on this page and output the segmentation into PNG files
 * so that they can be viewed by the user. Each file is written to
 * {@code outputDirectory} and named after the image with a {@code _seg.png}
 * suffix.
 *
 * @param outputDirectory the directory into which the PNG files are written
 * @throws JochreException if a PNG file cannot be written, either because of
 *         an I/O error or because no writer accepted the image
 */
public void segmentAndShow(String outputDirectory) {
  int i = 0;
  for (JochreImage image : this.getImages()) {
    SourceImage sourceImage = (SourceImage) image;
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.setDrawSegmentation(true);
    segmenter.segment();
    BufferedImage segmentedImage = segmenter.getSegmentedImage();
    File outputFile = new File(outputDirectory, image.getName() + "_seg.png");
    try {
      // ImageIO.write signals "no suitable writer" by returning false rather
      // than throwing, so surface that case instead of silently skipping the file.
      if (!ImageIO.write(segmentedImage, "PNG", outputFile)) {
        throw new IOException("No PNG writer available for segmented image: " + outputFile);
      }
    } catch (IOException e) {
      throw new JochreException(e);
    }
    LOG.debug("Image " + i + " segmented: " + sourceImage.getName());
    i++;
  }
}
use of com.joliciel.jochre.graphics.SourceImage in project jochre by urieli.
the class SegmentationTest method testPietrushka.
/**
 * Pietrushka is a bit unusual in that it contains a column separator in the
 * middle. Given that it's in Yiddish, the columns need to be aligned from right
 * to left.
 * <p>
 * The test segments the page, then walks the resulting paragraphs in order
 * and matches them against five expected text rectangles, which must be found
 * in the listed order. For each matched paragraph it checks the row count and
 * the number of groups in the first row.
 *
 * @throws Exception on any failure while loading the configuration or the sample image
 */
@Test
public void testPietrushka(@Mocked JochrePage jochrePage) throws Exception {
  Map<String, Object> configMap = new HashMap<>();
  configMap.put("jochre.locale", "yi");
  Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
  JochreSession jochreSession = new JochreSession(config);
  String imageName = "Pietrushka_FolksEntsiklopedyeVol1_17_0.png";
  LOG.debug(imageName);

  // ImageIO.read does not close the stream it is given, so close it here.
  BufferedImage image;
  try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
    assertNotNull(imageFileStream);
    image = ImageIO.read(imageFileStream);
  }
  // ImageIO.read returns null when no registered reader can decode the data.
  assertNotNull(image);

  SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
  Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
  segmenter.segment();

  // Expected text paragraphs as (x, y, width, height), in the order in which
  // they are expected to appear among the segmented paragraphs.
  List<Rectangle> textPars = new ArrayList<>();
  textPars.add(new Rectangle(1832, 688, 1336, 1864));
  textPars.add(new Rectangle(1848, 2608, 1312, 2200));
  textPars.add(new Rectangle(1848, 4856, 1296, 94));
  textPars.add(new Rectangle(448, 696, 1320, 2080));
  textPars.add(new Rectangle(448, 2816, 1328, 2128));

  int i = 0;
  int j = 0;
  List<Paragraph> textParagraphs = new ArrayList<>();
  for (Paragraph par : sourceImage.getParagraphs()) {
    // Every expected rectangle has been matched: stop, otherwise
    // textPars.get(i) would be out of bounds for any trailing paragraph.
    if (i >= textPars.size()) {
      break;
    }
    Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
    Rectangle expected = textPars.get(i);
    Rectangle intersection = expected.intersection(real);
    // Multiply as doubles so large pages cannot overflow int arithmetic.
    double realArea = (double) real.width * real.height;
    double expectedArea = (double) expected.width * expected.height;
    // For disjoint rectangles the intersection has negative dimensions,
    // whose product would be a misleading positive area.
    double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;
    double realRatio = intersectionArea / realArea;
    double expectedRatio = intersectionArea / expectedArea;
    LOG.debug("Paragraph " + j);
    LOG.debug("realRatio: " + realRatio);
    LOG.debug("expectedRatio: " + expectedRatio);
    if (realRatio > 0.95 && expectedRatio > 0.8) {
      LOG.debug("Found");
      textParagraphs.add(par);
      i++;
    }
    j++;
  }
  assertEquals(textPars.size(), textParagraphs.size());

  int[] rowCounts = new int[] { 18, 21, 1, 20, 21 };
  int[] wordCountsFirstRow = new int[] { 6, 7, 7, 8, 7 };
  for (i = 0; i < textParagraphs.size(); i++) {
    assertEquals(rowCounts[i], textParagraphs.get(i).getRows().size());
    RowOfShapes row = textParagraphs.get(i).getRows().get(0);
    assertEquals(wordCountsFirstRow[i], row.getGroups().size());
  }
}
Aggregations