Use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
Class SegmentationTest, method testSegmentation.
/**
 * Segments a sample image end-to-end and checks the number of rows, groups and
 * shapes found against values hard-coded for that sample.
 * <p>
 * Four samples are described, but only sample 1 is currently run (the others
 * are skipped by the guard at the top of the loop). The test:
 * <ol>
 * <li>loads the image from the test resources and wraps it in a document/page;</li>
 * <li>runs the {@code Segmenter} and, if requested, writes the segmented image
 * to the temp directory;</li>
 * <li>logs every shape and remembers pixel (3,3) of the first shape;</li>
 * <li>assigns letters from the expected text to the shapes;</li>
 * <li>re-reads the shapes group by group, checks pixel (3,3) is unchanged, runs
 * a few shape features for logging, and asserts the counts.</li>
 * </ol>
 */
@Test
public void testSegmentation() throws Exception {
  // TODO: Note currently this requires high thresholds to work
  // Need to decide if this is valid in general, or only for these samples
  System.setProperty("config.file", "src/test/resources/testHighThresholds.conf");
  ConfigFactory.invalidateCaches();
  Config config = ConfigFactory.load();
  JochreSession jochreSession = new JochreSession(config);
  boolean writePixelsToLog = true;
  for (int imageNumber = 1; imageNumber <= 4; imageNumber++) {
    // Only the first sample is exercised; the data for samples 2-4 is kept
    // below but never reached.
    // NOTE(review): presumably the other samples do not pass yet - confirm.
    if (imageNumber != 1)
      continue;
    String imageName = "";
    String suffix = "";
    String text = "";
    String fileName = "";
    String userFileName;
    int rowCount = 2;
    int shapeCountRow1;
    int shapeCountRow2;
    int groupCountRow1;
    int groupCountRow2;
    int groupCountRow3 = 0;
    int shapeCountRow1Group1;
    int shapeCountRow2Group1;
    if (imageNumber == 1) {
      imageName = "MotlPeysiDemKhazns2RowsShort2";
      suffix = "jpg";
      text = "איך געה מיט אייך קיינער אין דער וועלט";
      fileName = "MotlPeysiDemKhazns2RowsShort2.pdf";
      userFileName = "Motl Peysi Dem Khazns";
      shapeCountRow1 = 13;
      shapeCountRow2 = 17;
      groupCountRow1 = 4;
      groupCountRow2 = 4;
      shapeCountRow1Group1 = 3;
      shapeCountRow2Group1 = 6;
    } else if (imageNumber == 2) {
      imageName = "MegileLiderZeresh";
      suffix = "png";
      text = "זרש, די מכשפה, װאָס שעלט ווי אַ מגפה";
      fileName = "MegileLiderManger.pdf";
      userFileName = "Megile Lider";
      shapeCountRow1 = 12;
      shapeCountRow2 = 17;
      groupCountRow1 = 3;
      groupCountRow2 = 5;
      shapeCountRow1Group1 = 4;
      shapeCountRow2Group1 = 4;
    } else if (imageNumber == 3) {
      imageName = "MendeleMoykherSforimVol1_41_0Excerpt";
      suffix = "png";
      text = "ער הייסט יאַנקיל, בעריל,";
      fileName = "MendeleMoykherSforimVol1_41_0.png";
      userFileName = "MendeleMoykherSforimVol1_41_0";
      shapeCountRow1 = 20;
      shapeCountRow2 = 0;
      groupCountRow1 = 4;
      groupCountRow2 = 0;
      shapeCountRow1Group1 = 2;
      shapeCountRow2Group1 = 0;
    } else {
      imageName = "JoinedLetterTest";
      suffix = "png";
      text = "Joined Letter Test";
      fileName = "JoinedLetterTest.png";
      userFileName = "JoinedLetterTest";
      rowCount = 2;
      shapeCountRow1 = 23;
      shapeCountRow2 = 23;
      groupCountRow1 = 4;
      groupCountRow2 = 4;
      groupCountRow3 = 5;
      shapeCountRow1Group1 = 6;
      shapeCountRow2Group1 = 5;
    }
    LOG.debug("######### imageName: " + imageName);
    // String fileName = "data/Zelmenyaners3Words.gif";
    // Close the resource stream once the image has been decoded.
    BufferedImage image;
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/test/resources/" + imageName + "." + suffix)) {
      assertNotNull(imageFileStream);
      image = ImageIO.read(imageFileStream);
    }
    JochreDocument doc = new JochreDocument(jochreSession);
    doc.setFileName(fileName);
    doc.setName(userFileName);
    JochrePage page = doc.newPage();
    SourceImage sourceImage = page.newJochreImage(image, imageName);
    sourceImage.setWhiteGapFillFactor(5);
    sourceImage.setImageStatus(ImageStatus.AUTO_NEW);
    if (writePixelsToLog) {
      // Column ruler, then one line per pixel row: "x" = black, "o" = not black.
      LOG.debug("i012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789");
      for (int y = 0; y < sourceImage.getHeight(); y++) {
        StringBuilder line = new StringBuilder();
        line.append(y);
        for (int x = 0; x < sourceImage.getWidth(); x++) {
          if (sourceImage.isPixelBlack(x, y, sourceImage.getBlackThreshold()))
            line.append("x");
          else
            line.append("o");
        }
        LOG.debug(line.toString());
      }
    }
    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    if (segmenter.isDrawSegmentation()) {
      BufferedImage segmentedImage = segmenter.getSegmentedImage();
      File tempDir = new File(System.getProperty("java.io.tmpdir"));
      ImageIO.write(segmentedImage, "PNG", new File(tempDir, imageName + "_seg.png"));
    }
    JochreImage jochreImage = sourceImage;

    // Pass 1: log every shape row by row and remember pixel (3,3) of the very
    // first shape so it can be compared again in pass 3.
    int i = 0;
    boolean firstShape = true;
    int midPixelFirstShape = 0;
    int midPixelFirstShapeRaw = 0;
    for (Paragraph paragraph : jochreImage.getParagraphs()) {
      for (RowOfShapes row : paragraph.getRows()) {
        int j = 0;
        LOG.debug("============= Row " + i + " ================");
        for (Shape shape : row.getShapes()) {
          LOG.debug("Shape (" + i + "," + j + "). Left = " + shape.getLeft() + ". Top = " + shape.getTop() + ". Right = " + shape.getRight() + ". Bottom = " + shape.getBottom() + ". Group: " + shape.getGroup().getIndex());
          if (firstShape) {
            midPixelFirstShape = shape.getPixel(3, 3);
            midPixelFirstShapeRaw = shape.getRawPixel(3, 3);
            firstShape = false;
          }
          if (writePixelsToLog) {
            logShapePixels(shape, sourceImage);
          }
          j++;
        } // next shape
        i++;
      } // next row
    } // next paragraph

    // Pass 2: walk the shapes group by group and assign each one the next
    // character of the expected text; one character is skipped after each
    // group for the space between words.
    i = 0;
    for (Paragraph paragraph : jochreImage.getParagraphs()) {
      for (RowOfShapes row : paragraph.getRows()) {
        for (GroupOfShapes group : row.getGroups()) {
          for (Shape shape : group.getShapes()) {
            if (i < text.length()) {
              String letter = text.substring(i, i + 1);
              String nextLetter = "";
              if (i + 1 < text.length())
                nextLetter = text.substring(i + 1, i + 2);
              // Attach a following combining mark to the base letter.
              // NOTE(review): the first literal appears to hold two combining
              // marks; if so, a one-character nextLetter can never equal it -
              // confirm.
              if (nextLetter.equals("ָֹ") || nextLetter.equals("ַ")) {
                letter += nextLetter;
                i++;
              }
              LOG.debug("Letter: " + letter);
              shape.setLetter(letter);
            }
            i++;
          } // next shape
          // to skip the space
          i++;
          LOG.debug("Space");
        } // next group
      } // next row
    } // next paragraph

    List<ShapeFeature<?>> features = new ArrayList<ShapeFeature<?>>();
    features.add(new VerticalElongationFeature());
    features.add(new VerticalSizeFeature());
    features.add(new TouchesBaseLineFeature());
    features.add(new TouchesMeanLineFeature());
    features.add(new EmptyCentreFeature());

    // Pass 3: log each shape with its brightness grid and feature outcomes,
    // and assert the expected counts. Here i is the row index across all
    // paragraphs and j is the group index within the row.
    i = 0;
    DecimalFormat df = new DecimalFormat("0.00");
    firstShape = true;
    int totalRowCount = 0;
    for (Paragraph paragraph : jochreImage.getParagraphs()) {
      for (RowOfShapes row : paragraph.getRows()) {
        totalRowCount++;
        LOG.debug("============= Row " + i + " ================");
        int j = 0;
        for (GroupOfShapes group : row.getGroups()) {
          for (Shape shape : group.getShapes()) {
            LOG.debug("============= Shape (" + i + "," + j + ") ================");
            LOG.debug("Left = " + shape.getLeft() + ". Top = " + shape.getTop() + ". Right = " + shape.getRight() + ". Bottom = " + shape.getBottom());
            LOG.debug("Letter " + shape.getLetter());
            if (firstShape) {
              // The first shape must return the same pixel values as in pass 1.
              LOG.debug("mid pixel: " + midPixelFirstShape);
              assertEquals(midPixelFirstShape, shape.getPixel(3, 3));
              LOG.debug("mid pixel raw: " + midPixelFirstShapeRaw);
              assertEquals(midPixelFirstShapeRaw, shape.getRawPixel(3, 3));
              firstShape = false;
            }
            if (writePixelsToLog) {
              logShapePixels(shape, sourceImage);
            }
            double[][] totals = shape.getBrightnessBySection(5, 5, 1, SectionBrightnessMeasurementMethod.RAW);
            LOG.debug("Brightness counts");
            for (int y = 0; y < totals[0].length; y++) {
              StringBuilder line = new StringBuilder();
              for (int x = 0; x < totals.length; x++) {
                line.append(df.format(totals[x][y])).append("\t");
              }
              LOG.debug(line.toString());
            }
            for (ShapeFeature<?> feature : features) {
              RuntimeEnvironment env = new RuntimeEnvironment();
              FeatureResult<?> outcome = feature.check(shape, env);
              LOG.debug(outcome.toString());
            }
          } // next shape
          // Only the first group of the first two rows has an expected size.
          if (j == 0) {
            if (i == 0)
              assertEquals(shapeCountRow1Group1, group.getShapes().size());
            else if (i == 1)
              assertEquals(shapeCountRow2Group1, group.getShapes().size());
          }
          j++;
        } // next group
        if (i == 0)
          assertEquals(groupCountRow1, row.getGroups().size());
        else if (i == 1)
          assertEquals(groupCountRow2, row.getGroups().size());
        else if (i == 2)
          assertEquals(groupCountRow3, row.getGroups().size());
        if (i == 0)
          assertEquals(shapeCountRow1, row.getShapes().size());
        else if (i == 1)
          assertEquals(shapeCountRow2, row.getShapes().size());
        i++;
      } // next row
    } // next paragraph
    assertEquals(rowCount, totalRowCount);
  } // next test image
  LOG.debug("************** Finished ***********");
}

/**
 * Logs the shape one pixel row per line: the line starts with "M" on the mean
 * line, "B" on the base line and the row number otherwise, followed by "x" for
 * each black pixel and "o" for every other pixel.
 *
 * @param shape the shape to dump
 * @param sourceImage the image whose black threshold decides what counts as black
 */
private void logShapePixels(Shape shape, SourceImage sourceImage) {
  for (int y = 0; y < shape.getHeight(); y++) {
    StringBuilder line = new StringBuilder();
    if (y == shape.getMeanLine())
      line.append("M");
    else if (y == shape.getBaseLine())
      line.append("B");
    else
      line.append(y);
    for (int x = 0; x < shape.getWidth(); x++) {
      if (shape.isPixelBlack(x, y, sourceImage.getBlackThreshold()))
        line.append("x");
      else
        line.append("o");
    }
    LOG.debug(line.toString());
  }
}
Use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
Class TextGetterImplTest, method testGetTextFontSizes.
/**
 * Checks the XHTML output for one row of four single-letter groups whose
 * x-heights are 10, 20, 10 and 5: the second group is expected inside
 * {@code <big>} and the fourth inside {@code <small>}.
 */
@Test
public void testGetTextFontSizes() {
  // One letter and one x-height per group; each group holds a single shape
  // with the same x-height as the group itself.
  final String[] letters = { "A", "B", "C", "D" };
  final int[] xHeights = { 10, 20, 10, 5 };

  List<GroupOfShapes> groupList = new ArrayList<>();
  for (int k = 0; k < letters.length; k++) {
    final Shape shape = mock(Shape.class);
    when(shape.getLetter()).thenReturn(letters[k]);
    when(shape.getXHeight()).thenReturn(xHeights[k]);

    List<Shape> shapesInGroup = new ArrayList<>();
    shapesInGroup.add(shape);

    final GroupOfShapes group = mock(GroupOfShapes.class);
    when(group.getShapes()).thenReturn(shapesInGroup);
    when(group.getXHeight()).thenReturn(xHeights[k]);
    groupList.add(group);
  }

  // image -> paragraph -> row -> groups
  final RowOfShapes row = mock(RowOfShapes.class);
  when(row.getGroups()).thenReturn(groupList);
  final List<RowOfShapes> rowList = new ArrayList<>();
  rowList.add(row);

  final Paragraph paragraph = mock(Paragraph.class);
  when(paragraph.getRows()).thenReturn(rowList);
  final List<Paragraph> paragraphList = new ArrayList<>();
  paragraphList.add(paragraph);

  // image -> page -> document, with a left-to-right document
  final JochreDocument doc = mock(JochreDocument.class);
  when(doc.isLeftToRight()).thenReturn(true);
  final JochrePage page = mock(JochrePage.class);
  when(page.getDocument()).thenReturn(doc);
  final JochreImage jochreImage = mock(JochreImage.class);
  when(jochreImage.getPage()).thenReturn(page);
  when(jochreImage.getParagraphs()).thenReturn(paragraphList);

  StringWriter output = new StringWriter();
  TextGetter textGetter = new TextGetter(output, TextFormat.XHTML);
  textGetter.onImageComplete(jochreImage);

  String xhtml = output.toString();
  LOG.debug(xhtml);
  assertEquals("<p dir=\"rtl\">A <big>B </big>C <small>D </small></p>", xhtml);
}
Use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
Class TextGetterImplTest, method testGetText.
/**
 * Checks the plain-text output for a single group of nine shapes in a
 * right-to-left document: two commas, three pairs of "|x" / "x|" split
 * letters, and a trailing comma.
 */
@Test
public void testGetText() {
  // Letters of the nine shapes, in group order.
  final String[] letters = {
      ",",
      ",",
      "|אַ",
      "אַ|",
      "|m",
      "m|",
      "|ש",
      "ע|",
      ","
  };
  List<Shape> shapeList = new ArrayList<>();
  for (String letter : letters) {
    final Shape shape = mock(Shape.class);
    when(shape.getLetter()).thenReturn(letter);
    shapeList.add(shape);
  }

  // image -> paragraph -> row -> group -> shapes
  final GroupOfShapes group = mock(GroupOfShapes.class);
  when(group.getShapes()).thenReturn(shapeList);
  when(group.getXHeight()).thenReturn(10);
  final List<GroupOfShapes> groupList = new ArrayList<>();
  groupList.add(group);

  final RowOfShapes row = mock(RowOfShapes.class);
  when(row.getGroups()).thenReturn(groupList);
  final List<RowOfShapes> rowList = new ArrayList<>();
  rowList.add(row);

  final Paragraph paragraph = mock(Paragraph.class);
  when(paragraph.getRows()).thenReturn(rowList);
  final List<Paragraph> paragraphList = new ArrayList<>();
  paragraphList.add(paragraph);

  // image -> page -> document, with a right-to-left document
  final JochreDocument doc = mock(JochreDocument.class);
  when(doc.isLeftToRight()).thenReturn(false);
  final JochrePage page = mock(JochrePage.class);
  when(page.getDocument()).thenReturn(doc);
  final JochreImage jochreImage = mock(JochreImage.class);
  when(jochreImage.getPage()).thenReturn(page);
  when(jochreImage.getParagraphs()).thenReturn(paragraphList);

  StringWriter output = new StringWriter();
  TextGetter textGetter = new TextGetter(output, TextFormat.PLAIN);
  textGetter.onImageComplete(jochreImage);

  String plainText = output.toString();
  LOG.debug(plainText);
  assertEquals("„אַm|שע|, \n", plainText);
}
Use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
Class TextGetter, method onImageComplete.
/**
 * Writes the text of every non-junk paragraph of the image to the writer.
 * <p>
 * In {@code XHTML} format each paragraph is wrapped in {@code <p>} (with
 * {@code dir="rtl"} when the image is not left-to-right). Groups whose x-height
 * is at least 1.15 times the mean shape x-height are wrapped in
 * {@code <big>}, and groups at most 0.85 times the mean in {@code <small>}.
 * In any other format each paragraph is terminated by a newline.
 * <p>
 * Within a group, shape letters are concatenated with these adjustments:
 * <ul>
 * <li>a "|x" shape followed by an "x|" shape is a split ("gehakte") letter and
 * is merged into "x"; if the two halves differ they are kept verbatim;</li>
 * <li>two consecutive "," become "„" and two consecutive "'" become "“";</li>
 * <li>a "-" that is the last shape of the last group of a row (except the
 * paragraph's last row) is dropped, and the word is joined to the first word
 * of the next row; the hyphen is re-inserted only if a lexicon is available
 * and knows the hyphenated form.</li>
 * </ul>
 *
 * @param image the analysed image whose text is written out
 * @throws RuntimeException wrapping any {@link IOException} raised by the writer
 */
@Override
public void onImageComplete(JochreImage image) {
  try {
    double minRatioBiggerFont = 1.15;
    double maxRatioSmallerFont = 0.85;

    // The mean x-height over all shapes is only needed for font-size markup.
    double meanXHeight = 0;
    if (textFormat.equals(TextFormat.XHTML)) {
      Mean xHeightMean = new Mean();
      for (Paragraph paragraph : image.getParagraphs()) {
        if (!paragraph.isJunk()) {
          for (RowOfShapes row : paragraph.getRows()) {
            for (GroupOfShapes group : row.getGroups()) {
              for (Shape shape : group.getShapes()) {
                xHeightMean.increment(shape.getXHeight());
              }
            }
          }
        }
      }
      meanXHeight = xHeightMean.getResult();
    }

    String paragraphString = "<p>";
    if (!image.isLeftToRight())
      paragraphString = "<p dir=\"rtl\">";

    for (Paragraph paragraph : image.getParagraphs()) {
      if (paragraph.isJunk())
        continue;

      if (textFormat.equals(TextFormat.XHTML))
        writer.append(paragraphString);

      // Offset in paragraphText -> font size that starts there
      // (1 = big, 0 = normal, -1 = small). Storing the target size rather than
      // a "bigger/smaller" flag lets a direct big<->small transition close one
      // tag and open the other at the same offset.
      Map<Integer, Integer> fontSizeChanges = new TreeMap<Integer, Integer>();
      int currentFontSize = 0;
      StringBuilder paragraphText = new StringBuilder();
      String lastWord = "";
      boolean lastRowEndedWithHyphen = false;

      for (RowOfShapes row : paragraph.getRows()) {
        for (GroupOfShapes group : row.getGroups()) {
          boolean endOfRowHyphen = false;

          if (textFormat.equals(TextFormat.XHTML)) {
            double ratio = group.getXHeight() / meanXHeight;
            int groupFontSize = 0;
            if (ratio >= minRatioBiggerFont) {
              groupFontSize = 1;
            } else if (ratio <= maxRatioSmallerFont) {
              groupFontSize = -1;
            }
            if (groupFontSize != currentFontSize) {
              fontSizeChanges.put(paragraphText.length(), groupFontSize);
              currentFontSize = groupFontSize;
            }
          }

          StringBuilder sb = new StringBuilder();
          // Holds letters that may still combine with the next shape: the
          // first half of a split letter, or a single "," / "'".
          StringBuilder currentSequence = new StringBuilder();
          for (Shape shape : group.getShapes()) {
            String letter = shape.getLetter();
            if (letter.startsWith("|")) {
              // beginning of a gehakte letter
              currentSequence.append(shape.getLetter());
              continue;
            } else if (letter.endsWith("|")) {
              // end of a gehakte letter
              if (currentSequence.length() > 0 && currentSequence.charAt(0) == '|') {
                String letter1 = currentSequence.toString().substring(1);
                String letter2 = letter.substring(0, letter.length() - 1);
                if (letter1.equals(letter2)) {
                  letter = letter1;
                } else {
                  letter = currentSequence.toString() + letter;
                }
                currentSequence = new StringBuilder();
              }
            }
            if (letter.equals(",")) {
              // could be ",," = "„"
              if (currentSequence.length() > 0 && currentSequence.charAt(0) == ',') {
                sb.append("„");
                currentSequence = new StringBuilder();
              } else {
                currentSequence.append(shape.getLetter());
              }
            } else if (letter.equals("'")) {
              // could be "''" = "“"
              if (currentSequence.length() > 0 && currentSequence.charAt(0) == '\'') {
                sb.append("“");
                currentSequence = new StringBuilder();
              } else {
                currentSequence.append(shape.getLetter());
              }
            } else if (letter.equals("-")) {
              if (shape.getIndex() == group.getShapes().size() - 1 && group.getIndex() == row.getGroups().size() - 1 && row.getIndex() != paragraph.getRows().size() - 1) {
                // do nothing - dash at the end of the line
                // we'll assume for now these dashes are always supposed to
                // disappear, though of course they could be used in the place
                // of a real mid-word dash
                endOfRowHyphen = true;
              } else {
                sb.append(shape.getLetter());
              }
            } else {
              sb.append(currentSequence);
              currentSequence = new StringBuilder();
              // generalise this
              if (letter.equals(",,")) {
                sb.append("„");
              } else if (letter.equals("''")) {
                sb.append("“");
              } else {
                sb.append(letter);
              }
            }
          } // next shape

          // Flush whatever was still waiting for a partner.
          sb.append(currentSequence);
          String word = sb.toString();

          if (endOfRowHyphen) {
            lastRowEndedWithHyphen = true;
            endOfRowHyphen = false;
          } else if (lastRowEndedWithHyphen) {
            // First word after a dropped end-of-row hyphen: restore the hyphen
            // only if the lexicon knows the hyphenated form.
            if (lexicon != null) {
              String hyphenatedWord = lastWord + "-" + word;
              int frequency = lexicon.getFrequency(hyphenatedWord);
              LOG.debug("hyphenatedWord: " + hyphenatedWord + ", Frequency: " + frequency);
              if (frequency > 0) {
                paragraphText.append("-");
              }
            }
            lastRowEndedWithHyphen = false;
          }
          lastWord = word;
          paragraphText.append(word);
          // No space after a word whose end-of-row hyphen was dropped, so it
          // joins directly to the next word.
          if (!lastRowEndedWithHyphen)
            paragraphText.append(' ');
        } // next group
      } // next row

      String paragraphStr = paragraphText.toString();
      // With font-size markup the text goes to a buffer first, so the tags can
      // be spliced in at the recorded offsets.
      Writer currentWriter = writer;
      boolean haveFontSizes = fontSizeChanges.size() > 0;
      if (haveFontSizes) {
        currentWriter = new StringWriter();
      }
      if (image.getPage().getDocument().isLeftToRight()) {
        currentWriter.append(paragraphText);
      } else {
        this.appendBidiText(paragraphStr, currentWriter);
      }

      if (haveFontSizes) {
        // NOTE(review): the offsets were measured on paragraphText; this
        // assumes appendBidiText keeps the same character count - confirm.
        currentFontSize = 0;
        String text = currentWriter.toString();
        int currentIndex = 0;
        for (Map.Entry<Integer, Integer> change : fontSizeChanges.entrySet()) {
          int changeIndex = change.getKey();
          int newFontSize = change.getValue();
          writer.append(text.substring(currentIndex, changeIndex));
          // Close the tag that is open, then open the tag for the new size.
          if (currentFontSize > 0) {
            writer.append("</big>");
          } else if (currentFontSize < 0) {
            writer.append("</small>");
          }
          if (newFontSize > 0) {
            writer.append("<big>");
          } else if (newFontSize < 0) {
            writer.append("<small>");
          }
          currentFontSize = newFontSize;
          currentIndex = changeIndex;
        }
        writer.append(text.substring(currentIndex));
        if (currentFontSize > 0) {
          writer.append("</big>");
        } else if (currentFontSize < 0) {
          writer.append("</small>");
        }
      }

      if (textFormat.equals(TextFormat.XHTML))
        writer.append("</p>");
      else
        writer.append('\n');
      writer.flush();
    } // next paragraph
  } catch (IOException e) {
    LOG.error("Failed writing to " + this.getClass().getSimpleName(), e);
    throw new RuntimeException(e);
  }
}
Use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
Class RecursiveShapeSplitterTest, method testSplitShape.
/**
 * Splits a 64-pixel-wide shape recursively (max depth 2) with a decision maker
 * that always answers 50/50 between splitting and not splitting, and expects 5
 * shape sequences, each with a score of 1.0.
 */
@SuppressWarnings("unchecked")
@Test
public void testSplitShape() throws Exception {
  System.setProperty("config.file", "src/test/resources/test.conf");
  ConfigFactory.invalidateCaches();
  Config config = ConfigFactory.load();
  JochreSession jochreSession = new JochreSession(config);

  BufferedImage blankImage = new BufferedImage(256, 256, BufferedImage.TYPE_INT_RGB);
  final JochreImage jochreImage = new JochreImage(blankImage, jochreSession);

  // The full shape plus its left and right halves, all sharing the same
  // base line and mean line.
  final Shape wholeShape = new Shape(jochreImage, 0, 0, 63, 15, jochreSession);
  wholeShape.setBaseLine(12);
  wholeShape.setMeanLine(4);
  final Shape leftHalf = new Shape(jochreImage, 0, 0, 31, 15, jochreSession);
  leftHalf.setBaseLine(12);
  leftHalf.setMeanLine(4);
  final Shape rightHalf = new Shape(jochreImage, 32, 0, 63, 15, jochreSession);
  rightHalf.setBaseLine(12);
  rightHalf.setMeanLine(4);

  // Each shape offers exactly one split candidate, at its middle.
  final SplitCandidateFinder splitCandidateFinder = mock(SplitCandidateFinder.class);

  Split wholeSplit = new Split(wholeShape, jochreSession);
  wholeSplit.setPosition(31);
  List<Split> wholeSplits = new ArrayList<>();
  wholeSplits.add(wholeSplit);
  when(splitCandidateFinder.findSplitCandidates(wholeShape)).thenReturn(wholeSplits);

  Split leftSplit = new Split(leftHalf, jochreSession);
  leftSplit.setPosition(15);
  List<Split> leftSplits = new ArrayList<>();
  leftSplits.add(leftSplit);
  when(splitCandidateFinder.findSplitCandidates(leftHalf)).thenReturn(leftSplits);

  Split rightSplit = new Split(rightHalf, jochreSession);
  rightSplit.setPosition(15);
  List<Split> rightSplits = new ArrayList<>();
  rightSplits.add(rightSplit);
  when(splitCandidateFinder.findSplitCandidates(rightHalf)).thenReturn(rightSplits);

  // Every decision is an even split between "split" and "do not split".
  final DecisionMaker decisionMaker = mock(DecisionMaker.class);
  List<Decision> evenDecisions = new ArrayList<>();
  evenDecisions.add(new Decision(SplitOutcome.DO_SPLIT.name(), 0.5));
  evenDecisions.add(new Decision(SplitOutcome.DO_NOT_SPLIT.name(), 0.5));
  when(decisionMaker.decide(anyList())).thenReturn(evenDecisions);

  Set<SplitFeature<?>> noFeatures = new TreeSet<>();
  RecursiveShapeSplitter splitter = new RecursiveShapeSplitter(splitCandidateFinder, noFeatures, decisionMaker, jochreSession);
  splitter.setBeamWidth(10);
  splitter.setMaxDepth(2);
  splitter.setMinWidthRatio(1.0);

  List<ShapeSequence> sequences = splitter.split(wholeShape);
  assertEquals(5, sequences.size());
  for (ShapeSequence sequence : sequences) {
    assertEquals(1.0, sequence.getScore(), 0.0001);
  }
}
Aggregations