use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.
the class ImageController method save.
/**
 * Saves the image currently being edited, together with any letter changes.
 * <p>
 * The status selected in {@code cmbStatus} is applied to {@code currentImage};
 * when the current user has the {@code ADMIN} role, the owner selected in
 * {@code cmbOwner} is applied as well. The image is then saved through
 * {@link GraphicsDao}. Afterwards every shape of every group is compared with
 * the corresponding entry returned by {@code getLetterGroups(row)}: a letter
 * wrapped in square brackets has the brackets stripped, and any shape whose
 * letter differs is updated and saved. A confirmation message box is shown at
 * the end.
 *
 * @throws RuntimeException
 *           wrapping any exception raised while saving (the failure is logged
 *           first)
 */
void save() {
  try {
    Comboitem statusItem = cmbStatus.getSelectedItem();
    Integer statusId = (Integer) statusItem.getValue();
    currentImage.setImageStatus(ImageStatus.forId(statusId));

    // only administrators may reassign the image to another owner
    if (currentUser.getRole().equals(UserRole.ADMIN)) {
      currentImage.setOwner((User) cmbOwner.getSelectedItem().getValue());
    }

    GraphicsDao.getInstance(jochreSession).saveJochreImage(currentImage);

    for (Paragraph paragraph : currentImage.getParagraphs()) {
      LOG.trace("Paragraph " + paragraph.getIndex() + ", " + paragraph.getRows().size() + " rows");

      for (RowOfShapes row : paragraph.getRows()) {
        List<List<String>> letterGroups = this.getLetterGroups(row);
        LOG.trace("Row " + row.getIndex() + ", " + row.getGroups().size() + " groups, " + letterGroups.size() + " letter groups");

        Iterator<List<String>> groupLetters = letterGroups.iterator();
        for (GroupOfShapes group : row.getGroups()) {
          LOG.trace("Group " + group.getIndex() + " text : " + group.getWord());

          // groups without a matching letter group are treated as having no letters
          List<String> letters = groupLetters.hasNext() ? groupLetters.next() : new ArrayList<String>();
          LOG.trace("Found " + letters.size() + " letters in text");

          boolean groupModified = false;
          Iterator<String> letterCursor = letters.iterator();
          for (Shape shape : group.getShapes()) {
            String storedLetter = shape.getLetter();
            String currentLetter = storedLetter == null ? "" : storedLetter;

            // shapes beyond the end of the letter list get an empty letter
            String newLetter = letterCursor.hasNext() ? letterCursor.next() : "";
            boolean bracketed = newLetter.startsWith("[") && newLetter.endsWith("]");
            if (bracketed) {
              newLetter = newLetter.substring(1, newLetter.length() - 1);
            }

            LOG.trace("currentLetter: " + currentLetter + ", newLetter: " + newLetter);
            if (currentLetter.equals(newLetter)) {
              continue;
            }

            LOG.trace("newLetter: " + newLetter);
            shape.setLetter(newLetter);
            shape.save();
            groupModified = true;
          }

          if (groupModified) {
            LOG.trace("Group text after : " + group.getWord());
          }
        }
      }
    }

    Messagebox.show(Labels.getLabel("button.saveComplete"));
  } catch (Exception e) {
    LOG.error("Failure in save", e);
    throw new RuntimeException(e);
  }
}
use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.
the class UnknownWordListWriter method onImageComplete.
/**
 * Writes one line to {@code writer} for every word-frequency outcome with a
 * count of zero found in the image's non-junk paragraphs.
 * <p>
 * Only groups with a non-null best letter sequence are inspected; for these,
 * each subsequence's word frequencies are scanned. The writer is flushed after
 * every line written.
 *
 * @param image
 *          the image whose analysis has just completed
 * @throws RuntimeException
 *           wrapping any {@link IOException} raised by the writer (the failure
 *           is logged first)
 */
@Override
public void onImageComplete(JochreImage image) {
  try {
    for (Paragraph paragraph : image.getParagraphs()) {
      if (paragraph.isJunk()) {
        continue;
      }
      for (RowOfShapes row : paragraph.getRows()) {
        for (GroupOfShapes group : row.getGroups()) {
          if (group.getBestLetterSequence() == null) {
            continue;
          }
          for (LetterSequence subsequence : group.getBestLetterSequence().getSubsequences()) {
            for (CountedOutcome<String> wordFrequency : subsequence.getWordFrequencies()) {
              if (wordFrequency.getCount() != 0) {
                continue;
              }
              writer.write(wordFrequency.getOutcome() + "\n");
              writer.flush();
            }
          }
        }
      }
    }
  } catch (IOException e) {
    LOG.error("Failed to write to UnknownWordListWriter", e);
    throw new RuntimeException(e);
  }
}
use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.
the class CorpusLexiconBuilder method buildLexicon.
/**
 * Build a lexicon from the training corpus.
 * <p>
 * Every image delivered by a {@code JochreCorpusImageReader} (restricted by
 * {@code criteria}) is traversed paragraph by paragraph. For each group that
 * is not a broken word, the non-null letters of its shapes are concatenated
 * and split into words by the session's linguistics; each resulting word
 * increments its entry in the lexicon. A group with no letters increments the
 * empty-string entry.
 * <p>
 * When the last word of the last group on a row ends with a dash, the word is
 * held over and prefixed to the first word of the next group processed in the
 * same paragraph (keeping the dash only for a hard hyphen); the empty string
 * is counted in its place.
 *
 * @return the lexicon built from the selected corpus images
 */
public TextFileLexicon buildLexicon() {
  TextFileLexicon lexicon = new TextFileLexicon();
  JochreCorpusImageReader imageReader = new JochreCorpusImageReader(jochreSession);
  imageReader.setSelectionCriteria(criteria);

  while (imageReader.hasNext()) {
    JochreImage image = imageReader.next();
    for (Paragraph paragraph : image.getParagraphs()) {
      // rows ending in dashes can only be held-over within the same
      // paragraph, to avoid strange things like a page number getting
      // added to the word if the dash is on the last row of the page.
      String holdoverWord = null;

      for (RowOfShapes row : paragraph.getRows()) {
        for (GroupOfShapes group : row.getGroups()) {
          if (group.isBrokenWord()) {
            continue;
          }

          // assemble the group's text from the letters of its shapes
          StringBuilder textBuilder = new StringBuilder();
          for (Shape shape : group.getShapes()) {
            if (shape.getLetter() != null) {
              textBuilder.append(shape.getLetter());
            }
          }
          if (textBuilder.length() == 0) {
            lexicon.incrementEntry("");
            continue;
          }

          List<String> words = jochreSession.getLinguistics().splitText(textBuilder.toString());
          for (int position = 0; position < words.size(); position++) {
            String word = words.get(position);
            boolean isFirstWord = position == 0;
            boolean isLastWord = position == words.size() - 1;

            // prepend whatever was held over from the end of the previous row
            if (isFirstWord && holdoverWord != null && holdoverWord.length() > 0) {
              word = holdoverWord + word;
              holdoverWord = null;
            }

            // a dash at the end of a line: hold the word over to the next row
            if (isLastWord && group.getIndex() == row.getGroups().size() - 1 && word.endsWith("-")) {
              holdoverWord = group.isHardHyphen() ? word : word.substring(0, word.length() - 1);
              word = "";
            }

            lexicon.incrementEntry(word);
          }
        }
      }
    }
  }
  return lexicon;
}
use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.
the class SegmentationTest method testAlsacien1.
/**
 * Segmentation errors reported for Alsacien.
 * <p>
 * Segments the {@code Alsacien1.jpg} test image, then walks the detected
 * paragraphs in order and matches them against a list of expected rectangles:
 * a detected paragraph matches the next expected rectangle when their
 * intersection covers more than 80% of both. All expected rectangles must be
 * matched, and each matched paragraph must have the expected row count (and,
 * where a non-zero value is given, the expected word count on its first row).
 *
 * @param jochrePage
 *          mocked page, configured to return {@code jochreDoc} as its document
 * @param jochreDoc
 *          mocked document, configured as left-to-right
 * @throws Exception
 *           if the image cannot be read or segmentation fails
 */
@Test
public void testAlsacien1(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
  Map<String, Object> configMap = new HashMap<>();
  configMap.put("jochre.locale", "de");
  Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
  JochreSession jochreSession = new JochreSession(config);

  new Expectations() {
    {
      jochrePage.getDocument();
      result = jochreDoc;
      minTimes = 0;
      jochreDoc.isLeftToRight();
      result = true;
      minTimes = 0;
    }
  };

  String imageName = "Alsacien1.jpg";
  LOG.debug(imageName);

  // ImageIO.read does not close the stream it is given, so close it here.
  BufferedImage image;
  try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
    assertNotNull(imageFileStream);
    image = ImageIO.read(imageFileStream);
  }

  SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
  Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
  segmenter.segment();

  List<Rectangle> textPars = new ArrayList<>();
  Rectangle textPar1 = new Rectangle(715, 517, 462, 115);
  // TODO: for now it's splitting this paragraph by row, since it's assuming
  // paragraphs cannot be
  // both outdented and indented on the same page
  // Rectangle textPar2 = new Rectangle(50, 666, 1798, 1039);
  Rectangle textPar3 = new Rectangle(55, 1837, 1777, 335);
  Rectangle textPar4 = new Rectangle(50, 2211, 1765, 154);
  Rectangle textPar5 = new Rectangle(44, 2404, 1782, 511);
  Rectangle textPar6 = new Rectangle(50, 2948, 1776, 154);
  Rectangle textPar7 = new Rectangle(50, 3135, 1770, 77);

  // title paragraph
  textPars.add(textPar1);
  // textPars.add(textPar2);
  textPars.add(textPar3);
  textPars.add(textPar4);
  textPars.add(textPar5);
  textPars.add(textPar6);
  textPars.add(textPar7);

  // i indexes the next expected rectangle, j counts detected paragraphs
  int i = 0;
  int j = 0;
  List<Paragraph> textParagraphs = new ArrayList<>();
  for (Paragraph par : sourceImage.getParagraphs()) {
    Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
    Rectangle expected = textPars.get(i);
    Rectangle intersection = expected.intersection(real);

    double realArea = (double) real.width * real.height;
    double expectedArea = (double) expected.width * expected.height;
    // A non-overlapping intersection is an empty rectangle whose width and
    // height may both be negative; multiplying them would give a spurious
    // positive area, so an empty intersection counts as zero.
    double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;

    double realRatio = intersectionArea / realArea;
    double expectedRatio = intersectionArea / expectedArea;
    LOG.debug("Paragraph " + j + ": " + par.toString());
    LOG.debug("realRatio: " + realRatio);
    LOG.debug("expectedRatio: " + expectedRatio);

    if (realRatio > 0.8 && expectedRatio > 0.8) {
      LOG.debug("Found");
      textParagraphs.add(par);
      i++;
      // all expected paragraphs matched: stop before textPars.get(i) goes
      // out of bounds on any remaining detected paragraph
      if (i >= textPars.size())
        break;
    }
    j++;
  }
  assertEquals(textPars.size(), textParagraphs.size());

  int[] rowCounts = new int[] { 1, 4, 2, 6, 2, 1 };
  // a word count of 0 means "do not check the first row of this paragraph"
  int[] wordCountsFirstRow = new int[] { 2, 0, 0, 0, 0, 0, 0 };
  for (i = 0; i < textParagraphs.size(); i++) {
    assertEquals("row count " + i, rowCounts[i], textParagraphs.get(i).getRows().size());
    RowOfShapes row = textParagraphs.get(i).getRows().get(0);
    if (wordCountsFirstRow[i] > 0)
      assertEquals("word count " + i, wordCountsFirstRow[i], row.getGroups().size());
  }
}
use of com.joliciel.jochre.graphics.Paragraph in project jochre by urieli.
the class SegmentationTest method testAlsacienPlay3.
/**
 * Segmentation errors reported for Alsacien play - challenging because of the
 * unusual indentation.
 * <p>
 * Segments the {@code AlsacienPlay3.jpg} test image, then walks the detected
 * paragraphs in order and matches them against a list of expected rectangles:
 * a detected paragraph matches the next expected rectangle when their
 * intersection covers more than 80% of both. All expected rectangles must be
 * matched, and each matched paragraph must have the expected row count (and,
 * where a non-zero value is given, the expected word count on its first row).
 *
 * @param jochrePage
 *          mocked page, configured to return {@code jochreDoc} as its document
 * @param jochreDoc
 *          mocked document, configured as left-to-right
 * @throws Exception
 *           if the image cannot be read or segmentation fails
 */
@Test
public void testAlsacienPlay3(@Mocked final JochrePage jochrePage, @Mocked final JochreDocument jochreDoc) throws Exception {
  Map<String, Object> configMap = new HashMap<>();
  configMap.put("jochre.locale", "de");
  Config config = ConfigFactory.parseMap(configMap).withFallback(ConfigFactory.load());
  JochreSession jochreSession = new JochreSession(config);

  new Expectations() {
    {
      jochrePage.getDocument();
      result = jochreDoc;
      minTimes = 0;
      jochreDoc.isLeftToRight();
      result = true;
      minTimes = 0;
    }
  };

  String imageName = "AlsacienPlay3.jpg";
  LOG.debug(imageName);

  // ImageIO.read does not close the stream it is given, so close it here.
  BufferedImage image;
  try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/segmentation/" + imageName)) {
    assertNotNull(imageFileStream);
    image = ImageIO.read(imageFileStream);
  }

  SourceImage sourceImage = new SourceImage(jochrePage, "", image, jochreSession);
  Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
  segmenter.segment();

  List<Rectangle> textPars = new ArrayList<>();
  Rectangle textPar1 = new Rectangle(712, 532, 556, 52);
  Rectangle textPar2 = new Rectangle(324, 600, 1324, 128);
  Rectangle textPar3 = new Rectangle(680, 730, 592, 50);
  Rectangle textPar4 = new Rectangle(404, 808, 684, 48);

  // title paragraph
  textPars.add(textPar1);
  textPars.add(textPar2);
  textPars.add(textPar3);
  textPars.add(textPar4);

  // i indexes the next expected rectangle, j counts detected paragraphs
  int i = 0;
  int j = 0;
  List<Paragraph> textParagraphs = new ArrayList<>();
  for (Paragraph par : sourceImage.getParagraphs()) {
    Rectangle real = new Rectangle(par.getLeft(), par.getTop(), par.getRight() - par.getLeft(), par.getBottom() - par.getTop());
    Rectangle expected = textPars.get(i);
    Rectangle intersection = expected.intersection(real);

    double realArea = (double) real.width * real.height;
    double expectedArea = (double) expected.width * expected.height;
    // A non-overlapping intersection is an empty rectangle whose width and
    // height may both be negative; multiplying them would give a spurious
    // positive area, so an empty intersection counts as zero.
    double intersectionArea = intersection.isEmpty() ? 0.0 : (double) intersection.width * intersection.height;

    double realRatio = intersectionArea / realArea;
    double expectedRatio = intersectionArea / expectedArea;
    LOG.debug("Paragraph " + j + ": " + par.toString());
    LOG.debug("realRatio: " + realRatio);
    LOG.debug("expectedRatio: " + expectedRatio);

    if (realRatio > 0.8 && expectedRatio > 0.8) {
      LOG.debug("Found");
      textParagraphs.add(par);
      i++;
      // all expected paragraphs matched: stop before textPars.get(i) goes
      // out of bounds on any remaining detected paragraph
      if (i >= textPars.size())
        break;
    }
    j++;
  }
  assertEquals(textPars.size(), textParagraphs.size());

  int[] rowCounts = new int[] { 1, 2, 1, 1 };
  // TODO: words in "spaced" rows (uses spacing to emphasize instead of bold
  // or italics) get split
  // should try to detect multiple single letter words
  // a word count of 0 means "do not check the first row of this paragraph"
  int[] wordCountsFirstRow = new int[] { 0, 10, 0, 5 };
  for (i = 0; i < textParagraphs.size(); i++) {
    assertEquals("row count " + i, rowCounts[i], textParagraphs.get(i).getRows().size());
    RowOfShapes row = textParagraphs.get(i).getRows().get(0);
    if (wordCountsFirstRow[i] > 0)
      assertEquals("word count " + i, wordCountsFirstRow[i], row.getGroups().size());
  }
}
Aggregations