Use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.
The class PdfImageVisitor, method visitImages.
/**
 * Visit all of the images in a pdf file.
 * <p>
 * Pages are counted from 1. When {@code pages} is non-empty, only the page
 * numbers it contains are processed. For each processed page, the images are
 * gathered by an {@code ImageCollector} observing an
 * {@code ImageLocationExtractor}. Two consecutive images whose scaled bounding
 * boxes have an intersection-over-union ratio above 0.99 are overlaid into a
 * single image. Each resulting image is rotated by the page rotation (when it
 * is non-zero) and passed to every registered {@code PdfImageObserver}.
 * <p>
 * An error on a page is logged and aborts that page only; it is rethrown only
 * when {@code stopOnError} is set (an {@code IOException} is wrapped in a
 * {@code RuntimeException}). {@code close()} is always called at the end.
 */
public final void visitImages() {
    try {
        int i = 0;
        for (PDPage pdfPage : pdfDocument.getPages()) {
            i++;
            if (!pages.isEmpty() && !pages.contains(i)) {
                continue;
            }
            LOG.info("Decoding page " + i + " (out of " + pdfDocument.getNumberOfPages() + ")");
            int rotation = pdfPage.getRotation();
            try {
                ImageLocationExtractor imageLocationExtractor = new ImageLocationExtractor();
                ImageCollector imageCollector = new ImageCollector();
                imageLocationExtractor.addObserver(imageCollector);
                imageLocationExtractor.processPage(pdfPage);

                // Indexes of images which have to be merged with the image that follows them.
                Set<Integer> combineWithNext = new HashSet<>();
                for (int j = 1; j < imageCollector.getImages().size(); j++) {
                    // if two subsequent images are overlaid, combine them
                    PdfImageWithLocation image1 = imageCollector.getImages().get(j - 1);
                    PdfImageWithLocation image2 = imageCollector.getImages().get(j);
                    float intersectionWidth = Float.max(0f,
                            Float.min(image2.scaledLeft + image2.scaledWidth, image1.scaledLeft + image1.scaledWidth)
                                    - Float.max(image2.scaledLeft, image1.scaledLeft));
                    float intersectionHeight = Float.max(0f,
                            Float.min(image2.scaledTop + image2.scaledHeight, image1.scaledTop + image1.scaledHeight)
                                    - Float.max(image2.scaledTop, image1.scaledTop));
                    float intersection = intersectionWidth * intersectionHeight;
                    float image1Area = image1.scaledWidth * image1.scaledHeight;
                    float image2Area = image2.scaledWidth * image2.scaledHeight;
                    float union = image1Area + image2Area - intersection;
                    // A zero union yields NaN, which fails the comparison below (no merge).
                    float ratio = intersection / union;
                    // For simplicity, we assume images are either entirely overlaid, or not overlaid at all
                    if (ratio > 0.99f) {
                        combineWithNext.add(j - 1);
                    }
                }

                BufferedImage previousImage = null;
                for (int j = 0; j < imageCollector.getImages().size(); j++) {
                    PdfImageWithLocation imageWithLocation = imageCollector.getImages().get(j);
                    PDImageXObject pdfImage = imageWithLocation.getImage();
                    LOG.debug("Found image of type " + pdfImage.getSuffix());
                    BufferedImage image = pdfImage.getImage();
                    if (image == null) {
                        throw new PdfImageExtractionException("Something went wrong: unable to extract image " + j + " in file " + pdfFile.getAbsolutePath() + ", page " + i);
                    }
                    if (combineWithNext.contains(j - 1)) {
                        assert previousImage != null;
                        image = ImageUtils.overlayImages(previousImage, image);
                    }
                    if (combineWithNext.contains(j)) {
                        // Hold the image back UNROTATED: it still has to be overlaid with the
                        // next (unrotated) image. Rotating here would rotate it twice and
                        // overlay images with mismatched orientation.
                        previousImage = image;
                    } else {
                        // Rotate exactly once, just before handing the final image to the observers.
                        if (rotation != 0) {
                            LOG.debug("Page rotation:" + rotation);
                            image = ImageUtils.rotate(image, rotation);
                        }
                        for (PdfImageObserver imageObserver : imageObservers) {
                            imageObserver.visitImage(image, imageWithLocation.getName(), i, j);
                        }
                    }
                }
            } catch (PdfImageExtractionException e) {
                LOG.error("Error in file " + pdfFile.getAbsolutePath() + ", page " + i, e);
                if (stopOnError) {
                    throw e;
                }
            } catch (IOException e) {
                LOG.error("Error in file " + pdfFile.getAbsolutePath() + ", page " + i, e);
                if (stopOnError) {
                    throw new RuntimeException(e);
                }
            } catch (JochreException e) {
                LOG.error("Error in file " + pdfFile.getAbsolutePath() + ", page " + i, e);
                if (stopOnError) {
                    throw e;
                }
            }
        }
        // next page
    } finally {
        this.close();
    }
}
Use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.
The class ErrorLogger, method onGuessSequence.
/**
 * Writes an entry to the error writer whenever the guessed word of the given
 * letter sequence differs from its real word. The entry holds the guessed and
 * real sequences together with their frequencies; the real frequency is the
 * lowest lexicon frequency among the words the real word is split into. The
 * first entry for the current image is preceded by a header line naming the
 * document, page index and image id.
 *
 * @param letterSequence the sequence that has just been guessed
 * @throws JochreException if writing to the error writer fails
 */
@Override
public void onGuessSequence(LetterSequence letterSequence) {
    try {
        boolean guessedCorrectly = letterSequence.getRealWord().equals(letterSequence.getGuessedWord());
        if (guessedCorrectly) {
            // nothing to report
            return;
        }

        if (!currentImageWritten) {
            String imageHeader = "\n" + currentImage.getPage().getDocument().getName() + ", " + currentImage.getPage().getIndex() + ", id: " + currentImage.getId() + "\n";
            errorWriter.write(imageHeader);
            currentImageWritten = true;
        }

        int guessedFrequency = letterSequence.getFrequency();

        // the real word's frequency is that of its least frequent component
        int lowestRealFrequency = Integer.MAX_VALUE;
        for (String token : jochreSession.getLinguistics().splitText(letterSequence.getRealWord())) {
            lowestRealFrequency = Math.min(lowestRealFrequency, jochreSession.getLexicon().getFrequency(token));
        }

        errorWriter.write("Guess: " + letterSequence.getGuessedSequence() + ". Freq: " + guessedFrequency + "\n");
        errorWriter.write("Real: " + letterSequence.getRealSequence() + ". Freq: " + lowestRealFrequency + "\n");
        errorWriter.flush();
    } catch (IOException ioe) {
        throw new JochreException(ioe);
    }
}
Use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.
The class OriginalShapeLetterAssigner, method assignLetter.
/**
 * Builds the guessed letter for an original shape out of the shapes of the
 * subsequence covering it, stores it on the original shape, and optionally
 * saves the shape and updates the f-score.
 * <p>
 * When a subsequence shape covers several original shapes, its guess is only
 * attributed to the first one (prefixed with "|") and to the last one
 * (suffixed with "|"); original shapes in the middle get nothing.
 *
 * @param originalShape the original shape to which a letter is assigned
 * @param subsequenceForOriginalShape the subsequence shapes covering this original shape
 * @throws JochreException if evaluation is requested but no letter validator is set
 */
void assignLetter(Shape originalShape, List<ShapeInSequence> subsequenceForOriginalShape) {
    StringBuilder guessBuilder = new StringBuilder();
    for (ShapeInSequence shapeInSubSequence : subsequenceForOriginalShape) {
        if (shapeInSubSequence.getOriginalShapes().size() == 1) {
            // if this subsequence shape has only one original shape,
            // we can go ahead and add the subsequence shape's letter to the original shape
            guessBuilder.append(shapeInSubSequence.getShape().getOriginalGuess());
        } else {
            // the subsequence shape has multiple original shapes, so its letter has to be
            // split among all of them (these original shapes were joined into a single new shape)
            int j = 0;
            int myIndex = -1;
            for (Shape myOriginalShape : shapeInSubSequence.getOriginalShapes()) {
                if (myOriginalShape.equals(originalShape)) {
                    myIndex = j;
                    break;
                }
                j++;
            }
            if (myIndex == 0) {
                // the original shape starts this subsequence shape
                if (shapeInSubSequence.getShape().getOriginalGuess().length() > 0) {
                    guessBuilder.append("|").append(shapeInSubSequence.getShape().getOriginalGuess());
                }
            } else if (myIndex == shapeInSubSequence.getOriginalShapes().size() - 1) {
                // the original shape ends this subsequence shape
                if (shapeInSubSequence.getShape().getOriginalGuess().length() > 0) {
                    guessBuilder.append(shapeInSubSequence.getShape().getOriginalGuess()).append("|");
                }
            } else {
                // the original shape is in the middle of this subsequence shape
                // nothing to do here, since we leave these blank
            }
            // if more than one, where is the original shape in this subsequence's original shapes
        }
        // only one original shape for this subsequence shape, or more?
    }
    // next shape in subsequence for this original shape
    String guessedLetter = guessBuilder.toString();

    originalShape.setOriginalGuess(guessedLetter);
    if (currentImage.getImageStatus().equals(ImageStatus.AUTO_NEW)) {
        originalShape.setLetter(guessedLetter);
    }
    if (save) {
        originalShape.save();
    }

    if (evaluate && stillValid) {
        if (letterValidator == null) {
            throw new JochreException("Cannot evaluate without a letter validator.");
        }
        String realLetter = originalShape.getLetter();
        // split markers are not part of the letter itself, so drop them before validating
        String realLetterForCheck = realLetter.replace("|", "");
        if (letterValidator.validate(realLetterForCheck)) {
            if (guessedLetter.startsWith("|") && guessedLetter.length() == 3 && realLetter.equals("" + guessedLetter.charAt(1))) {
                // the guessed letter is the first half of a split dual letter, and is the same as a real letter
                this.incrementFScore(realLetter, realLetter);
            } else if (guessedLetter.endsWith("|") && guessedLetter.length() == 3 && realLetter.equals("" + guessedLetter.charAt(1))) {
                // the guessed letter is the second half of a split dual letter, and is the same as a real letter
                this.incrementFScore(realLetter, realLetter);
            } else if (realLetter.startsWith("|") && realLetter.length() == 3 && guessedLetter.equals("" + realLetter.charAt(1))) {
                // the real letter is the first half of a split dual letter, and we correctly guessed the first letter of the two
                this.incrementFScore(realLetter, realLetter);
            } else if (realLetter.endsWith("|") && realLetter.length() == 3 && guessedLetter.equals("" + realLetter.charAt(1))) {
                // the real letter is the second half of a split dual letter, and we correctly guessed the second letter of the two
                this.incrementFScore(realLetter, realLetter);
            } else {
                this.incrementFScore(realLetter, guessedLetter);
                // an error is a guess which differs from the real letter
                // (the condition used to be inverted, flagging correct guesses instead)
                if (!realLetter.equals(guessedLetter)) {
                    hasError = true;
                }
            }
        } else {
            // check if there are any invalid characters: each character has to be valid
            // either on its own or together with the character preceding it
            String prevChar = "";
            for (int i = 0; i < realLetterForCheck.length(); i++) {
                String nextChar = "" + realLetterForCheck.charAt(i);
                if (letterValidator.validate(nextChar)) {
                    // do nothing
                } else if (letterValidator.validate(prevChar + nextChar)) {
                    // do nothing
                } else {
                    // once an invalid character is found, evaluation is switched off
                    stillValid = false;
                    break;
                }
                prevChar = nextChar;
            }
            if (stillValid) {
                this.incrementFScore(realLetter, guessedLetter);
            }
        }
    }
}
Use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.
The class TextFileLexicon, method deserialize.
/**
 * Reads a lexicon from a file. If the file name ends with ".zip", the file is
 * opened as a zip stream and handed to {@code deserialize(ZipInputStream)};
 * otherwise it is read as a single Java-serialized {@code TextFileLexicon}.
 * All streams opened here are closed before returning, whether or not reading
 * succeeds.
 *
 * @param memoryBaseFile the file containing the serialized lexicon
 * @return the lexicon read from the file
 * @throws JochreException wrapping any {@code IOException} or
 *         {@code ClassNotFoundException} raised while reading
 */
public static Lexicon deserialize(File memoryBaseFile) {
    LOG.debug("deserializeMemoryBase");
    boolean isZip = memoryBaseFile.getName().endsWith(".zip");
    try (FileInputStream fis = new FileInputStream(memoryBaseFile)) {
        if (isZip) {
            try (ZipInputStream zis = new ZipInputStream(fis)) {
                return TextFileLexicon.deserialize(zis);
            }
        }
        // NOTE(review): native Java deserialization - only safe for trusted lexicon files;
        // consider an ObjectInputFilter if this file can come from an untrusted source.
        try (ObjectInputStream in = new ObjectInputStream(fis)) {
            return (TextFileLexicon) in.readObject();
        }
    } catch (IOException ioe) {
        throw new JochreException(ioe);
    } catch (ClassNotFoundException cnfe) {
        throw new JochreException(cnfe);
    }
}
Use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.
The class FScoreCalculator, method writeScoresToCSVFile.
/**
 * Writes the scores to the given file in CSV format, encoded as UTF-8. Any
 * existing file is deleted and recreated first. The underlying streams are
 * always closed, and a failure while closing no longer hides a failure that
 * occurred while writing.
 *
 * @param fscoreFile the file to (over)write
 * @throws JochreException wrapping any {@code IOException} raised while
 *         creating or writing the file
 */
public void writeScoresToCSVFile(File fscoreFile) {
    try {
        fscoreFile.delete();
        fscoreFile.createNewFile();
        // The file stream is declared as its own resource so that it gets closed even
        // if wrapping it in a writer fails; closing the writer flushes it first.
        try (FileOutputStream fscoreFileStream = new FileOutputStream(fscoreFile, false);
                Writer fscoreFileWriter = new BufferedWriter(new OutputStreamWriter(fscoreFileStream, "UTF8"))) {
            this.writeScoresToCSV(fscoreFileWriter);
        }
    } catch (IOException ioe) {
        throw new JochreException(ioe);
    }
}
Aggregations