Search in sources :

Example 6 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

the class PdfImageVisitor method visitImages.

/**
 * Visit all of the images in a pdf file.
 * <p>
 * Pages are counted from 1. When {@code pages} is non-empty, only the page
 * numbers it contains are processed. On each page, consecutive images whose
 * scaled bounding boxes overlap almost entirely are overlaid into a single
 * image; every resulting image is then handed to each registered
 * {@link PdfImageObserver}, rotated by the page rotation if it is non-zero.
 * <p>
 * Failures on a page are logged and, unless {@code stopOnError} is set,
 * processing moves on to the next page. {@link #close()} is always called
 * before this method returns.
 */
public final void visitImages() {
    try {
        int i = 0;
        for (PDPage pdfPage : pdfDocument.getPages()) {
            i++;
            if (!pages.isEmpty() && !pages.contains(i))
                continue;
            LOG.info("Decoding page " + i + " (out of " + pdfDocument.getNumberOfPages() + ")");
            int rotation = pdfPage.getRotation();
            try {
                ImageLocationExtractor imageLocationExtractor = new ImageLocationExtractor();
                ImageCollector imageCollector = new ImageCollector();
                imageLocationExtractor.addObserver(imageCollector);
                imageLocationExtractor.processPage(pdfPage);
                // indexes of images which have to be overlaid with the image that follows them
                Set<Integer> combineWithNext = new HashSet<>();
                for (int j = 1; j < imageCollector.getImages().size(); j++) {
                    // if two subsequent images are overlaid, combine them
                    PdfImageWithLocation image1 = imageCollector.getImages().get(j - 1);
                    PdfImageWithLocation image2 = imageCollector.getImages().get(j);
                    // For simplicity, we assume images are either entirely overlaid, or not overlaid at all
                    if (overlapRatio(image1, image2) > 0.99f) {
                        combineWithNext.add(j - 1);
                    }
                }
                BufferedImage previousImage = null;
                for (int j = 0; j < imageCollector.getImages().size(); j++) {
                    PdfImageWithLocation imageWithLocation = imageCollector.getImages().get(j);
                    PDImageXObject pdfImage = imageWithLocation.getImage();
                    LOG.debug("Found image of type " + pdfImage.getSuffix());
                    BufferedImage image = pdfImage.getImage();
                    if (image == null) {
                        throw new PdfImageExtractionException("Something went wrong: unable to extract image " + j + " in file  " + pdfFile.getAbsolutePath() + ", page " + i);
                    }
                    if (combineWithNext.contains(j - 1)) {
                        // the previous iteration stored its (still unrotated) image for us
                        assert previousImage != null;
                        image = ImageUtils.overlayImages(previousImage, image);
                    }
                    if (combineWithNext.contains(j)) {
                        // Keep the image unrotated while it is waiting to be combined:
                        // rotating it here would overlay a rotated image onto an unrotated
                        // one and then rotate the combined result a second time.
                        previousImage = image;
                        continue;
                    }
                    if (rotation != 0) {
                        LOG.debug("Page rotation:" + rotation);
                        image = ImageUtils.rotate(image, rotation);
                    }
                    for (PdfImageObserver imageObserver : imageObservers) {
                        imageObserver.visitImage(image, imageWithLocation.getName(), i, j);
                    }
                }
            } catch (PdfImageExtractionException e) {
                LOG.error("Error in file  " + pdfFile.getAbsolutePath() + ", page " + i, e);
                if (stopOnError)
                    throw e;
            } catch (IOException e) {
                LOG.error("Error in file  " + pdfFile.getAbsolutePath() + ", page " + i, e);
                if (stopOnError)
                    throw new RuntimeException(e);
            } catch (JochreException e) {
                LOG.error("Error in file  " + pdfFile.getAbsolutePath() + ", page " + i, e);
                if (stopOnError)
                    throw e;
            }
        }
    // next page
    } finally {
        this.close();
    }
}

/**
 * Ratio of the intersection area to the union area of the scaled bounding
 * boxes of two images (NaN when both boxes have zero area, which never
 * compares greater than a threshold).
 */
private static float overlapRatio(PdfImageWithLocation image1, PdfImageWithLocation image2) {
    float overlapWidth = Float.max(0f, Float.min(image2.scaledLeft + image2.scaledWidth, image1.scaledLeft + image1.scaledWidth) - Float.max(image2.scaledLeft, image1.scaledLeft));
    float overlapHeight = Float.max(0f, Float.min(image2.scaledTop + image2.scaledHeight, image1.scaledTop + image1.scaledHeight) - Float.max(image2.scaledTop, image1.scaledTop));
    float intersection = overlapWidth * overlapHeight;
    float image1Area = image1.scaledWidth * image1.scaledHeight;
    float image2Area = image2.scaledWidth * image2.scaledHeight;
    float union = image1Area + image2Area - intersection;
    return intersection / union;
}
Also used : PDPage(org.apache.pdfbox.pdmodel.PDPage) PDResources(org.apache.pdfbox.pdmodel.PDResources) IOException(java.io.IOException) BufferedImage(java.awt.image.BufferedImage) PDImageXObject(org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) JochreException(com.joliciel.jochre.utils.JochreException)

Example 7 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

the class ErrorLogger method onGuessSequence.

/**
 * Logs a wrongly guessed word to the error writer.
 * <p>
 * Nothing is written when the guessed word equals the real word. Otherwise
 * a header identifying the current image is written once per image,
 * followed by the guessed and real sequences with their frequencies. The
 * real word's frequency is the lowest lexicon frequency among the words
 * it is split into.
 *
 * @param letterSequence the sequence that has just been guessed
 * @throws JochreException wrapping any {@link IOException} raised while writing
 */
@Override
public void onGuessSequence(LetterSequence letterSequence) {
    try {
        boolean guessedCorrectly = letterSequence.getRealWord().equals(letterSequence.getGuessedWord());
        if (guessedCorrectly) {
            return;
        }

        // identify the image only once, before its first reported error
        if (!currentImageWritten) {
            String header = "\n" + currentImage.getPage().getDocument().getName() + ", " + currentImage.getPage().getIndex() + ", id: " + currentImage.getId() + "\n";
            errorWriter.write(header);
            currentImageWritten = true;
        }

        int lowestRealFrequency = Integer.MAX_VALUE;
        int guessedFrequency = letterSequence.getFrequency();
        for (String part : jochreSession.getLinguistics().splitText(letterSequence.getRealWord())) {
            lowestRealFrequency = Math.min(lowestRealFrequency, jochreSession.getLexicon().getFrequency(part));
        }

        String guessLine = "Guess: " + letterSequence.getGuessedSequence() + ". Freq: " + guessedFrequency + "\n";
        errorWriter.write(guessLine);
        String realLine = "Real:  " + letterSequence.getRealSequence() + ". Freq: " + lowestRealFrequency + "\n";
        errorWriter.write(realLine);
        errorWriter.flush();
    } catch (IOException ioe) {
        throw new JochreException(ioe);
    }
}
Also used : JochreException(com.joliciel.jochre.utils.JochreException) IOException(java.io.IOException)

Example 8 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

the class OriginalShapeLetterAssigner method assignLetter.

/**
 * Builds the guessed letter for an original shape from the subsequence
 * shapes it takes part in, stores it on the shape, and optionally
 * evaluates it against the shape's real letter.
 * <p>
 * A subsequence shape with a single original shape contributes its guess
 * as is. A subsequence shape joined from several original shapes
 * contributes {@code "|" + guess} to the first of them, {@code guess + "|"}
 * to the last, and nothing to those in between.
 *
 * @param originalShape the shape receiving the guess
 * @param subsequenceForOriginalShape the subsequence shapes covering this original shape
 * @throws JochreException if evaluation is requested but no letter validator is set
 */
void assignLetter(Shape originalShape, List<ShapeInSequence> subsequenceForOriginalShape) {
    StringBuilder guessBuilder = new StringBuilder();
    for (ShapeInSequence shapeInSubSequence : subsequenceForOriginalShape) {
        if (shapeInSubSequence.getOriginalShapes().size() == 1) {
            // if this subsequence shape has only one original shape,
            // we can go ahead and add the subsequence shape's letter to the original shape
            guessBuilder.append(shapeInSubSequence.getShape().getOriginalGuess());
        } else {
            // the subsequence shape has multiple original shapes, so its letter has to be
            // split among all of them (these original shapes were joined into a single new shape)
            int j = 0;
            int myIndex = -1;
            for (Shape myOriginalShape : shapeInSubSequence.getOriginalShapes()) {
                if (myOriginalShape.equals(originalShape)) {
                    myIndex = j;
                    break;
                }
                j++;
            }
            if (myIndex == 0) {
                // the original shape starts this subsequence shape
                if (shapeInSubSequence.getShape().getOriginalGuess().length() > 0)
                    guessBuilder.append("|").append(shapeInSubSequence.getShape().getOriginalGuess());
            } else if (myIndex == shapeInSubSequence.getOriginalShapes().size() - 1) {
                // the original shape ends this subsequence shape
                if (shapeInSubSequence.getShape().getOriginalGuess().length() > 0)
                    guessBuilder.append(shapeInSubSequence.getShape().getOriginalGuess()).append("|");
            } else {
            // the original shape is in the middle of this subsequence shape
            // nothing to do here, since we leave these blank
            }
        // if more than one, where is the original shape in this subsequence's original shapes
        }
    // only one original shape for this subsequence shape, or more?
    }
    // next shape in subsequence for this original shape
    String guessedLetter = guessBuilder.toString();
    originalShape.setOriginalGuess(guessedLetter);
    if (currentImage.getImageStatus().equals(ImageStatus.AUTO_NEW))
        originalShape.setLetter(guessedLetter);
    if (save)
        originalShape.save();
    if (evaluate && stillValid) {
        if (letterValidator == null) {
            throw new JochreException("Cannot evaluate without a letter validator.");
        }
        String realLetter = originalShape.getLetter();
        // split markers are not part of the alphabet, so strip them before validating
        String realLetterForCheck = realLetter.replace("|", "");
        if (letterValidator.validate(realLetterForCheck)) {
            if (guessedLetter.startsWith("|") && guessedLetter.length() == 3 && realLetter.equals("" + guessedLetter.charAt(1))) {
                // the guessed letter is the first half of a split dual letter, and is the same as a real letter
                this.incrementFScore(realLetter, realLetter);
            } else if (guessedLetter.endsWith("|") && guessedLetter.length() == 3 && realLetter.equals("" + guessedLetter.charAt(1))) {
                // the guessed letter is the second half of a split dual letter, and is the same as a real letter
                this.incrementFScore(realLetter, realLetter);
            } else if (realLetter.startsWith("|") && realLetter.length() == 3 && guessedLetter.equals("" + realLetter.charAt(1))) {
                // the real letter is the first half of a split dual letter, and we correctly guessed the first letter of the two
                this.incrementFScore(realLetter, realLetter);
            } else if (realLetter.endsWith("|") && realLetter.length() == 3 && guessedLetter.equals("" + realLetter.charAt(1))) {
                // the real letter is the second half of a split dual letter, and we correctly guessed the second letter of the two
                this.incrementFScore(realLetter, realLetter);
            } else {
                this.incrementFScore(realLetter, guessedLetter);
                // an error is a guess that differs from the real letter
                if (!realLetter.equals(guessedLetter))
                    hasError = true;
            }
        } else {
            // check if there are any invalid characters:
            // each character must be valid alone or together with the one before it
            String prevChar = "";
            for (int i = 0; i < realLetterForCheck.length(); i++) {
                String nextChar = "" + realLetterForCheck.charAt(i);
                if (letterValidator.validate(nextChar)) {
                // do nothing
                } else if (letterValidator.validate(prevChar + nextChar)) {
                // do nothing
                } else {
                    // once an invalid character is met, evaluation stops for good
                    stillValid = false;
                    break;
                }
                prevChar = nextChar;
            }
            if (stillValid) {
                this.incrementFScore(realLetter, guessedLetter);
            }
        }
    }
}
Also used : Shape(com.joliciel.jochre.graphics.Shape) JochreException(com.joliciel.jochre.utils.JochreException) ShapeInSequence(com.joliciel.jochre.boundaries.ShapeInSequence)

Example 9 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

the class TextFileLexicon method deserialize.

/**
 * Reads a lexicon from a file.
 * <p>
 * A file whose name ends in {@code .zip} is handed to
 * {@code TextFileLexicon.deserialize(ZipInputStream)}; any other file is
 * read as a Java-serialized {@code TextFileLexicon}. All streams opened
 * here are closed before returning, whether or not reading succeeds.
 * <p>
 * NOTE(review): the non-zip path uses native Java deserialization, which
 * is unsafe on files from untrusted sources — confirm callers only pass
 * trusted files, or add an {@code ObjectInputFilter}.
 *
 * @param memoryBaseFile the file to read the lexicon from
 * @return the lexicon read from the file
 * @throws JochreException wrapping any {@link IOException} or
 *         {@link ClassNotFoundException} raised while reading
 */
public static Lexicon deserialize(File memoryBaseFile) {
    LOG.debug("deserializeMemoryBase");
    boolean isZip = memoryBaseFile.getName().endsWith(".zip");
    try (FileInputStream fis = new FileInputStream(memoryBaseFile)) {
        if (isZip) {
            try (ZipInputStream zis = new ZipInputStream(fis)) {
                return TextFileLexicon.deserialize(zis);
            }
        }
        try (ObjectInputStream in = new ObjectInputStream(fis)) {
            return (TextFileLexicon) in.readObject();
        }
    } catch (IOException ioe) {
        throw new JochreException(ioe);
    } catch (ClassNotFoundException cnfe) {
        throw new JochreException(cnfe);
    }
}
Also used : ZipInputStream(java.util.zip.ZipInputStream) JochreException(com.joliciel.jochre.utils.JochreException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) ObjectInputStream(java.io.ObjectInputStream)

Example 10 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

the class FScoreCalculator method writeScoresToCSVFile.

/**
 * Writes the scores in CSV form to the given file, replacing any existing
 * content, encoded as UTF-8.
 *
 * @param fscoreFile the file to (re)create and write to
 * @throws JochreException wrapping any {@link IOException} raised while
 *         creating or writing the file
 */
public void writeScoresToCSVFile(File fscoreFile) {
    try {
        // Results deliberately ignored: the file may not exist yet, and the
        // output stream below creates or truncates it in any case.
        fscoreFile.delete();
        fscoreFile.createNewFile();
        // try-with-resources guarantees the writer is closed (and thereby
        // flushed) even when writing fails, and keeps the original exception
        // as the primary one if closing fails as well
        try (Writer fscoreFileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fscoreFile, false), "UTF8"))) {
            this.writeScoresToCSV(fscoreFileWriter);
        }
    } catch (IOException ioe) {
        throw new JochreException(ioe);
    }
}
Also used : JochreException(com.joliciel.jochre.utils.JochreException) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) Writer(java.io.Writer) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)

Aggregations

JochreException (com.joliciel.jochre.utils.JochreException): 23, IOException (java.io.IOException): 15, BufferedImage (java.awt.image.BufferedImage): 7, File (java.io.File): 7, ClassificationModel (com.joliciel.talismane.machineLearning.ClassificationModel): 5, Shape (com.joliciel.jochre.graphics.Shape): 4, ClassificationEventStream (com.joliciel.talismane.machineLearning.ClassificationEventStream): 4, ClassificationModelTrainer (com.joliciel.talismane.machineLearning.ClassificationModelTrainer): 4, ModelTrainerFactory (com.joliciel.talismane.machineLearning.ModelTrainerFactory): 4, BoundaryDetector (com.joliciel.jochre.boundaries.BoundaryDetector): 3, DeterministicBoundaryDetector (com.joliciel.jochre.boundaries.DeterministicBoundaryDetector): 3, LetterByLetterBoundaryDetector (com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector): 3, OriginalBoundaryDetector (com.joliciel.jochre.boundaries.OriginalBoundaryDetector): 3, JochreImage (com.joliciel.jochre.graphics.JochreImage): 3, LetterFeature (com.joliciel.jochre.letterGuesser.features.LetterFeature): 3, LetterFeatureParser (com.joliciel.jochre.letterGuesser.features.LetterFeatureParser): 3, TreeSet (java.util.TreeSet): 3, BeamSearchImageAnalyser (com.joliciel.jochre.analyser.BeamSearchImageAnalyser): 2, ImageAnalyser (com.joliciel.jochre.analyser.ImageAnalyser): 2, LetterAssigner (com.joliciel.jochre.analyser.LetterAssigner): 2