Search in sources :

Example 46 with Shape

use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.

the class SmallChupchikRightNearTopFeature method checkInternal.

/**
 * Detects a small "chupchik" near the top of the right-most occupied column
 * of the shape's brightness grid.
 * <p>
 * The shape is divided into a grid of sectors (11 columns; the row count is
 * whatever {@code getBrightnessBySection} returns for 13 centre sectors and a
 * margin of 1). A sector whose value is {@code >= 0.5} is treated as black.
 * Scanning the right-most column that contains any black sector from top to
 * bottom, the feature is true when:
 * <ul>
 * <li>a run of black sectors starts within the top third of the column,</li>
 * <li>that run is at most 5 sectors long,</li>
 * <li>it is followed by a gap of at least 3 sectors, and</li>
 * <li>the number of black sectors below the run does not exceed the gap size
 * plus 3.</li>
 * </ul>
 * If no column contains a black sector the result is {@code false}.
 *
 * @param shapeWrapper
 *            wrapper supplying the shape to analyse
 * @param env
 *            runtime environment (not used by this feature)
 * @return a result wrapping {@code true} if the chupchik was found
 */
@Override
public FeatureResult<Boolean> checkInternal(ShapeWrapper shapeWrapper, RuntimeEnvironment env) {
    Shape shape = shapeWrapper.getShape();
    int xSectors = 11;
    int centreSectors = 13;
    int marginSectors = 1;
    double[][] grid = shape.getBrightnessBySection(xSectors, centreSectors, marginSectors, SectionBrightnessMeasurementMethod.RELATIVE_TO_MAX_SECTION);
    int ySectors = grid[0].length;

    // Find the right-most column containing at least one black sector.
    // The scan is bounded at column 0: an unbounded scan would index
    // grid[-1] when the grid contains no black sector at all.
    int testColumn = -1;
    for (int column = xSectors - 1; column >= 0 && testColumn < 0; column--) {
        for (int i = 0; i < ySectors; i++) {
            if (grid[column][i] >= 0.5) {
                testColumn = column;
                break;
            }
        }
    }
    if (testColumn < 0) {
        // Nothing black anywhere: there can be no chupchik. This is the
        // same answer the checks below give for a column without black.
        return this.generateResult(false);
    }

    boolean foundChupchik = false;
    boolean foundBlack = false;
    boolean foundMoreBlack = false;
    int startChupchik = 0;
    int startWhite = 0;
    int numBlackBelowChupchik = 0;
    int chupchikSize = 0;
    int gapSize = 0;
    // integer division is intentional here: it reproduces the original limit
    double maxChupchikStart = ySectors / 3;
    for (int i = 0; i < ySectors; i++) {
        if (!foundBlack && i < maxChupchikStart) {
            // still looking for the first black sector, within the top third only
            if (grid[testColumn][i] >= 0.5) {
                foundBlack = true;
                startChupchik = i;
            }
        } else if (!foundChupchik && foundBlack && grid[testColumn][i] < 0.5) {
            // first white sector after the black run: the run is the chupchik
            chupchikSize = i - startChupchik;
            if (LOG.isTraceEnabled()) {
                LOG.trace("Found chupchick, start=" + startChupchik + ", size=" + chupchikSize);
            }
            foundChupchik = true;
            startWhite = i;
        } else if (foundChupchik && grid[testColumn][i] >= 0.5) {
            // black below the chupchik: the first occurrence closes the gap
            if (!foundMoreBlack) {
                gapSize = i - startWhite;
                foundMoreBlack = true;
            }
            numBlackBelowChupchik++;
        }
    }
    if (foundChupchik && !foundMoreBlack) {
        // no black below the chupchik: the gap runs to the bottom of the column
        // NOTE(review): measured from startChupchik rather than startWhite,
        // so the chupchik itself is counted in the gap - confirm this is intended
        gapSize = ySectors - startChupchik;
    }
    int maxBlackBelowGap = gapSize + 3;
    if (chupchikSize > 5) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Chupchik too big: " + chupchikSize);
        }
        foundChupchik = false;
    } else if (gapSize < 3) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Gap size too small: " + gapSize);
        }
        foundChupchik = false;
    } else if (numBlackBelowChupchik > maxBlackBelowGap) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Too much black below gap, max: " + maxBlackBelowGap + ", found: " + numBlackBelowChupchik);
        }
        foundChupchik = false;
    }
    FeatureResult<Boolean> outcome = this.generateResult(foundChupchik);
    return outcome;
}
Also used : Shape(com.joliciel.jochre.graphics.Shape)

Example 47 with Shape

use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.

the class ThinRowFeature method checkInternal.

/**
 * Tests whether the row containing the shape is "thin": the x-height of the
 * shape's row divided by the image's average row height is below 0.75.
 *
 * @param shapeWrapper
 *            wrapper supplying the shape to analyse
 * @param env
 *            runtime environment (not used by this feature)
 * @return a result wrapping {@code true} if the ratio is below the threshold
 */
@Override
public FeatureResult<Boolean> checkInternal(ShapeWrapper shapeWrapper, RuntimeEnvironment env) {
    Shape shape = shapeWrapper.getShape();
    double threshold = 0.75;
    JochreImage image = shape.getJochreImage();
    double averageRowHeight = image.getAverageRowHeight();
    // despite the name, this is the x-height of the shape's row, not of the shape itself
    double shapeHeight = shape.getGroup().getRow().getXHeight();
    double ratio = shapeHeight / averageRowHeight;
    // Guard the trace output so the message strings are only built when
    // trace logging is actually enabled.
    if (LOG.isTraceEnabled()) {
        LOG.trace("averageRowHeight: " + averageRowHeight);
        LOG.trace("shapeHeight: " + shapeHeight);
        LOG.trace("ratio: " + ratio);
        LOG.trace("threshold: " + threshold);
    }
    FeatureResult<Boolean> outcome = this.generateResult(ratio < threshold);
    return outcome;
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape)

Example 48 with Shape

use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.

the class TouchesBaseLineFeature method checkInternal.

/**
 * Tests whether the shape touches its base line: the base line must be
 * non-negative and the shape's height must be at least the base line value.
 *
 * @param shapeWrapper
 *            wrapper supplying the shape to analyse
 * @param env
 *            runtime environment (not used by this feature)
 * @return a result wrapping {@code true} if both conditions hold
 */
@Override
public FeatureResult<Boolean> checkInternal(ShapeWrapper shapeWrapper, RuntimeEnvironment env) {
    final Shape target = shapeWrapper.getShape();
    boolean touchesBaseLine = false;
    // the height is only compared when the base line is non-negative
    if (target.getBaseLine() >= 0) {
        touchesBaseLine = target.getHeight() >= target.getBaseLine();
    }
    return this.generateResult(touchesBaseLine);
}
Also used : Shape(com.joliciel.jochre.graphics.Shape)

Example 49 with Shape

use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.

the class VerticalElongationFeature method checkInternal.

/**
 * Computes the vertical elongation of the shape, i.e. its height divided by
 * its width, using floating-point division.
 *
 * @param shapeWrapper
 *            wrapper supplying the shape to analyse
 * @param env
 *            runtime environment (not used by this feature)
 * @return a result wrapping the height-to-width ratio
 */
@Override
public FeatureResult<Double> checkInternal(ShapeWrapper shapeWrapper, RuntimeEnvironment env) {
    final Shape target = shapeWrapper.getShape();
    // widen both dimensions to double before dividing
    final double height = target.getHeight();
    final double width = target.getWidth();
    final double elongation = height / width;
    return this.generateResult(elongation);
}
Also used : Shape(com.joliciel.jochre.graphics.Shape)

Example 50 with Shape

use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.

the class MostLikelyWordChooser method getFrequency.

/**
 * Same as {@link #getFrequency(LetterSequence)}, but can either apply to
 * the guessed word or to the real word from the training corpus.
 * <p>
 * The method works in three phases:
 * <ol>
 * <li>Enumerate the possible ways of grouping the letter sequence's
 * subsequences into words, merging or separating punctuation (dashes,
 * mid-word, start-word and end-word punctuation) with its neighbours.</li>
 * <li>Look up the frequency of each word of each grouping and record, per
 * grouping, the minimum frequency found.</li>
 * <li>Among the groupings with the highest minimum frequency, keep the one
 * containing the single longest word, store it on the letter sequence and
 * build the hyphenated string.</li>
 * </ol>
 * Side effects: word frequencies are set on the subsequences and appended to
 * {@code letterSequence.getWordFrequencies()}; the subsequences, hyphen
 * subsequence and (if the sequence is split) hyphenated string of
 * {@code letterSequence} are updated.
 *
 * @param letterSequence
 *            the letter sequence to evaluate; it is modified by this call
 * @param guessedWord
 *            if true, applies to the guessed word
 * @return the highest minimum frequency found over all groupings
 */
public int getFrequency(LetterSequence letterSequence, boolean guessedWord) {
    int frequency = 0;
    List<LetterSequence> subsequences = letterSequence.getSubsequences();
    // Phase 1: enumerate groupings. Start from a single empty grouping.
    List<List<LetterSequence>> possibilities = new ArrayList<>();
    possibilities.add(new ArrayList<LetterSequence>());
    // running count of letters consumed so far, minus one
    // (presumably the index of the current subsequence's last letter - confirm)
    int lastIndex = -1;
    for (int i = 0; i < subsequences.size(); i++) {
        LetterSequence subsequence = subsequences.get(i);
        lastIndex += subsequence.getLetters().size();
        String word = null;
        if (guessedWord)
            word = subsequence.getGuessedWord();
        else
            word = subsequence.getRealWord();
        List<List<LetterSequence>> newPossibilities = new ArrayList<>();
        for (List<LetterSequence> possibility : possibilities) {
            if (possibility.size() > 0) {
                // has this subsequence already been processed ?
                // (i.e. the grouping already ends on the same final shape,
                // because an earlier step added this subsequence as "next")
                LetterSequence lastSequence = possibility.get(possibility.size() - 1);
                Shape lastShape = lastSequence.getUnderlyingShapeSequence().get(lastSequence.getUnderlyingShapeSequence().size() - 1).getShape();
                Shape myLastShape = subsequence.getUnderlyingShapeSequence().get(subsequence.getUnderlyingShapeSequence().size() - 1).getShape();
                if (lastShape.equals(myLastShape)) {
                    newPossibilities.add(possibility);
                    continue;
                }
            }
            // whether the subsequence should simply be appended as its own word
            boolean addWord = true;
            if (subsequence.isPunctation()) {
                if (word.equals("-") || midWordPunctuation.contains(word) || startWordPunctuation.contains(word) || endWordPunctuation.contains(word)) {
                    // neighbours of the punctuation mark (null at either end)
                    // and the merged sequences that may replace them
                    LetterSequence prevSequence = possibility.size() == 0 ? null : possibility.get(possibility.size() - 1);
                    LetterSequence nextSequence = i == subsequences.size() - 1 ? null : subsequences.get(i + 1);
                    LetterSequence prevCurrentSequence = new LetterSequence(prevSequence, subsequence);
                    LetterSequence currentNextSequence = new LetterSequence(subsequence, nextSequence);
                    LetterSequence prevCurrentNextSequence = new LetterSequence(prevCurrentSequence, nextSequence);
                    if (word.equals("-")) {
                        // A dash may stand alone or attach to either neighbour.
                        if (prevSequence == null && nextSequence == null) {
                            // lone dash: the grouping is kept unchanged
                            newPossibilities.add(possibility);
                        } else if (prevSequence == null) {
                            // leading dash: separate from, or merged with, the next sequence
                            List<LetterSequence> newPoss = new ArrayList<>();
                            newPoss.add(subsequence);
                            newPoss.add(nextSequence);
                            newPossibilities.add(newPoss);
                            newPoss = new ArrayList<>();
                            newPoss.add(currentNextSequence);
                            newPossibilities.add(newPoss);
                        } else if (nextSequence == null) {
                            // trailing dash: separate from, or merged with, the previous sequence
                            List<LetterSequence> newPoss = new ArrayList<>(possibility);
                            newPoss.add(subsequence);
                            newPossibilities.add(newPoss);
                            newPoss = new ArrayList<>(possibility);
                            newPoss.remove(newPoss.size() - 1);
                            newPoss.add(prevCurrentSequence);
                            newPossibilities.add(newPoss);
                        } else {
                            // dash between two sequences: four groupings
                            // 1) prev | dash | next
                            List<LetterSequence> newPoss = new ArrayList<>(possibility);
                            newPoss.add(subsequence);
                            newPoss.add(nextSequence);
                            newPossibilities.add(newPoss);
                            // 2) prev | dash+next
                            newPoss = new ArrayList<>(possibility);
                            newPoss.add(currentNextSequence);
                            newPossibilities.add(newPoss);
                            // 3) prev+dash | next
                            newPoss = new ArrayList<>(possibility);
                            newPoss.remove(newPoss.size() - 1);
                            newPoss.add(prevCurrentSequence);
                            newPoss.add(nextSequence);
                            newPossibilities.add(newPoss);
                            // 4) prev+dash+next
                            newPoss = new ArrayList<>(possibility);
                            newPoss.remove(newPoss.size() - 1);
                            newPoss.add(prevCurrentNextSequence);
                            newPossibilities.add(newPoss);
                            // add skipped dash possibility
                            // (only when the dash sits at the end-of-line hyphen index)
                            if (lastIndex == letterSequence.getEndOfLineHyphenIndex()) {
                                subsequence.setHyphenSubsequence(subsequence);
                                prevCurrentNextSequence.setHyphenSubsequence(subsequence);
                                prevCurrentSequence.setHyphenSubsequence(subsequence);
                                currentNextSequence.setHyphenSubsequence(subsequence);
                                // NOTE(review): built from prevCurrentSequence (which includes the
                                // dash) and then flagged as a soft hyphen - presumably the flag
                                // makes the dash be skipped; confirm against LetterSequence
                                LetterSequence prevNextSequence = new LetterSequence(prevCurrentSequence, nextSequence);
                                prevNextSequence.setHyphenSubsequence(subsequence);
                                prevNextSequence.setSoftHyphen(true);
                                newPoss = new ArrayList<>(possibility);
                                newPoss.remove(newPoss.size() - 1);
                                newPoss.add(prevNextSequence);
                                newPossibilities.add(newPoss);
                            }
                        }
                        addWord = false;
                    }
                    if (midWordPunctuation.contains(word)) {
                        // mid-word punctuation with both neighbours: merge all three
                        if (prevSequence != null && nextSequence != null) {
                            List<LetterSequence> newPoss = new ArrayList<>(possibility);
                            newPoss.remove(newPoss.size() - 1);
                            newPoss.add(prevCurrentNextSequence);
                            newPossibilities.add(newPoss);
                            addWord = false;
                        }
                    }
                    if (startWordPunctuation.contains(word)) {
                        // start-word punctuation: merged with, or separate from, the next
                        // sequence - skipped when the final subsequence's guessed word is
                        // this same punctuation mark
                        if (nextSequence != null && !subsequences.get(subsequences.size() - 1).getGuessedWord().equals(word)) {
                            List<LetterSequence> newPoss = new ArrayList<>(possibility);
                            newPoss.add(currentNextSequence);
                            newPossibilities.add(newPoss);
                            newPoss = new ArrayList<>(possibility);
                            newPoss.add(subsequence);
                            newPoss.add(nextSequence);
                            newPossibilities.add(newPoss);
                            addWord = false;
                        }
                    }
                    // end-word punctuation: additionally offer it merged with the previous
                    // sequence - skipped when the first subsequence's guessed word is this
                    // same punctuation mark. addWord is left untouched here, so the
                    // stand-alone grouping may still be added below.
                    if (endWordPunctuation.contains(word) && !subsequences.get(0).getGuessedWord().equals(word)) {
                        if (prevSequence != null) {
                            List<LetterSequence> newPoss = new ArrayList<>(possibility);
                            newPoss.remove(newPoss.size() - 1);
                            newPoss.add(prevCurrentSequence);
                            newPossibilities.add(newPoss);
                        }
                    }
                }
            }
            if (addWord) {
                // default: append the subsequence as its own word
                // (the existing grouping list is extended in place)
                possibility.add(subsequence);
                newPossibilities.add(possibility);
            }
        }
        possibilities = newPossibilities;
        // stop enumerating once the number of groupings exceeds 1000
        if (possibilities.size() > 1000) {
            break;
        }
    }
    // Phase 2: group the groupings by their minimum word frequency.
    TreeMap<Integer, List<List<LetterSequence>>> freqPossibilityMap = new TreeMap<>();
    for (List<LetterSequence> possibility : possibilities) {
        // does this grouping contain at least one non-punctuation subsequence?
        boolean hasWords = false;
        for (LetterSequence subsequence : possibility) {
            if (!subsequence.isPunctation()) {
                hasWords = true;
                break;
            }
        }
        int minFreq = Integer.MAX_VALUE;
        for (LetterSequence subsequence : possibility) {
            // NOTE(review): always uses the guessed word here, regardless of the
            // guessedWord parameter - confirm this is intended
            String word = subsequence.getGuessedWord();
            int freq = 0;
            List<CountedOutcome<String>> frequencies = this.linguistics.getFrequencies(word);
            // NOTE(review): size() is called before the null check further down,
            // so a null list would already fail here
            if (frequencies.size() == 0) {
                // check whether word is impossible
                // (an impossible word is recorded with a count of -1)
                if (!this.linguistics.isWordPossible(word)) {
                    frequencies.add(new CountedOutcome<>(word, -1));
                }
            }
            if (frequencies != null && frequencies.size() > 0) {
                // use the count of the first entry as the word's frequency
                subsequence.setWordFrequencies(frequencies);
                letterSequence.getWordFrequencies().add(frequencies.get(0));
                freq = frequencies.get(0).getCount();
            } else {
                // unknown but possible word: record it with a count of 0
                frequencies = new ArrayList<>();
                frequencies.add(new CountedOutcome<>(word, 0));
                freq = 0;
                subsequence.setWordFrequencies(frequencies);
                letterSequence.getWordFrequencies().add(frequencies.get(0));
            }
            // punctuation does not count towards the minimum when the
            // grouping contains real words
            if (subsequence.isPunctation() && hasWords) {
                continue;
            }
            if (freq < minFreq)
                minFreq = freq;
        }
        List<List<LetterSequence>> possibilitiesAtFreq = freqPossibilityMap.get(minFreq);
        if (possibilitiesAtFreq == null) {
            possibilitiesAtFreq = new ArrayList<>();
            freqPossibilityMap.put(minFreq, possibilitiesAtFreq);
        }
        possibilitiesAtFreq.add(possibility);
    }
    // Phase 3:
    // Out of all of the sub-sequences possibilities giving the max
    // frequency in the lexicon
    // we choose the one containing the single longest word to populate the
    // subsequences for this letter sequence
    // and select its hyphenated content.
    // Thus if both halves of an existing hyphenated word also happen to
    // exist independently as words in the lexicon,
    // we'll still take the longer hyphenated word.
    // NOTE(review): lastEntry() returns null for an empty map - this relies on
    // at least one grouping having survived phase 1
    List<List<LetterSequence>> maxFreqPossibilities = freqPossibilityMap.lastEntry().getValue();
    List<LetterSequence> maxLengthList = null;
    int maxLengthForList = -1;
    for (List<LetterSequence> possibility : maxFreqPossibilities) {
        // length of the longest guessed word in this grouping
        int maxLength = 0;
        for (LetterSequence subsequence : possibility) {
            String word = subsequence.getGuessedWord();
            if (word.length() > maxLength)
                maxLength = word.length();
        }
        // strict comparison: on a tie the earlier grouping is kept
        if (maxLength > maxLengthForList) {
            maxLengthList = possibility;
            maxLengthForList = maxLength;
        }
    }
    frequency = freqPossibilityMap.lastEntry().getKey();
    letterSequence.setSubsequences(maxLengthList);
    // construct the hyphenated string out of the subsequences directly
    // surrounding the hyphen
    // making sure to leave out any opening and closing punctuation
    String hyphenatedString = "";
    boolean foundFirstWord = false;
    // punctuation seen since the last word; only flushed into the result
    // when another word follows, so trailing punctuation is dropped
    String punctuationString = "";
    for (LetterSequence subsequence : maxLengthList) {
        if (subsequence.getHyphenSubsequence() != null) {
            letterSequence.setHyphenSubsequence(subsequence.getHyphenSubsequence());
        }
        if (!foundFirstWord && !subsequence.isPunctation())
            foundFirstWord = true;
        if (foundFirstWord && subsequence.isPunctation()) {
            punctuationString += subsequence.getGuessedWord();
        } else if (foundFirstWord) {
            hyphenatedString += punctuationString;
            punctuationString = "";
            hyphenatedString += subsequence.getGuessedWord();
        }
    }
    // the hyphenated string is only stored when the letter sequence is split
    if (letterSequence.isSplit()) {
        letterSequence.setHyphenatedString(hyphenatedString);
        for (LetterSequence subsequence : maxLengthList) {
            subsequence.setHyphenatedString(hyphenatedString);
        }
    }
    return frequency;
}
Also used : LetterSequence(com.joliciel.jochre.letterGuesser.LetterSequence) Shape(com.joliciel.jochre.graphics.Shape) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) CountedOutcome(com.joliciel.talismane.utils.CountedOutcome) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

Shape (com.joliciel.jochre.graphics.Shape)74 ArrayList (java.util.ArrayList)22 GroupOfShapes (com.joliciel.jochre.graphics.GroupOfShapes)14 JochreImage (com.joliciel.jochre.graphics.JochreImage)13 Paragraph (com.joliciel.jochre.graphics.Paragraph)9 RowOfShapes (com.joliciel.jochre.graphics.RowOfShapes)9 Decision (com.joliciel.talismane.machineLearning.Decision)8 Test (org.junit.Test)8 JochreSession (com.joliciel.jochre.JochreSession)7 JochrePage (com.joliciel.jochre.doc.JochrePage)7 Config (com.typesafe.config.Config)7 TreeSet (java.util.TreeSet)7 JochreDocument (com.joliciel.jochre.doc.JochreDocument)6 BufferedImage (java.awt.image.BufferedImage)6 ShapeInSequence (com.joliciel.jochre.boundaries.ShapeInSequence)5 ShapeSequence (com.joliciel.jochre.boundaries.ShapeSequence)5 GraphicsDao (com.joliciel.jochre.graphics.GraphicsDao)5 RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment)5 SplitFeature (com.joliciel.jochre.boundaries.features.SplitFeature)4 JochreException (com.joliciel.jochre.utils.JochreException)4