use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
the class SmallChupchikRightNearTopFeature method checkInternal.
/**
 * Looks for a short black run near the top of the right-most populated column
 * of the shape's brightness grid, followed by a white gap and only a limited
 * amount of black below it.
 * <p>
 * The grid is requested from the shape with 11 x-sectors, 13 centre sectors and
 * 1 margin sector. A section is treated as black when its value is at least 0.5.
 * The result is negative when:
 * <ul>
 * <li>no column contains a black section,</li>
 * <li>no black run starting in the top third of the column (integer division
 * of the column height by 3) is followed by a white section,</li>
 * <li>the black run is longer than 5 sections,</li>
 * <li>the gap is shorter than 3 sections, or</li>
 * <li>the number of black sections after the run exceeds the gap size plus 3.</li>
 * </ul>
 *
 * @param shapeWrapper
 *            wrapper giving access to the shape to test
 * @param env
 *            runtime environment (not read by this method)
 * @return the result generated for the boolean "chupchik found" outcome
 */
@Override
public FeatureResult<Boolean> checkInternal(ShapeWrapper shapeWrapper, RuntimeEnvironment env) {
  Shape shape = shapeWrapper.getShape();
  int xSectors = 11;
  int centreSectors = 13;
  int marginSectors = 1;
  double[][] grid = shape.getBrightnessBySection(xSectors, centreSectors, marginSectors, SectionBrightnessMeasurementMethod.RELATIVE_TO_MAX_SECTION);
  int ySectors = grid[0].length;

  // Walk leftwards from the last column until one holds at least one black
  // section. The scan is bounded so that a grid without any black section
  // yields a negative result instead of indexing column -1.
  int testColumn = xSectors - 1;
  boolean foundStuff = false;
  while (!foundStuff && testColumn >= 0) {
    for (int i = 0; i < ySectors; i++) {
      if (grid[testColumn][i] >= 0.5) {
        foundStuff = true;
        break;
      }
    }
    if (!foundStuff) {
      testColumn = testColumn - 1;
    }
  }
  if (!foundStuff) {
    if (LOG.isTraceEnabled()) {
      LOG.trace("No black section found in any column");
    }
    return this.generateResult(false);
  }

  boolean foundChupchik = false;
  boolean foundBlack = false;
  boolean foundMoreBlack = false;
  int startChupchik = 0;
  int startWhite = 0;
  int numBlackBelowChupchik = 0;
  int chupchikSize = 0;
  int gapSize = 0;
  // integer division, exactly as before: the run must start in the top third
  int maxChupchikStart = ySectors / 3;

  // Single top-to-bottom pass over the chosen column:
  // first black run -> first white section after it -> any later black sections.
  for (int i = 0; i < ySectors; i++) {
    if (!foundBlack && i < maxChupchikStart) {
      if (grid[testColumn][i] >= 0.5) {
        foundBlack = true;
        startChupchik = i;
      }
    } else if (!foundChupchik && foundBlack && grid[testColumn][i] < 0.5) {
      chupchikSize = i - startChupchik;
      if (LOG.isTraceEnabled()) {
        LOG.trace("Found chupchick, start=" + startChupchik + ", size=" + chupchikSize);
      }
      foundChupchik = true;
      startWhite = i;
    } else if (foundChupchik && grid[testColumn][i] >= 0.5) {
      if (!foundMoreBlack) {
        // the gap ends at the first black section after the white started
        gapSize = i - startWhite;
        foundMoreBlack = true;
      }
      numBlackBelowChupchik++;
    }
  }

  if (foundChupchik && !foundMoreBlack) {
    // No black below the run: the gap is measured from the start of the run
    // to the bottom of the column.
    // NOTE(review): this uses startChupchik rather than startWhite - confirm intended.
    gapSize = ySectors - startChupchik;
  }

  int maxBlackBelowGap = gapSize + 3;
  if (chupchikSize > 5) {
    if (LOG.isTraceEnabled()) {
      LOG.trace("Chupchik too big: " + chupchikSize);
    }
    foundChupchik = false;
  } else if (gapSize < 3) {
    if (LOG.isTraceEnabled()) {
      LOG.trace("Gap size too small: " + gapSize);
    }
    foundChupchik = false;
  } else if (numBlackBelowChupchik > maxBlackBelowGap) {
    if (LOG.isTraceEnabled()) {
      LOG.trace("Too much black below gap, max: " + maxBlackBelowGap + ", found: " + numBlackBelowChupchik);
    }
    foundChupchik = false;
  }

  FeatureResult<Boolean> outcome = this.generateResult(foundChupchik);
  return outcome;
}
use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
the class ThinRowFeature method checkInternal.
/**
 * Compares the x-height of the row containing the shape with the average row
 * height of the shape's image, and reports whether their ratio is below 0.75.
 *
 * @param shapeWrapper
 *            wrapper giving access to the shape to test
 * @param env
 *            runtime environment (not read by this method)
 * @return the result generated for "ratio is below the threshold"
 */
@Override
public FeatureResult<Boolean> checkInternal(ShapeWrapper shapeWrapper, RuntimeEnvironment env) {
  final double maxRatio = 0.75;

  final Shape currentShape = shapeWrapper.getShape();
  final JochreImage page = currentShape.getJochreImage();
  final double meanRowHeight = page.getAverageRowHeight();
  // despite the log label below, this is the x-height of the shape's row
  final double rowXHeight = currentShape.getGroup().getRow().getXHeight();
  final double heightRatio = rowXHeight / meanRowHeight;

  LOG.trace("averageRowHeight: " + meanRowHeight);
  LOG.trace("shapeHeight: " + rowXHeight);
  LOG.trace("ratio: " + heightRatio);
  LOG.trace("threshold: " + maxRatio);

  final boolean belowThreshold = heightRatio < maxRatio;
  return this.generateResult(belowThreshold);
}
use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
the class TouchesBaseLineFeature method checkInternal.
/**
 * Reports whether the shape's baseline value is non-negative and not greater
 * than the shape's height.
 *
 * @param shapeWrapper
 *            wrapper giving access to the shape to test
 * @param env
 *            runtime environment (not read by this method)
 * @return the result generated for that boolean condition
 */
@Override
public FeatureResult<Boolean> checkInternal(ShapeWrapper shapeWrapper, RuntimeEnvironment env) {
  final Shape currentShape = shapeWrapper.getShape();

  boolean touchesBaseLine = false;
  if (currentShape.getBaseLine() >= 0) {
    // only compare against the height once the baseline is known to be non-negative
    touchesBaseLine = currentShape.getHeight() >= currentShape.getBaseLine();
  }

  return this.generateResult(touchesBaseLine);
}
use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
the class VerticalElongationFeature method checkInternal.
/**
 * Computes the shape's height divided by its width, using floating-point
 * division.
 *
 * @param shapeWrapper
 *            wrapper giving access to the shape to measure
 * @param env
 *            runtime environment (not read by this method)
 * @return the result generated for the height/width ratio
 */
@Override
public FeatureResult<Double> checkInternal(ShapeWrapper shapeWrapper, RuntimeEnvironment env) {
  final Shape currentShape = shapeWrapper.getShape();

  // convert both operands to double before dividing so no integer division occurs
  final double height = (double) currentShape.getHeight();
  final double width = (double) currentShape.getWidth();
  final double elongation = height / width;

  return this.generateResult(elongation);
}
use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
the class MostLikelyWordChooser method getFrequency.
/**
 * Same as {@link #getFrequency(LetterSequence)}, but can either apply to
 * the guessed word or to the real word from the training corpus.
 * <p>
 * The method works in three steps:
 * <ol>
 * <li>It walks the letter sequence's subsequences and builds a list of
 * candidate segmentations ("possibilities"), joining punctuation
 * subsequences with their neighbours in the ways allowed by the dash, the
 * mid-word, start-word and end-word punctuation sets. Building stops early
 * once more than 1000 candidates exist.</li>
 * <li>Each candidate is scored with the lowest frequency found among its
 * subsequences; punctuation subsequences are ignored for the score when the
 * candidate contains at least one non-punctuation subsequence. A word with
 * no frequency entry counts as 0, or as -1 when the linguistics component
 * says the word is not possible.</li>
 * <li>Among the candidates with the highest score, the first one containing
 * the longest single guessed word is installed on the letter sequence.</li>
 * </ol>
 * Side effects: the word frequencies of each scored subsequence are set, the
 * first frequency of each is appended to the letter sequence's word
 * frequencies, and the letter sequence's subsequences, hyphen subsequence and
 * (if it is split) hyphenated string are updated.
 *
 * @param letterSequence
 *            the letter sequence to analyse; it is updated in place
 * @param guessedWord
 *            if true, applies to the guessed word
 * @return the highest candidate score, as described above
 */
public int getFrequency(LetterSequence letterSequence, boolean guessedWord) {
  int frequency = 0;
  List<LetterSequence> subsequences = letterSequence.getSubsequences();
  List<List<LetterSequence>> possibilities = new ArrayList<>();
  possibilities.add(new ArrayList<LetterSequence>());
  // index of the last letter of the current subsequence within the whole sequence
  int lastIndex = -1;
  for (int i = 0; i < subsequences.size(); i++) {
    LetterSequence subsequence = subsequences.get(i);
    lastIndex += subsequence.getLetters().size();
    String word = null;
    if (guessedWord) {
      word = subsequence.getGuessedWord();
    } else {
      word = subsequence.getRealWord();
    }
    List<List<LetterSequence>> newPossibilities = new ArrayList<>();
    for (List<LetterSequence> possibility : possibilities) {
      if (possibility.size() > 0) {
        // has this subsequence already been processed ?
        LetterSequence lastSequence = possibility.get(possibility.size() - 1);
        Shape lastShape = lastSequence.getUnderlyingShapeSequence().get(lastSequence.getUnderlyingShapeSequence().size() - 1).getShape();
        Shape myLastShape = subsequence.getUnderlyingShapeSequence().get(subsequence.getUnderlyingShapeSequence().size() - 1).getShape();
        if (lastShape.equals(myLastShape)) {
          newPossibilities.add(possibility);
          continue;
        }
      }
      boolean addWord = true;
      if (subsequence.isPunctation()) {
        if (word.equals("-") || midWordPunctuation.contains(word) || startWordPunctuation.contains(word) || endWordPunctuation.contains(word)) {
          LetterSequence prevSequence = possibility.size() == 0 ? null : possibility.get(possibility.size() - 1);
          LetterSequence nextSequence = i == subsequences.size() - 1 ? null : subsequences.get(i + 1);
          LetterSequence prevCurrentSequence = new LetterSequence(prevSequence, subsequence);
          LetterSequence currentNextSequence = new LetterSequence(subsequence, nextSequence);
          LetterSequence prevCurrentNextSequence = new LetterSequence(prevCurrentSequence, nextSequence);
          if (word.equals("-")) {
            if (prevSequence == null && nextSequence == null) {
              newPossibilities.add(possibility);
            } else if (prevSequence == null) {
              // dash kept separate, or joined to what follows
              List<LetterSequence> newPoss = new ArrayList<>();
              newPoss.add(subsequence);
              newPoss.add(nextSequence);
              newPossibilities.add(newPoss);
              newPoss = new ArrayList<>();
              newPoss.add(currentNextSequence);
              newPossibilities.add(newPoss);
            } else if (nextSequence == null) {
              // dash kept separate, or joined to what precedes
              List<LetterSequence> newPoss = new ArrayList<>(possibility);
              newPoss.add(subsequence);
              newPossibilities.add(newPoss);
              newPoss = new ArrayList<>(possibility);
              newPoss.remove(newPoss.size() - 1);
              newPoss.add(prevCurrentSequence);
              newPossibilities.add(newPoss);
            } else {
              // dash separate / joined right / joined left / joined both sides
              List<LetterSequence> newPoss = new ArrayList<>(possibility);
              newPoss.add(subsequence);
              newPoss.add(nextSequence);
              newPossibilities.add(newPoss);
              newPoss = new ArrayList<>(possibility);
              newPoss.add(currentNextSequence);
              newPossibilities.add(newPoss);
              newPoss = new ArrayList<>(possibility);
              newPoss.remove(newPoss.size() - 1);
              newPoss.add(prevCurrentSequence);
              newPoss.add(nextSequence);
              newPossibilities.add(newPoss);
              newPoss = new ArrayList<>(possibility);
              newPoss.remove(newPoss.size() - 1);
              newPoss.add(prevCurrentNextSequence);
              newPossibilities.add(newPoss);
              // add skipped dash possibility
              if (lastIndex == letterSequence.getEndOfLineHyphenIndex()) {
                subsequence.setHyphenSubsequence(subsequence);
                prevCurrentNextSequence.setHyphenSubsequence(subsequence);
                prevCurrentSequence.setHyphenSubsequence(subsequence);
                currentNextSequence.setHyphenSubsequence(subsequence);
                LetterSequence prevNextSequence = new LetterSequence(prevCurrentSequence, nextSequence);
                prevNextSequence.setHyphenSubsequence(subsequence);
                prevNextSequence.setSoftHyphen(true);
                newPoss = new ArrayList<>(possibility);
                newPoss.remove(newPoss.size() - 1);
                newPoss.add(prevNextSequence);
                newPossibilities.add(newPoss);
              }
            }
            addWord = false;
          }
          if (midWordPunctuation.contains(word)) {
            if (prevSequence != null && nextSequence != null) {
              List<LetterSequence> newPoss = new ArrayList<>(possibility);
              newPoss.remove(newPoss.size() - 1);
              newPoss.add(prevCurrentNextSequence);
              newPossibilities.add(newPoss);
              addWord = false;
            }
          }
          if (startWordPunctuation.contains(word)) {
            if (nextSequence != null && !subsequences.get(subsequences.size() - 1).getGuessedWord().equals(word)) {
              List<LetterSequence> newPoss = new ArrayList<>(possibility);
              newPoss.add(currentNextSequence);
              newPossibilities.add(newPoss);
              newPoss = new ArrayList<>(possibility);
              newPoss.add(subsequence);
              newPoss.add(nextSequence);
              newPossibilities.add(newPoss);
              addWord = false;
            }
          }
          if (endWordPunctuation.contains(word) && !subsequences.get(0).getGuessedWord().equals(word)) {
            if (prevSequence != null) {
              List<LetterSequence> newPoss = new ArrayList<>(possibility);
              newPoss.remove(newPoss.size() - 1);
              newPoss.add(prevCurrentSequence);
              newPossibilities.add(newPoss);
            }
          }
        }
      }
      if (addWord) {
        possibility.add(subsequence);
        newPossibilities.add(possibility);
      }
    }
    possibilities = newPossibilities;
    // stop expanding once the number of candidates gets too large
    if (possibilities.size() > 1000) {
      break;
    }
  }

  // Group the candidates by score (lowest frequency among their subsequences).
  TreeMap<Integer, List<List<LetterSequence>>> freqPossibilityMap = new TreeMap<>();
  for (List<LetterSequence> possibility : possibilities) {
    boolean hasWords = false;
    for (LetterSequence subsequence : possibility) {
      if (!subsequence.isPunctation()) {
        hasWords = true;
        break;
      }
    }
    int minFreq = Integer.MAX_VALUE;
    for (LetterSequence subsequence : possibility) {
      // NOTE(review): scoring always uses the guessed word, whatever the
      // guessedWord flag says - confirm this is intended.
      String word = subsequence.getGuessedWord();
      List<CountedOutcome<String>> frequencies = this.linguistics.getFrequencies(word);
      if (frequencies == null || frequencies.isEmpty()) {
        // No entry for this word. Build the fallback entry in a list owned by
        // this method, so the list returned by the lookup is never modified
        // and a null lookup result is tolerated.
        // -1 marks a word that is not possible, 0 an unknown but possible word.
        int fallbackCount = this.linguistics.isWordPossible(word) ? 0 : -1;
        frequencies = new ArrayList<>();
        frequencies.add(new CountedOutcome<>(word, fallbackCount));
      }
      subsequence.setWordFrequencies(frequencies);
      letterSequence.getWordFrequencies().add(frequencies.get(0));
      int freq = frequencies.get(0).getCount();
      // punctuation does not influence the score when real words are present
      if (subsequence.isPunctation() && hasWords) {
        continue;
      }
      if (freq < minFreq) {
        minFreq = freq;
      }
    }
    List<List<LetterSequence>> possibilitiesAtFreq = freqPossibilityMap.get(minFreq);
    if (possibilitiesAtFreq == null) {
      possibilitiesAtFreq = new ArrayList<>();
      freqPossibilityMap.put(minFreq, possibilitiesAtFreq);
    }
    possibilitiesAtFreq.add(possibility);
  }

  // Out of all of the sub-sequences possibilities giving the max
  // frequency in the lexicon
  // we choose the one containing the single longest word to populate the
  // subsequences for this letter sequence
  // and select its hyphenated content.
  // Thus if both halves of an existing hyphenated word also happen to
  // exist independently as words in the lexicon,
  // we'll still take the longer hyphenated word.
  List<List<LetterSequence>> maxFreqPossibilities = freqPossibilityMap.lastEntry().getValue();
  List<LetterSequence> maxLengthList = null;
  int maxLengthForList = -1;
  for (List<LetterSequence> possibility : maxFreqPossibilities) {
    int maxLength = 0;
    for (LetterSequence subsequence : possibility) {
      String word = subsequence.getGuessedWord();
      if (word.length() > maxLength) {
        maxLength = word.length();
      }
    }
    // strict comparison: on a tie the earliest candidate wins
    if (maxLength > maxLengthForList) {
      maxLengthList = possibility;
      maxLengthForList = maxLength;
    }
  }
  frequency = freqPossibilityMap.lastEntry().getKey();
  letterSequence.setSubsequences(maxLengthList);

  // construct the hyphenated string out of the subsequences directly
  // surrounding the hyphen
  // making sure to leave out any opening and closing punctuation
  StringBuilder hyphenatedBuilder = new StringBuilder();
  // punctuation seen since the last word; only emitted if another word follows
  StringBuilder pendingPunctuation = new StringBuilder();
  boolean foundFirstWord = false;
  for (LetterSequence subsequence : maxLengthList) {
    if (subsequence.getHyphenSubsequence() != null) {
      letterSequence.setHyphenSubsequence(subsequence.getHyphenSubsequence());
    }
    if (!foundFirstWord && !subsequence.isPunctation()) {
      foundFirstWord = true;
    }
    if (foundFirstWord && subsequence.isPunctation()) {
      pendingPunctuation.append(subsequence.getGuessedWord());
    } else if (foundFirstWord) {
      hyphenatedBuilder.append(pendingPunctuation);
      pendingPunctuation.setLength(0);
      hyphenatedBuilder.append(subsequence.getGuessedWord());
    }
  }
  String hyphenatedString = hyphenatedBuilder.toString();
  if (letterSequence.isSplit()) {
    letterSequence.setHyphenatedString(hyphenatedString);
    for (LetterSequence subsequence : maxLengthList) {
      subsequence.setHyphenatedString(hyphenatedString);
    }
  }
  return frequency;
}
Aggregations