use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
the class TrainingCorpusShapeSplitterTest method testSplit.
/**
 * Checks that {@link TrainingCorpusShapeSplitter#split} cuts a single mocked
 * shape into four sub-shapes and assigns one letter to each.
 * <p>
 * The mocked shape spans (100,100)-(200,200), is labelled with a multi-letter
 * string and exposes three splits at positions 35, 59 and 82. The mocked image
 * returns a distinct sub-shape for each of the four resulting rectangles, and
 * the test verifies the letter set on each of them.
 */
@Test
public void testSplit() throws Exception {
    // Point the configuration library at the dual-character test config and reload it.
    System.setProperty("config.file", "src/test/resources/testDualCharacters.conf");
    ConfigFactory.invalidateCaches();
    final JochreSession jochreSession = new JochreSession(ConfigFactory.load());

    // The shape to split, and the four pieces the image will hand back.
    final Shape wholeShape = mock(Shape.class);
    final Shape firstPiece = mock(Shape.class);
    final Shape secondPiece = mock(Shape.class);
    final Shape thirdPiece = mock(Shape.class);
    final Shape fourthPiece = mock(Shape.class);

    // Containment chain: shape -> group -> row -> paragraph -> image -> page -> document.
    final GroupOfShapes parentGroup = mock(GroupOfShapes.class);
    final RowOfShapes parentRow = mock(RowOfShapes.class);
    final Paragraph parentParagraph = mock(Paragraph.class);
    final JochreImage image = mock(JochreImage.class);
    final JochrePage page = mock(JochrePage.class);
    final JochreDocument document = mock(JochreDocument.class);

    // The split collection and its iterator are mocked rather than real.
    final Iterator<Split> splitIterator = (Iterator<Split>) mock(Iterator.class);
    final List<Split> splitList = (List<Split>) mock(List.class);
    final Split firstSplit = mock(Split.class);
    final Split secondSplit = mock(Split.class);
    final Split thirdSplit = mock(Split.class);

    // Geometry and label of the shape under test.
    when(wholeShape.getLetter()).thenReturn("אָבּער");
    when(wholeShape.getLeft()).thenReturn(100);
    when(wholeShape.getRight()).thenReturn(200);
    when(wholeShape.getTop()).thenReturn(100);
    when(wholeShape.getBottom()).thenReturn(200);

    // Navigation from the shape up to the document locale.
    when(wholeShape.getGroup()).thenReturn(parentGroup);
    when(wholeShape.getJochreImage()).thenReturn(image);
    when(parentGroup.getRow()).thenReturn(parentRow);
    when(parentRow.getParagraph()).thenReturn(parentParagraph);
    when(parentParagraph.getImage()).thenReturn(image);
    when(image.getPage()).thenReturn(page);
    when(page.getDocument()).thenReturn(document);
    when(document.getLocale()).thenReturn(jochreSession.getLocale());

    // Iterating the splits yields exactly three elements, in order.
    when(wholeShape.getSplits()).thenReturn(splitList);
    when(splitList.iterator()).thenReturn(splitIterator);
    when(splitIterator.hasNext()).thenReturn(true, true, true, false);
    when(splitIterator.next()).thenReturn(firstSplit, secondSplit, thirdSplit);
    when(firstSplit.getPosition()).thenReturn(35);
    when(secondSplit.getPosition()).thenReturn(59);
    when(thirdSplit.getPosition()).thenReturn(82);

    // The four rectangles the image is expected to be asked for.
    when(image.getShape(100, 100, 135, 200)).thenReturn(firstPiece);
    when(image.getShape(136, 100, 159, 200)).thenReturn(secondPiece);
    when(image.getShape(160, 100, 182, 200)).thenReturn(thirdPiece);
    when(image.getShape(183, 100, 200, 200)).thenReturn(fourthPiece);

    LOG.debug(wholeShape.toString());
    LOG.debug(wholeShape.getLetter());

    final TrainingCorpusShapeSplitter splitter = new TrainingCorpusShapeSplitter(jochreSession);
    final List<ShapeSequence> sequences = splitter.split(wholeShape);
    final ShapeSequence firstSequence = sequences.get(0);

    assertEquals(4, firstSequence.size());
    LOG.debug("Split into: " + firstSequence.toString());

    // Each piece must have received its own letter.
    verify(firstPiece).setLetter("אָ");
    verify(secondPiece).setLetter("בּ");
    verify(thirdPiece).setLetter("ע");
    verify(fourthPiece).setLetter("ר");
}
use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
the class WidthToXHeightRatioFeature method checkInternal.
/**
 * Computes a feature value from the ratio of the shape's width to its
 * x-height.
 * <p>
 * The ratio is halved and then capped at 1.0. An x-height of zero is replaced
 * by one before dividing.
 *
 * @param shapeWrapper wrapper supplying the shape to measure
 * @param env runtime environment; not read by this method
 * @return the result produced by {@code generateResult} for the capped value
 */
@Override
public FeatureResult<Double> checkInternal(ShapeWrapper shapeWrapper, RuntimeEnvironment env) {
    final Shape measured = shapeWrapper.getShape();
    final double width = measured.getWidth();
    final double rawXHeight = measured.getXHeight();

    // Avoid dividing by zero: fall back to an x-height of one.
    final double divisor = (rawXHeight == 0) ? 1 : rawXHeight;

    final double halved = (width / divisor) * 0.5;
    final double capped = (halved > 1) ? 1.0 : halved;

    return this.generateResult(capped);
}
use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
the class JochreLetterEventStream method getNextGroup.
/**
 * Advances to the next group of shapes, if the group reader has one.
 * <p>
 * The current shape sequence and shape index are always reset first. When a
 * group is available, the shape sequence is taken either from the boundary
 * detector's first result or, when no detector is set, from the group's own
 * shapes; a fresh letter-sequence history is then created for it. When no
 * group is available the shape sequence stays {@code null}.
 */
void getNextGroup() {
    shapeSequence = null;
    shapeIndex = 0;

    if (!groupReader.hasNext()) {
        return;
    }

    final GroupOfShapes nextGroup = groupReader.next();
    if (boundaryDetector == null) {
        // No detector: take the group's shapes as they are.
        shapeSequence = new ShapeSequence();
        for (Shape member : nextGroup.getShapes()) {
            shapeSequence.addShape(member);
        }
    } else {
        // The boundary detector is supposed to supply the correct splits
        // and merges; only its first proposed sequence is used.
        shapeSequence = boundaryDetector.findBoundaries(nextGroup).get(0);
    }

    history = new LetterSequence(shapeSequence, jochreSession);
}
use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
the class LetterSequence method getRealSequence.
/**
 * Returns a string representation of the real sequence behind this letter
 * sequence (including split letters and inkspots).
 * <p>
 * The string is built on the first call and cached. Each original shape
 * contributes its letter once (a shape equal to the one just before it is
 * skipped): an empty letter is written as {@code []}, a multi-character letter
 * that is not a known dual-character letter is wrapped in square brackets, and
 * anything else is written as is.
 *
 * @return the cached real-sequence string
 */
public String getRealSequence() {
    if (realSequence != null) {
        return realSequence;
    }

    final Linguistics linguistics = jochreSession.getLinguistics();
    final StringBuilder text = new StringBuilder();
    Shape previous = null;

    for (ShapeInSequence member : this.getUnderlyingShapeSequence()) {
        for (Shape current : member.getOriginalShapes()) {
            if (!current.equals(previous)) {
                final String letter = current.getLetter();
                if (letter.isEmpty()) {
                    text.append("[]");
                } else if (letter.length() == 1 || linguistics.getDualCharacterLetters().contains(letter)) {
                    // Single characters and recognised dual-character letters are written bare.
                    text.append(letter);
                } else {
                    text.append('[').append(letter).append(']');
                }
            }
            previous = current;
        }
    }

    realSequence = text.toString();
    return realSequence;
}
use of com.joliciel.jochre.graphics.Shape in project jochre by urieli.
the class LetterSequence method splitByGroup.
/**
 * For a letter sequence covering two groups, split this letter sequence
 * into one sequence per group.
 * <p>
 * When {@code isSplit()} is false the returned list contains only this
 * sequence. Otherwise the method proceeds in phases:
 * <ol>
 * <li>Obtain one letter sequence per group, either by reusing
 * {@code groupSequences} when it is already set, or by walking the letters
 * and underlying shapes in parallel and starting a new sequence each time
 * the shape's group changes.</li>
 * <li>Flatten this sequence's subsequences: a subsequence carrying a hyphen
 * subsequence is replaced by its own subsequences.</li>
 * <li>Attach each flattened subsequence to the per-group sequence whose group
 * matches the subsequence's first group, copying over the first word
 * frequency of each attached subsequence.</li>
 * <li>Propagate this sequence's hyphen subsequence (to the first result only),
 * score and adjusted score.</li>
 * </ol>
 * Note that when {@code groupSequences} is reused, the returned list is that
 * same list instance, not a copy.
 *
 * @return the per-group letter sequences, or a single-element list holding
 *         this sequence when it is not split
 */
public List<LetterSequence> splitByGroup() {
List<LetterSequence> letterSequences = new ArrayList<LetterSequence>();
if (this.isSplit()) {
// Phase 1: build (or reuse) one letter sequence per group, indexed by group.
Map<GroupOfShapes, LetterSequence> groupToLetterSequenceMap = new HashMap<GroupOfShapes, LetterSequence>();
if (groupSequences != null) {
// Per-group sequences already exist: reuse them and index each by its first group.
letterSequences = groupSequences;
for (LetterSequence letterSequence : letterSequences) {
groupToLetterSequenceMap.put(letterSequence.getGroups().get(0), letterSequence);
}
} else {
List<String> currentLetters = new ArrayList<String>();
ShapeSequence currentShapes = new ShapeSequence();
GroupOfShapes currentGroup = this.getGroups().get(0);
// NOTE(review): letters and underlyingShapeSequence are indexed with the same i,
// which assumes they have the same length - confirm against the class invariants.
for (int i = 0; i < this.letters.size(); i++) {
String letter = this.letters.get(i);
Shape shape = this.underlyingShapeSequence.get(i).getShape();
if (!currentGroup.equals(shape.getGroup())) {
// Group boundary: close off the sequence accumulated so far and start a new one.
LetterSequence letterSequence = new LetterSequence(currentShapes, currentLetters, jochreSession);
letterSequence.setScore(this.getScore());
letterSequence.setAdjustedScore(this.getAdjustedScore());
groupToLetterSequenceMap.put(currentGroup, letterSequence);
letterSequences.add(letterSequence);
currentLetters = new ArrayList<String>();
currentShapes = new ShapeSequence();
currentGroup = shape.getGroup();
}
currentShapes.addShape(shape);
currentLetters.add(letter);
}
// Flush the final group's letters, if any were accumulated.
if (currentLetters.size() > 0) {
LetterSequence letterSequence = new LetterSequence(currentShapes, currentLetters, jochreSession);
letterSequence.setScore(this.getScore());
letterSequence.setAdjustedScore(this.getAdjustedScore());
groupToLetterSequenceMap.put(currentGroup, letterSequence);
letterSequences.add(letterSequence);
}
}
// Phase 2: flatten subsequences, expanding any that carry an end-of-line hyphen.
// This currentGroup is a new variable; the one above was scoped to the else branch.
GroupOfShapes currentGroup = this.getGroups().get(0);
List<LetterSequence> newSubsequences = new ArrayList<LetterSequence>();
for (LetterSequence subsequence : this.getSubsequences()) {
if (subsequence.getHyphenSubsequence() != null) {
// subsequence contains end-of-line hyphen
// break it up into several subsequences
List<LetterSequence> subsequencesByGroup = subsequence.getSubsequences();
// The first child inherits the parent's hyphen subsequence.
LetterSequence firstSubsequence = subsequencesByGroup.get(0);
firstSubsequence.setHyphenSubsequence(subsequence.getHyphenSubsequence());
newSubsequences.addAll(subsequencesByGroup);
// Every child receives the parent's hyphenated string.
for (LetterSequence subsubsequence : subsequencesByGroup) {
subsubsequence.setHyphenatedString(subsequence.getHyphenatedString());
}
} else {
newSubsequences.add(subsequence);
}
}
// Phase 3:
// assign my subsequences to the correct group
List<LetterSequence> currentSubsequences = new ArrayList<LetterSequence>();
for (LetterSequence subsequence : newSubsequences) {
if (!subsequence.getGroups().get(0).equals(currentGroup)) {
// The subsequence belongs to a different group: hand the batch collected so far
// to the sequence registered for the current group.
// NOTE(review): the map lookup returns null if currentGroup was never registered
// in phase 1, which would throw a NullPointerException on the next line - confirm
// that every subsequence's first group is always among the mapped groups.
LetterSequence currentSequence = groupToLetterSequenceMap.get(currentGroup);
currentSequence.setSubsequences(currentSubsequences);
// Copy the first word frequency of each subsequence onto the group's sequence.
for (LetterSequence oneSubsequence : currentSubsequences) {
if (oneSubsequence.getWordFrequencies().size() > 0) {
currentSequence.getWordFrequencies().add(oneSubsequence.getWordFrequencies().get(0));
}
}
currentSubsequences = new ArrayList<LetterSequence>();
currentGroup = subsequence.getGroups().get(0);
}
currentSubsequences.add(subsequence);
}
// Hand over the last batch, using the same steps as inside the loop above.
if (currentSubsequences.size() > 0) {
LetterSequence currentSequence = groupToLetterSequenceMap.get(currentGroup);
currentSequence.setSubsequences(currentSubsequences);
for (LetterSequence oneSubsequence : currentSubsequences) {
if (oneSubsequence.getWordFrequencies().size() > 0) {
currentSequence.getWordFrequencies().add(oneSubsequence.getWordFrequencies().get(0));
}
}
}
// Phase 4: propagate the hyphen subsequence (first result only) and the scores (all results).
if (this.getHyphenSubsequence() != null)
letterSequences.get(0).setHyphenSubsequence(this.getHyphenSubsequence());
for (LetterSequence letterSequence : letterSequences) {
letterSequence.setScore(this.getScore());
letterSequence.setAdjustedScore(this.getAdjustedScore());
}
} else {
// Not split across groups: this sequence is its own single result.
letterSequences.add(this);
}
return letterSequences;
}
Aggregations