use of com.joliciel.jochre.graphics.GroupOfShapes in project jochre by urieli.
the class TrainingCorpusShapeSplitterTest method testSplit.
/**
 * Checks that a shape labelled "אָבּער" and carrying three splits is broken
 * into a sequence of four shapes, each of which receives its own letter.
 * All graphics objects are mocks; only the session is built from the test
 * configuration file.
 */
@Test
public void testSplit() throws Exception {
  // Session driven by the dual-character test configuration.
  System.setProperty("config.file", "src/test/resources/testDualCharacters.conf");
  ConfigFactory.invalidateCaches();
  final Config config = ConfigFactory.load();
  final JochreSession jochreSession = new JochreSession(config);

  // Mocked container hierarchy: shape -> group -> row -> paragraph -> image -> page -> document.
  final JochreDocument jochreDocument = mock(JochreDocument.class);
  final JochrePage jochrePage = mock(JochrePage.class);
  final JochreImage jochreImage = mock(JochreImage.class);
  final Paragraph paragraph = mock(Paragraph.class);
  final RowOfShapes row = mock(RowOfShapes.class);
  final GroupOfShapes group = mock(GroupOfShapes.class);
  final Shape wholeShape = mock(Shape.class);

  // The four pieces the image is expected to hand back after splitting.
  final Shape piece1 = mock(Shape.class);
  final Shape piece2 = mock(Shape.class);
  final Shape piece3 = mock(Shape.class);
  final Shape piece4 = mock(Shape.class);

  // Three splits, served through a mocked list and iterator.
  final Split firstSplit = mock(Split.class);
  final Split secondSplit = mock(Split.class);
  final Split thirdSplit = mock(Split.class);
  final List<Split> splitList = (List<Split>) mock(List.class);
  final Iterator<Split> splitIterator = (Iterator<Split>) mock(Iterator.class);

  // Geometry and label of the shape to split.
  when(wholeShape.getLetter()).thenReturn("אָבּער");
  when(wholeShape.getLeft()).thenReturn(100);
  when(wholeShape.getRight()).thenReturn(200);
  when(wholeShape.getTop()).thenReturn(100);
  when(wholeShape.getBottom()).thenReturn(200);

  // Navigation from the shape up to the document locale.
  when(wholeShape.getGroup()).thenReturn(group);
  when(wholeShape.getJochreImage()).thenReturn(jochreImage);
  when(group.getRow()).thenReturn(row);
  when(row.getParagraph()).thenReturn(paragraph);
  when(paragraph.getImage()).thenReturn(jochreImage);
  when(jochreImage.getPage()).thenReturn(jochrePage);
  when(jochrePage.getDocument()).thenReturn(jochreDocument);
  when(jochreDocument.getLocale()).thenReturn(jochreSession.getLocale());

  // Split positions, iterated in order: 35, 59, 82.
  when(wholeShape.getSplits()).thenReturn(splitList);
  when(splitList.iterator()).thenReturn(splitIterator);
  when(splitIterator.hasNext()).thenReturn(true, true, true, false);
  when(splitIterator.next()).thenReturn(firstSplit, secondSplit, thirdSplit);
  when(firstSplit.getPosition()).thenReturn(35);
  when(secondSplit.getPosition()).thenReturn(59);
  when(thirdSplit.getPosition()).thenReturn(82);

  // One piece per rectangle requested from the image.
  when(jochreImage.getShape(100, 100, 135, 200)).thenReturn(piece1);
  when(jochreImage.getShape(136, 100, 159, 200)).thenReturn(piece2);
  when(jochreImage.getShape(160, 100, 182, 200)).thenReturn(piece3);
  when(jochreImage.getShape(183, 100, 200, 200)).thenReturn(piece4);

  LOG.debug(wholeShape.toString());
  LOG.debug(wholeShape.getLetter());

  final TrainingCorpusShapeSplitter splitter = new TrainingCorpusShapeSplitter(jochreSession);
  final List<ShapeSequence> sequences = splitter.split(wholeShape);
  final ShapeSequence firstSequence = sequences.get(0);

  assertEquals(4, firstSequence.size());
  LOG.debug("Split into: " + firstSequence.toString());

  // Each piece must have been labelled with its own letter.
  verify(piece1).setLetter("אָ");
  verify(piece2).setLetter("בּ");
  verify(piece3).setLetter("ע");
  verify(piece4).setLetter("ר");
}
use of com.joliciel.jochre.graphics.GroupOfShapes in project jochre by urieli.
the class JochreLetterEventStream method getNextGroup.
/**
 * Resets the current shape sequence and shape index, then, if the group
 * reader has another group, loads it and rebuilds the letter history.
 * When the reader is exhausted the shape sequence is left null and the
 * history is not touched.
 */
void getNextGroup() {
  shapeSequence = null;
  shapeIndex = 0;

  if (!groupReader.hasNext()) {
    return;
  }

  final GroupOfShapes nextGroup = groupReader.next();
  if (boundaryDetector == null) {
    // no boundary detector: take the group's shapes exactly as they are
    shapeSequence = new ShapeSequence();
    for (Shape member : nextGroup.getShapes()) {
      shapeSequence.addShape(member);
    }
  } else {
    // the boundary detector is supposed to give us the correct splits
    // and merges; only its first proposed sequence is used
    shapeSequence = boundaryDetector.findBoundaries(nextGroup).get(0);
  }

  history = new LetterSequence(shapeSequence, jochreSession);
}
use of com.joliciel.jochre.graphics.GroupOfShapes in project jochre by urieli.
the class LetterSequence method splitByGroup.
/**
 * For a letter sequence covering two groups, split this letter sequence
 * into one sequence per group.
 * <p>
 * If this sequence is not split ({@code isSplit()} is false), the result is
 * a new list containing only this sequence. Otherwise the per-group
 * sequences are either taken from {@code groupSequences} when already set
 * (in which case that very list is returned), or built by walking the
 * letters and starting a new sequence each time the shape's group changes.
 * The subsequences of this sequence are then distributed over the per-group
 * sequences, and every per-group sequence receives this sequence's score
 * and adjusted score.
 *
 * @return the per-group letter sequences, or a list containing only this
 *         sequence when it is not split
 */
public List<LetterSequence> splitByGroup() {
  List<LetterSequence> letterSequences = new ArrayList<LetterSequence>();
  if (!this.isSplit()) {
    letterSequences.add(this);
    return letterSequences;
  }

  Map<GroupOfShapes, LetterSequence> groupToLetterSequenceMap = new HashMap<GroupOfShapes, LetterSequence>();
  if (groupSequences != null) {
    // per-group sequences already known: reuse them and index them by their first group
    letterSequences = groupSequences;
    for (LetterSequence letterSequence : letterSequences) {
      groupToLetterSequenceMap.put(letterSequence.getGroups().get(0), letterSequence);
    }
  } else {
    // walk letters and shapes in parallel, cutting whenever the shape's group changes
    List<String> currentLetters = new ArrayList<String>();
    ShapeSequence currentShapes = new ShapeSequence();
    GroupOfShapes currentGroup = this.getGroups().get(0);
    for (int i = 0; i < this.letters.size(); i++) {
      String letter = this.letters.get(i);
      Shape shape = this.underlyingShapeSequence.get(i).getShape();
      if (!currentGroup.equals(shape.getGroup())) {
        this.registerGroupSequence(currentShapes, currentLetters, currentGroup, groupToLetterSequenceMap, letterSequences);
        currentLetters = new ArrayList<String>();
        currentShapes = new ShapeSequence();
        currentGroup = shape.getGroup();
      }
      currentShapes.addShape(shape);
      currentLetters.add(letter);
    }
    if (currentLetters.size() > 0) {
      // flush the letters gathered for the last group
      this.registerGroupSequence(currentShapes, currentLetters, currentGroup, groupToLetterSequenceMap, letterSequences);
    }
  }

  GroupOfShapes currentGroup = this.getGroups().get(0);
  List<LetterSequence> newSubsequences = new ArrayList<LetterSequence>();
  for (LetterSequence subsequence : this.getSubsequences()) {
    if (subsequence.getHyphenSubsequence() != null) {
      // subsequence contains end-of-line hyphen
      // break it up into several subsequences
      List<LetterSequence> subsequencesByGroup = subsequence.getSubsequences();
      LetterSequence firstSubsequence = subsequencesByGroup.get(0);
      firstSubsequence.setHyphenSubsequence(subsequence.getHyphenSubsequence());
      newSubsequences.addAll(subsequencesByGroup);
      for (LetterSequence subsubsequence : subsequencesByGroup) {
        subsubsequence.setHyphenatedString(subsequence.getHyphenatedString());
      }
    } else {
      newSubsequences.add(subsequence);
    }
  }

  // assign my subsequences to the correct group
  List<LetterSequence> currentSubsequences = new ArrayList<LetterSequence>();
  for (LetterSequence subsequence : newSubsequences) {
    if (!subsequence.getGroups().get(0).equals(currentGroup)) {
      this.assignSubsequencesToGroup(groupToLetterSequenceMap, currentGroup, currentSubsequences);
      currentSubsequences = new ArrayList<LetterSequence>();
      currentGroup = subsequence.getGroups().get(0);
    }
    currentSubsequences.add(subsequence);
  }
  if (currentSubsequences.size() > 0) {
    this.assignSubsequencesToGroup(groupToLetterSequenceMap, currentGroup, currentSubsequences);
  }

  if (this.getHyphenSubsequence() != null) {
    letterSequences.get(0).setHyphenSubsequence(this.getHyphenSubsequence());
  }
  // every per-group sequence carries the score of the sequence it came from
  for (LetterSequence letterSequence : letterSequences) {
    letterSequence.setScore(this.getScore());
    letterSequence.setAdjustedScore(this.getAdjustedScore());
  }
  return letterSequences;
}

/**
 * Builds the letter sequence for one group from the shapes and letters
 * gathered so far, gives it this sequence's scores, and records it both in
 * the group lookup map and in the ordered result list.
 */
private void registerGroupSequence(ShapeSequence shapes, List<String> groupLetters, GroupOfShapes group,
    Map<GroupOfShapes, LetterSequence> groupToLetterSequenceMap, List<LetterSequence> letterSequences) {
  LetterSequence letterSequence = new LetterSequence(shapes, groupLetters, jochreSession);
  letterSequence.setScore(this.getScore());
  letterSequence.setAdjustedScore(this.getAdjustedScore());
  groupToLetterSequenceMap.put(group, letterSequence);
  letterSequences.add(letterSequence);
}

/**
 * Sets the given subsequences on the letter sequence mapped to the group,
 * and appends the first word frequency of each subsequence that has any to
 * that letter sequence's word frequencies.
 */
private void assignSubsequencesToGroup(Map<GroupOfShapes, LetterSequence> groupToLetterSequenceMap,
    GroupOfShapes group, List<LetterSequence> groupSubsequences) {
  LetterSequence groupSequence = groupToLetterSequenceMap.get(group);
  groupSequence.setSubsequences(groupSubsequences);
  for (LetterSequence oneSubsequence : groupSubsequences) {
    if (oneSubsequence.getWordFrequencies().size() > 0) {
      groupSequence.getWordFrequencies().add(oneSubsequence.getWordFrequencies().get(0));
    }
  }
}
use of com.joliciel.jochre.graphics.GroupOfShapes in project jochre by urieli.
the class FixTextWindowController method onClick$btnOK.
/**
 * Handles a click on the OK button of the fix-text window: hides the
 * window, copies the checkbox states onto the group stored in the window's
 * attributes and saves it, then rebuilds the row text from the per-shape
 * letter boxes and asks the letter label updater to refresh.
 *
 * @param event the click event (not inspected)
 */
@Listen("onClick = #btnOK")
public void onClick$btnOK(Event event) {
  // TODO: replacing all occurrences of the same word on the line instead
  // of just the current one
  LOG.debug("onClick$btnOK");
  winFixText.setVisible(false);

  // State handed over through the window's attributes.
  String rowText = (String) winFixText.getAttribute(FixTextWindowController.ATTR_ROW_TEXT);
  Textbox rowTextBox = (Textbox) winFixText.getAttribute(FixTextWindowController.ATTR_ROW_TEXTBOX);
  LetterLabelUpdater updater = (LetterLabelUpdater) winFixText.getAttribute(FixTextWindowController.ATTR_LETTER_UPDATER);
  GroupOfShapes group = (GroupOfShapes) winFixText.getAttribute(FixTextWindowController.ATTR_GROUP);

  group.setSkip(chkSkip.isChecked());
  group.setHardHyphen(chkHardHyphen.isChecked());
  group.setBrokenWord(chkBrokenWord.isChecked());
  group.setSegmentationProblem(chkSegmentProblem.isChecked());
  group.save();

  // Concatenate the display form of each shape's letter, looking up the
  // letter box by the shape's id so the text follows the group's shape order.
  StringBuilder sb = new StringBuilder();
  for (Shape shape : group.getShapes()) {
    Textbox letterBox = (Textbox) letterBoxRow.getFellow("FixTextLetterBox_" + shape.getId());
    String letter = letterBox.getText();
    String newLetter = ImageController.getLetterForDisplay(letter);
    LOG.debug("Letter: " + letter + ", newLetter: " + newLetter);
    sb.append(newLetter);
  }
  LOG.debug(sb.toString());

  // Substitute the rebuilt word for the placeholder in the row text.
  String newText = rowText.replace(FixTextWindowController.ROW_TEXT_PLACE_HOLDER, sb.toString());
  rowTextBox.setText(newText);
  updater.updateLetterLabels();
}
use of com.joliciel.jochre.graphics.GroupOfShapes in project jochre by urieli.
the class LastShapeInRowFeature method checkInternal.
/**
 * Produces a true result only when the wrapped shape is the last one in its
 * shape sequence and its group's index equals the index of the last group
 * in the group's row; otherwise produces a false result.
 */
@Override
public FeatureResult<Boolean> checkInternal(ShapeInSequenceWrapper wrapper, RuntimeEnvironment env) {
  final ShapeInSequence current = wrapper.getShapeInSequence();
  final boolean endsSequence = current.getShapeSequence().size() == (current.getIndex() + 1);

  boolean endsRow = false;
  if (endsSequence) {
    // the group is only looked up once the shape is known to close its sequence
    final GroupOfShapes owner = current.getShape().getGroup();
    final int ownIndex = owner.getIndex();
    final int lastGroupIndex = owner.getRow().getGroups().size() - 1;
    endsRow = (ownIndex == lastGroupIndex);
  }

  return this.generateResult(endsRow);
}
Aggregations