Use of com.joliciel.jochre.graphics.ShapeRightToLeftComparator in project jochre by urieli.
The class TrainingCorpusShapeSplitter, method split.
/**
 * Splits a shape whose annotation holds several letters into one sub-shape per letter.
 *
 * <p>If the annotation, once its "|" marks are removed, is a single character or one of the
 * dual-character letters reported by the session's linguistics, the shape is added to the
 * sequence unchanged. Otherwise the shape is cut at each of its recorded splits, the annotation
 * is cut into letters, and every piece receives its letter following the iteration order of the
 * direction-dependent comparator.
 *
 * <p>NOTE(review): "|" presumably marks a letter that continues into a neighbouring shape;
 * confirm against the corpus annotation conventions.
 *
 * @param shape the annotated shape to split
 * @return a list containing exactly one sequence, holding either the original shape or its pieces
 * @throws JochreException if a "|" mark is neither directly after the first letter nor directly
 *         before the last letter, if an ambiguous mark cannot be resolved from the neighbouring
 *         shapes, or if the number of pieces differs from the number of letters
 */
@Override
public List<ShapeSequence> split(Shape shape) {
    List<ShapeSequence> shapeSequences = new ArrayList<ShapeSequence>();
    ShapeSequence shapeSequence = new ShapeSequence();
    shapeSequences.add(shapeSequence);

    Set<String> nonSplittableLetters = jochreSession.getLinguistics().getDualCharacterLetters();
    String annotatedLetter = shape.getLetter();
    String testLetter = annotatedLetter.replace("|", "");
    if (testLetter.length() == 1 || nonSplittableLetters.contains(testLetter)) {
        // A single letter (possibly a dual-character one): nothing to split.
        shapeSequence.addShape(shape);
        return shapeSequences;
    }

    // Cut the image first, then the annotation, keeping the original order of operations.
    TreeSet<Shape> splitShapes = cutShapeAtSplits(shape);
    List<String> splitLetters = tokenizeLetter(annotatedLetter, nonSplittableLetters);
    if (annotatedLetter.indexOf('|') >= 0) {
        splitLetters = attachSplitMarks(shape, splitLetters);
    }

    if (splitLetters.size() != splitShapes.size()) {
        // The check fails in both directions, so report both counts rather than guessing which is larger.
        throw new JochreException("Mismatch between number of shapes (" + splitShapes.size() + ") and letters (" + splitLetters.size() + ") in shape " + shape.getId() + ": " + shape.getLetter() + ", " + splitLetters);
    }

    int letterIndex = 0;
    for (Shape splitShape : splitShapes) {
        shapeSequence.addShape(splitShape, shape);
        splitShape.setLetter(splitLetters.get(letterIndex++));
    }
    return shapeSequences;
}

/**
 * Cuts the shape into vertical slices at each of its splits and returns them sorted by the
 * left-to-right or right-to-left comparator, depending on the session's linguistics.
 */
private TreeSet<Shape> cutShapeAtSplits(Shape shape) {
    Comparator<Shape> shapeComparator;
    if (jochreSession.getLinguistics().isLeftToRight()) {
        shapeComparator = new ShapeLeftToRightComparator();
    } else {
        shapeComparator = new ShapeRightToLeftComparator();
    }

    TreeSet<Shape> pieces = new TreeSet<Shape>(shapeComparator);
    // Offset, relative to the shape's left edge, at which the next slice starts.
    int nextLeft = 0;
    for (Split split : shape.getSplits()) {
        Shape piece = shape.getJochreImage().getShape(shape.getLeft() + nextLeft, shape.getTop(), shape.getLeft() + split.getPosition(), shape.getBottom());
        pieces.add(piece);
        nextLeft = split.getPosition() + 1;
    }
    // Whatever remains to the right of the last split forms the final slice.
    Shape lastPiece = shape.getJochreImage().getShape(shape.getLeft() + nextLeft, shape.getTop(), shape.getRight(), shape.getBottom());
    pieces.add(lastPiece);
    return pieces;
}

/**
 * Cuts the annotation into letters, keeping any two consecutive characters that form a
 * non-splittable letter together. "|" marks come out as tokens of their own (unless they happen
 * to be part of a non-splittable pair). The result is never empty: an empty annotation yields a
 * single empty letter.
 */
private List<String> tokenizeLetter(String annotatedLetter, Set<String> nonSplittableLetters) {
    List<String> tokens = new ArrayList<String>();
    // Character read but not yet emitted; 0 means "none pending".
    char pending = 0;
    for (int i = 0; i < annotatedLetter.length(); i++) {
        char c = annotatedLetter.charAt(i);
        if (pending == 0) {
            pending = c;
            continue;
        }
        String pair = "" + pending + c;
        if (nonSplittableLetters.contains(pair)) {
            tokens.add(pair);
            pending = 0;
        } else {
            tokens.add(String.valueOf(pending));
            pending = c;
        }
    }
    if (pending != 0) {
        tokens.add(String.valueOf(pending));
    }
    if (tokens.isEmpty()) {
        tokens.add("");
    }
    return tokens;
}

/**
 * Removes stand-alone "|" tokens, gluing each one onto a neighbouring letter.
 *
 * <p>A mark directly after the first letter becomes a suffix of that letter ("a|"); a mark
 * directly before the last letter becomes a prefix of that letter ("|b"). With exactly three
 * tokens ("a", "|", "b") both readings are possible, so the neighbouring shapes decide. A mark
 * anywhere else is rejected.
 */
private List<String> attachSplitMarks(Shape shape, List<String> splitLetters) {
    List<String> merged = new ArrayList<String>();
    int beforeLastIndex = splitLetters.size() - 2;
    boolean markNextLetter = false;
    int i = 0;
    for (String letter : splitLetters) {
        if (letter.equals("|")) {
            boolean afterFirst = (i == 1);
            boolean beforeLast = (i == beforeLastIndex);
            boolean markFirstLetter;
            if (afterFirst && beforeLast) {
                // smack in the middle - ambiguous split mark
                markFirstLetter = isBackwardsSplit(shape, splitLetters.get(0), splitLetters.get(2));
            } else if (afterFirst) {
                markFirstLetter = true;
            } else if (beforeLast) {
                markFirstLetter = false;
            } else {
                throw new JochreException("Impossible split location for shape " + shape.getId() + ": " + shape.getLetter());
            }

            if (markFirstLetter) {
                // Only the first letter has been emitted so far; append the mark to it.
                merged.set(0, merged.get(0) + "|");
            } else {
                markNextLetter = true;
            }
        } else if (markNextLetter) {
            merged.add("|" + letter);
            markNextLetter = false;
        } else {
            merged.add(letter);
        }
        i++;
    }
    return merged;
}

/**
 * Resolves an ambiguous "a|b" annotation by looking at the shapes just before and after this one
 * in its group.
 *
 * @return {@code true} if the mark belongs to the first letter ("a|"), {@code false} if it
 *         belongs to the last letter ("|b")
 * @throws JochreException if neither neighbour settles the question
 */
private boolean isBackwardsSplit(Shape shape, String previousLetter, String nextLetter) {
    Shape previousShape = null;
    if (shape.getIndex() > 0) {
        previousShape = shape.getGroup().getShapes().get(shape.getIndex() - 1);
    }
    Shape nextShape = null;
    if (shape.getIndex() < shape.getGroup().getShapes().size() - 1) {
        nextShape = shape.getGroup().getShapes().get(shape.getIndex() + 1);
    }

    // An explicit counterpart mark on a neighbour takes precedence over an empty neighbour,
    // and the previous shape is consulted before the next one at each level.
    if (previousShape != null && previousShape.getLetter().equals("|" + previousLetter)) {
        return true;
    }
    if (nextShape != null && nextShape.getLetter().equals(nextLetter + "|")) {
        return false;
    }
    if (previousShape != null && previousShape.getLetter().length() == 0) {
        return true;
    }
    if (nextShape != null && nextShape.getLetter().length() == 0) {
        return false;
    }
    throw new JochreException("Impossible split for shape " + shape.getId() + ": " + previousLetter + "|" + nextLetter);
}
Aggregations