Search in sources :

Example 1 with ShapeRightToLeftComparator

use of com.joliciel.jochre.graphics.ShapeRightToLeftComparator in project jochre by urieli.

the class TrainingCorpusShapeSplitter method split.

/**
 * Splits a training-corpus shape into one sub-shape per letter, using the
 * split positions already recorded on the shape and the letters of its label.
 *
 * <p>If the label (ignoring any {@code |} markers) is a single character, or
 * is one of the dual-character letters reported by the linguistics, the shape
 * is returned unsplit. Otherwise the shape is cut at each recorded split, the
 * label is broken into letters, and each piece is assigned its letter in
 * reading order.
 *
 * @param shape the shape to split
 * @return a list holding exactly one {@link ShapeSequence}, which contains
 *         either the original shape or the pieces it was cut into
 * @throws JochreException if a {@code |} marker sits at a position that cannot
 *         be interpreted, if a centred {@code |} cannot be disambiguated from
 *         the neighbouring shapes, or if the number of pieces differs from the
 *         number of letters
 */
@Override
public List<ShapeSequence> split(Shape shape) {
    List<ShapeSequence> shapeSequences = new ArrayList<ShapeSequence>();
    ShapeSequence shapeSequence = new ShapeSequence();
    shapeSequences.add(shapeSequence);

    Set<String> nonSplittableLetters = jochreSession.getLinguistics().getDualCharacterLetters();
    String fullLetter = shape.getLetter();
    String testLetter = fullLetter.replace("|", "");
    if (testLetter.length() == 1 || nonSplittableLetters.contains(testLetter)) {
        // A single letter (or a known dual-character letter): nothing to cut.
        shapeSequence.addShape(shape);
        return shapeSequences;
    }

    TreeSet<Shape> splitShapes = cutShapeAtSplits(shape);

    List<String> splitLetters = tokenizeLetters(fullLetter, nonSplittableLetters);
    if (fullLetter.indexOf('|') >= 0) {
        // The label carries a split marker: fold it into the adjacent letter.
        splitLetters = attachSplitMarks(shape, splitLetters);
    }

    if (splitLetters.size() != splitShapes.size()) {
        // The two counts must match exactly; report both so that either
        // direction of mismatch is diagnosable.
        throw new JochreException("Number of shapes (" + splitShapes.size() + ") does not match number of letters ("
                + splitLetters.size() + ") in shape " + shape.getId() + ": " + fullLetter + ", " + splitLetters);
    }

    // Pieces iterate in reading order, so the n-th piece gets the n-th letter.
    int i = 0;
    for (Shape splitShape : splitShapes) {
        shapeSequence.addShape(splitShape, shape);
        splitShape.setLetter(splitLetters.get(i++));
    }
    return shapeSequences;
}

/**
 * Cuts the shape at each of its recorded splits and returns the pieces sorted
 * in reading order (left-to-right or right-to-left, as the linguistics say).
 */
private TreeSet<Shape> cutShapeAtSplits(Shape shape) {
    Comparator<Shape> readingOrder;
    if (jochreSession.getLinguistics().isLeftToRight()) {
        readingOrder = new ShapeLeftToRightComparator();
    } else {
        readingOrder = new ShapeRightToLeftComparator();
    }
    // NOTE(review): a TreeSet keeps only one of any two elements its
    // comparator reports as equal; if that ever happened for two pieces, the
    // size check in split() would fail. Confirm the comparators never return
    // 0 for distinct pieces.
    TreeSet<Shape> pieces = new TreeSet<Shape>(readingOrder);

    // Split positions are relative to the shape's left edge; each piece
    // starts one pixel after the previous split position.
    int lastLeft = 0;
    for (Split split : shape.getSplits()) {
        Shape piece = shape.getJochreImage().getShape(shape.getLeft() + lastLeft, shape.getTop(),
                shape.getLeft() + split.getPosition(), shape.getBottom());
        pieces.add(piece);
        lastLeft = split.getPosition() + 1;
    }
    Shape lastPiece = shape.getJochreImage().getShape(shape.getLeft() + lastLeft, shape.getTop(),
            shape.getRight(), shape.getBottom());
    pieces.add(lastPiece);
    return pieces;
}

/**
 * Breaks a label into letters, keeping any two adjacent characters together
 * when the pair is one of the dual-character letters. A {@code |} character is
 * emitted as a token of its own. Always returns at least one token (the empty
 * string for an empty label).
 */
private List<String> tokenizeLetters(String letter, Set<String> dualCharacterLetters) {
    List<String> tokens = new ArrayList<String>();
    char pending = 0; // 0 means no character is waiting to be paired
    for (int i = 0; i < letter.length(); i++) {
        char c = letter.charAt(i);
        if (pending == 0) {
            pending = c;
            continue;
        }
        String pair = "" + pending + c;
        if (dualCharacterLetters.contains(pair)) {
            tokens.add(pair);
            pending = 0;
        } else {
            tokens.add("" + pending);
            pending = c;
        }
    }
    if (pending != 0) {
        tokens.add("" + pending);
    }
    if (tokens.isEmpty()) {
        tokens.add("");
    }
    return tokens;
}

/**
 * Removes the {@code |} tokens from the letter list and attaches each marker
 * to a neighbouring letter: a marker right after the first letter is appended
 * to it ("start split"), a marker right before the last letter is prepended to
 * it ("end split"). A marker that is both (three tokens, marker in the middle)
 * is resolved by looking at the neighbouring shapes.
 *
 * @throws JochreException if a marker is anywhere else, or cannot be resolved
 */
private List<String> attachSplitMarks(Shape shape, List<String> splitLetters) {
    List<String> marked = new ArrayList<String>();
    boolean inSplit = false; // true when the next letter must be prefixed with "|"
    for (int i = 0; i < splitLetters.size(); i++) {
        String letter = splitLetters.get(i);
        if (letter.equals("|")) {
            boolean afterFirst = (i == 1);
            boolean beforeLast = (i == splitLetters.size() - 2);
            boolean startSplit;
            if (afterFirst && beforeLast) {
                // smack in the middle - ambiguous split mark
                startSplit = isBackwardsSplit(shape, splitLetters.get(0), splitLetters.get(2));
            } else if (afterFirst) {
                startSplit = true;
            } else if (beforeLast) {
                startSplit = false;
            } else {
                throw new JochreException("Impossible split location for shape " + shape.getId() + ": " + shape.getLetter());
            }

            if (startSplit) {
                if (marked.isEmpty()) {
                    // Only reachable for a label with no letter before the
                    // marker (e.g. "||"): there is nothing to attach it to.
                    throw new JochreException("Impossible split location for shape " + shape.getId() + ": " + shape.getLetter());
                }
                // At this point only the first letter has been emitted.
                marked.set(0, marked.get(0) + "|");
            } else {
                inSplit = true;
            }
        } else if (inSplit) {
            marked.add("|" + letter);
            inSplit = false;
        } else {
            marked.add(letter);
        }
    }
    return marked;
}

/**
 * Decides how to read an ambiguous label of the form {@code a|b} by inspecting
 * the shapes immediately before and after this one in its group.
 *
 * @return {@code true} if the marker belongs to the first letter ("a|"),
 *         {@code false} if it belongs to the second letter ("|b")
 * @throws JochreException if neither neighbour settles the question
 */
private boolean isBackwardsSplit(Shape shape, String previousLetter, String nextLetter) {
    int index = shape.getIndex();
    Shape previousShape = null;
    Shape nextShape = null;
    if (index > 0) {
        previousShape = shape.getGroup().getShapes().get(index - 1);
    }
    if (index < shape.getGroup().getShapes().size() - 1) {
        nextShape = shape.getGroup().getShapes().get(index + 1);
    }

    // Checks are ordered by priority: an explicit matching label on a
    // neighbour wins over a neighbour that merely has an empty label.
    if (previousShape != null && previousShape.getLetter().equals("|" + previousLetter)) {
        return true;
    }
    if (nextShape != null && nextShape.getLetter().equals(nextLetter + "|")) {
        return false;
    }
    if (previousShape != null && previousShape.getLetter().length() == 0) {
        return true;
    }
    if (nextShape != null && nextShape.getLetter().length() == 0) {
        return false;
    }
    throw new JochreException("Impossible split for shape " + shape.getId() + ": " + previousLetter + "|" + nextLetter);
}
Also used : Shape(com.joliciel.jochre.graphics.Shape) ShapeRightToLeftComparator(com.joliciel.jochre.graphics.ShapeRightToLeftComparator) ArrayList(java.util.ArrayList) JochreException(com.joliciel.jochre.utils.JochreException) ShapeLeftToRightComparator(com.joliciel.jochre.graphics.ShapeLeftToRightComparator) TreeSet(java.util.TreeSet)

Aggregations

Shape (com.joliciel.jochre.graphics.Shape)1 ShapeLeftToRightComparator (com.joliciel.jochre.graphics.ShapeLeftToRightComparator)1 ShapeRightToLeftComparator (com.joliciel.jochre.graphics.ShapeRightToLeftComparator)1 JochreException (com.joliciel.jochre.utils.JochreException)1 ArrayList (java.util.ArrayList)1 TreeSet (java.util.TreeSet)1