Search in sources :

Example 1 with Text

use of biblemulticonverter.format.paratext.ParatextCharacterContent.Text in project BibleMultiConverter by schierlm.

the class ParatextBook method fixTrailingWhitespace.

/**
 * Strips trailing spaces that sit directly in front of a verse end or a
 * table cell start; intended to be called before converting to a
 * non-Paratext format (other Paratext formats should keep the whitespace,
 * which can be introduced by importing USX2 bibles).
 *
 * <p>The book content is walked from back to front. Every
 * {@link ParatextCharacterContent} is first handed to the
 * {@code fixTrailingWhitespace(ParatextCharacterContent)} overload (not part
 * of this method; presumably it moves whitespace at the end of nested
 * character content outside of it). Afterwards its parts are walked from
 * back to front as well: a {@link Text} part that is followed by a
 * {@link VerseEnd} (or that is the last part before a
 * {@code TableCellStart}) is removed when it consists of spaces only, and
 * otherwise has its trailing spaces cut off. Only the space character is
 * affected, not other whitespace.
 */
public void fixTrailingWhitespace() {
    // True while the element visited just before (i.e. the one FOLLOWING in
    // document order) was a verse end or a table cell start.
    boolean stripNext = false;
    int bookIndex = content.size();
    while (--bookIndex >= 0) {
        if (!(content.get(bookIndex) instanceof ParatextCharacterContent)) {
            // Any other book-level part resets the flag; only a table cell
            // start requests stripping of the content in front of it.
            stripNext = content.get(bookIndex) instanceof TableCellStart;
            continue;
        }
        ParatextCharacterContent charContent = (ParatextCharacterContent) content.get(bookIndex);
        fixTrailingWhitespace(charContent);
        for (int partIndex = charContent.getContent().size() - 1; partIndex >= 0; partIndex--) {
            if (stripNext && charContent.getContent().get(partIndex) instanceof Text) {
                Text candidate = (Text) charContent.getContent().get(partIndex);
                if (!candidate.getChars().matches(" +")) {
                    // Mixed text: keep it, minus the trailing spaces.
                    charContent.getContent().set(partIndex, Text.from(candidate.getChars().replaceFirst(" +$", "")));
                } else {
                    // Spaces only: drop the part altogether.
                    charContent.getContent().remove(partIndex);
                }
            }
            // After a removal the slot at partIndex holds the former successor
            // (or is past the end), hence the bounds check.
            stripNext = partIndex < charContent.getContent().size() && (charContent.getContent().get(partIndex) instanceof VerseEnd);
        }
    }
}
Also used : VerseEnd(biblemulticonverter.format.paratext.ParatextCharacterContent.VerseEnd) Text(biblemulticonverter.format.paratext.ParatextCharacterContent.Text)

Example 2 with Text

use of biblemulticonverter.format.paratext.ParatextCharacterContent.Text in project BibleMultiConverter by schierlm.

the class ParatextDump method importCharContent.

/**
 * Parses one line of dumped character content and appends the resulting part
 * to {@code target}.
 *
 * <p>A line consists of up to three tab-separated fields; the first field
 * selects the kind of part. {@code FOOTNOTE} and {@code CHARFORMAT} lines open
 * a nested block whose following lines are read from {@code br} and parsed
 * recursively until the matching {@code FOOTNOTEEND} / {@code CHARFORMATEND}
 * line. Inside a {@code CHARFORMAT} block, {@code ATTRIBUTE} lines are stored
 * as attributes of the formatting instead of being parsed as content.
 *
 * @param target list that receives the parsed part
 * @param br     reader positioned directly after {@code line}; only consumed
 *               for nested blocks
 * @param line   the line to parse
 * @throws IOException if the line has an unknown kind or too few fields, if
 *                     the input ends inside a nested block, or if reading fails
 * @throws NullPointerException if a footnote or formatting tag is unknown
 */
private void importCharContent(List<ParatextCharacterContentPart> target, BufferedReader br, String line) throws IOException {
    String[] parts = line.split("\t", 3);
    switch(parts[0]) {
        case "VERSE":
            requireFieldCount(parts, 3, line);
            target.add(new VerseStart(VerseIdentifier.fromStringOrThrow(parts[1]), parts[2]));
            break;
        case "VERSE-END":
            requireFieldCount(parts, 3, line);
            // The identifier is read from the third field here (second field is not used).
            target.add(new ParatextCharacterContent.VerseEnd(VerseIdentifier.fromStringOrThrow(parts[2])));
            break;
        case "FOOTNOTE":
            requireFieldCount(parts, 3, line);
            FootnoteXref fx = new FootnoteXref(Objects.requireNonNull(FootnoteXrefKind.allTags().get(parts[1]), "Unknown footnote tag: " + parts[1]), parts[2]);
            target.add(fx);
            while (!(line = readBlockLine(br, "FOOTNOTEEND")).equals("FOOTNOTEEND")) {
                importCharContent(fx.getContent(), br, line);
            }
            break;
        case "CHARFORMAT":
            requireFieldCount(parts, 2, line);
            AutoClosingFormatting acf = new AutoClosingFormatting(Objects.requireNonNull(AutoClosingFormattingKind.allTags().get(parts[1]), "Unknown character format tag: " + parts[1]), false);
            target.add(acf);
            while (!(line = readBlockLine(br, "CHARFORMATEND")).equals("CHARFORMATEND")) {
                if (line.startsWith("ATTRIBUTE\t")) {
                    String[] attribute = line.split("\t", 3);
                    requireFieldCount(attribute, 3, line);
                    acf.getAttributes().put(attribute[1], attribute[2]);
                } else {
                    importCharContent(acf.getContent(), br, line);
                }
            }
            break;
        case "REFERENCE":
            requireFieldCount(parts, 3, line);
            target.add(Reference.parse(parts[1], parts[2]));
            break;
        case "TEXT":
            requireFieldCount(parts, 3, line);
            Text text = Text.from(parts[2]);
            // Text.from may yield null (nothing to add in that case).
            if (text != null) {
                target.add(text);
            }
            break;
        default:
            throw new IOException(line);
    }
}

/** Reads the next line of a nested block, failing with an IOException instead of returning null at end of input. */
private String readBlockLine(BufferedReader br, String terminator) throws IOException {
    String next = br.readLine();
    if (next == null) {
        throw new IOException("Unexpected end of input, missing " + terminator);
    }
    return next;
}

/** Rejects a line that has fewer tab-separated fields than its kind requires. */
private void requireFieldCount(String[] fields, int count, String line) throws IOException {
    if (fields.length < count) {
        throw new IOException(line);
    }
}
Also used : VerseStart(biblemulticonverter.format.paratext.ParatextCharacterContent.VerseStart) AutoClosingFormatting(biblemulticonverter.format.paratext.ParatextCharacterContent.AutoClosingFormatting) Text(biblemulticonverter.format.paratext.ParatextCharacterContent.Text) IOException(java.io.IOException) FootnoteXref(biblemulticonverter.format.paratext.ParatextCharacterContent.FootnoteXref)

Example 3 with Text

use of biblemulticonverter.format.paratext.ParatextCharacterContent.Text in project BibleMultiConverter by schierlm.

the class USFX method parseElement.

/**
 * Converts a single USFX element into Paratext book content.
 *
 * <p>Depending on the element's local name this stores a book attribute,
 * starts a paragraph / chapter / table row / table cell, or appends character
 * content (verse markers, footnotes, formatting, references, text) to the
 * innermost open container. Unsupported tags are reported on standard output
 * and skipped.
 *
 * @param result         book that receives attributes and content
 * @param containerStack stack of currently open character content containers
 *                       (innermost last); cleared whenever a new paragraph,
 *                       chapter or table cell starts, and refilled on demand
 * @param element        the USFX element to convert
 * @param context        per-book import state (tracks the open chapter)
 * @throws IllegalStateException if a chapter or verse element has an
 *         unexpected value type, a verse appears before any chapter start, or
 *         a verse end appears before any verse start
 */
private void parseElement(ParatextBook result, List<ParatextCharacterContentContainer> containerStack, JAXBElement<?> element, ImportBookContext context) {
    String localName = element.getName().getLocalPart();
    if (localName.equals("rem") || localName.equals("cl")) {
        result.getAttributes().put(localName, TextUtilities.whitespaceNormalization((String) element.getValue()).trim());
    } else if (localName.equals("h")) {
        Usfx.Book.H h = (Usfx.Book.H) element.getValue();
        result.getAttributes().put("h" + (h.getLevel() == null ? "" : h.getLevel()), TextUtilities.whitespaceNormalization(h.getValue()).trim());
    } else if (localName.equals("b") && element.getValue() instanceof Usfx.Book.B) {
        Usfx.Book.B b = (Usfx.Book.B) element.getValue();
        String tag = (b.getSfm() == null ? localName : b.getSfm());
        result.getContent().add(new ParagraphStart(paragraphKindFor(tag)));
        containerStack.clear();
    } else if (Arrays.asList("p", "q", "d", "s", "mt", "b").contains(localName)) {
        PType pt = (PType) element.getValue();
        String tag = (pt.getSfm() == null ? localName : pt.getSfm()) + (pt.getLevel() == null ? "" : "" + pt.getLevel());
        result.getContent().add(new ParagraphStart(paragraphKindFor(tag)));
        containerStack.clear();
        parseElements(result, containerStack, pt.getContent(), context);
    } else if (Arrays.asList("sectionBoundary", "ca", "milestone", "va", "fm", "fig", "gw", "cs", "wr").contains(localName)) {
        System.out.println("WARNING: Skipping unsupported tag: " + localName);
    } else if (Arrays.asList("generated", "cp", "vp", "wtp", "da", "fs").contains(localName)) {
        // to be skipped
    } else if (localName.equals("c")) {
        ImportUtilities.closeOpenChapter(result, context.openChapter);
        String id;
        if (element.getValue() instanceof Usfx.Book.C) {
            Usfx.Book.C c = (Usfx.Book.C) element.getValue();
            id = c.getId();
        } else if (element.getValue() instanceof PType.C) {
            PType.C c = (PType.C) element.getValue();
            id = c.getId();
        } else {
            throw new IllegalStateException(element.getValue().getClass().getName());
        }
        context.openChapter = new ChapterStart(new ChapterIdentifier(result.getId(), Integer.parseInt(id)));
        result.getContent().add(context.openChapter);
        containerStack.clear();
    } else if (localName.equals("toc")) {
        StyledString ss = (StyledString) element.getValue();
        // Only the plain string pieces of the styled content are kept.
        result.getAttributes().put("toc" + ss.getLevel(), TextUtilities.whitespaceNormalization(ss.getContent().stream().filter(c -> c instanceof String).map(Serializable::toString).collect(Collectors.joining())).trim());
    } else if (localName.equals("table") && element.getValue() instanceof Usfx.Book.Table) {
        Usfx.Book.Table table = (Usfx.Book.Table) element.getValue();
        for (Usfx.Book.Table.Tr tr : table.getTr()) {
            result.getContent().add(new ParagraphStart(ParagraphKind.TABLE_ROW));
            for (JAXBElement<PType> cell : tr.getThOrThrOrTc()) {
                // NOTE(review): PType.getLevel() is null-checked in the paragraph
                // branch above; a null level here would produce a tag ending in
                // "null" - confirm whether table cells always carry a level.
                result.getContent().add(new TableCellStart(cell.getName().getLocalPart() + cell.getValue().getLevel()));
                containerStack.clear();
                parseElements(result, containerStack, cell.getValue().getContent(), context);
            }
        }
    } else if (localName.equals("table") && element.getValue() instanceof PType.Table) {
        PType.Table table = (PType.Table) element.getValue();
        for (PType.Table.Tr tr : table.getTr()) {
            result.getContent().add(new ParagraphStart(ParagraphKind.TABLE_ROW));
            for (JAXBElement<PType> cell : tr.getThOrThrOrTc()) {
                // NOTE(review): same null-level caveat as in the branch above.
                result.getContent().add(new TableCellStart(cell.getName().getLocalPart() + cell.getValue().getLevel()));
                containerStack.clear();
                parseElements(result, containerStack, cell.getValue().getContent(), context);
            }
        }
    } else if (localName.equals("periph")) {
        result.getContent().add(new ParagraphStart(ParagraphKind.PERIPHERALS));
        containerStack.clear();
        ParatextCharacterContent container = new ParatextCharacterContent();
        Text text = Text.from((String) element.getValue());
        if (text != null) {
            container.getContent().add(text);
        }
        containerStack.add(container);
        result.getContent().add(container);
    } else if (localName.equals("v")) {
        String id;
        if (element.getValue() instanceof Usfx.Book.V) {
            Usfx.Book.V v = (Usfx.Book.V) element.getValue();
            id = v.getId();
        } else if (element.getValue() instanceof PType.V) {
            PType.V v = (PType.V) element.getValue();
            id = v.getId();
        } else {
            throw new IllegalStateException(element.getValue().getClass().getName());
        }
        ParatextCharacterContentContainer parent = ensureOpenContainer(result, containerStack);
        ChapterStart chapter = result.findLastBookContent(ChapterStart.class);
        if (chapter == null) {
            throw new IllegalStateException("Verse found before chapter start: " + id);
        }
        VerseIdentifier location = new VerseIdentifier(result.getId(), chapter.getChapter(), id);
        parent.getContent().add(new VerseStart(location, id));
    } else if (localName.equals("ve")) {
        VerseStart start = result.findLastCharacterContent(VerseStart.class);
        if (start == null) {
            throw new IllegalStateException("Verse end found before verse start!");
        }
        ensureOpenContainer(result, containerStack).getContent().add(new ParatextCharacterContent.VerseEnd(start.getLocation()));
    } else if (Arrays.asList("f", "x", "fe").contains(localName)) {
        NoteContents nc = (NoteContents) element.getValue();
        String sfm = nc.getSfm();
        if (sfm == null || sfm.isEmpty()) {
            sfm = localName;
        }
        String caller = nc.getCaller();
        if (caller == null || caller.isEmpty()) {
            caller = "+";
        }
        FootnoteXref nextContainer = new FootnoteXref(USFM.FOOTNOTE_XREF_TAGS.get(sfm), caller);
        ensureOpenContainer(result, containerStack).getContent().add(nextContainer);
        // The note is the innermost container only while its children are parsed.
        containerStack.add(nextContainer);
        parseElements(result, containerStack, nc.getContent(), context);
        containerStack.remove(nextContainer);
    } else if (Arrays.asList("fp", "fr", "fk", "fq", "fqa", "fl", "fdc", "fv", "ft", "fm", "xo", "xk", "xq", "xt", "xot", "xnt", "xdc").contains(localName) || (Arrays.asList("nd", "c", "tl", "it", "qt", "sls", "dc", "bdit", "bk", "pn", "k", "ord", "add", "bd", "sc", "wh", "wg", "wr", "wj", "cs", "em").contains(localName) && element.getValue() instanceof NoteContents)) {
        // Formatting inside notes. Names already handled by an earlier branch
        // ("fm", "c", "cs", "wr") never reach this point.
        NoteContents nc = (NoteContents) element.getValue();
        String sfm = nc.getSfm();
        if (sfm == null || sfm.isEmpty()) {
            sfm = localName;
        }
        ParatextCharacterContentContainer parent = ensureOpenContainer(result, containerStack);
        AutoClosingFormatting nextContainer = new AutoClosingFormatting(USFM.AUTO_CLOSING_TAGS.get(sfm), false);
        parent.getContent().add(nextContainer);
        containerStack.add(nextContainer);
        parseElements(result, containerStack, nc.getContent(), context);
        containerStack.remove(nextContainer);
    } else if (localName.equals("optionalLineBreak")) {
        System.out.println("WARNING: Skipping optional line break");
    } else if (localName.equals("ref")) {
        RefType rt = (RefType) element.getValue();
        // Plain text is the fallback whenever the target cannot be parsed.
        ParatextCharacterContentPart ref = Text.from(rt.getContent());
        // This code does not allow for a second book, as in: ISA.7.14-ISA.7.15.
        if (rt.getTgt() == null || !rt.getTgt().matches("[A-Z1-4]{3}\\.[0-9]+\\.[0-9]+(-[0-9]+(\\.[0-9]+)?)?")) {
            System.out.println("WARNING: Unsupported structured reference format - replaced by plain text: " + rt.getTgt());
        } else {
            String[] parts = rt.getTgt().split("[ .-]");
            ParatextID id = ParatextID.fromIdentifier(parts[0]);
            if (id == null) {
                System.out.println("WARNING: Unsupported book in structured reference - replaced by plain text: " + parts[0]);
            } else {
                int c1 = Integer.parseInt(parts[1]);
                String v1 = parts[2];
                if (parts.length > 3) {
                    // second verse
                    String v2 = parts[parts.length - 1];
                    if (parts.length == 5) {
                        // second chapter
                        int c2 = Integer.parseInt(parts[3]);
                        ref = Reference.verseRange(id, c1, v1, c2, v2, rt.getContent());
                    } else {
                        // No second chapter, but we do have a second verse, use first chapter as second chapter.
                        ref = Reference.verseRange(id, c1, v1, c1, v2, rt.getContent());
                    }
                } else {
                    ref = Reference.verse(id, c1, v1, rt.getContent());
                }
            }
        }
        if (ref != null) {
            ensureOpenContainer(result, containerStack).getContent().add(ref);
        }
    } else if (localName.equals("w")) {
        PType.W w = (PType.W) element.getValue();
        String sfm = w.getSfm();
        if (sfm == null || sfm.isEmpty()) {
            sfm = localName;
        }
        AutoClosingFormatting nextContainer = new AutoClosingFormatting(USFM.AUTO_CLOSING_TAGS.get(sfm), false);
        if (w.getL() != null && !w.getL().isEmpty()) {
            nextContainer.getAttributes().put("lemma", w.getL());
        }
        if (w.getS() != null && !w.getS().isEmpty()) {
            nextContainer.getAttributes().put("strong", w.getS());
        }
        if (w.getM() != null && !w.getM().isEmpty()) {
            nextContainer.getAttributes().put("x-morph", w.getM());
        }
        if (w.getSrcloc() != null && !w.getSrcloc().isEmpty()) {
            nextContainer.getAttributes().put("x-srcloc", w.getSrcloc());
        }
        if (w.isPlural() != null) {
            nextContainer.getAttributes().put("x-plural", "" + w.isPlural());
        }
        // Like every other character-level branch, open a container on demand
        // instead of failing when the stack is empty.
        ensureOpenContainer(result, containerStack).getContent().add(nextContainer);
        containerStack.add(nextContainer);
        parseElements(result, containerStack, w.getContent(), context);
        containerStack.remove(nextContainer);
    } else if (localName.equals("quoteStart")) {
        PType.QuoteStart qs = (PType.QuoteStart) element.getValue();
        Text text = Text.from(qs.getValue());
        if (text != null) {
            ensureOpenContainer(result, containerStack).getContent().add(text);
        }
    } else if (localName.equals("quoteRemind") || localName.equals("quoteEnd")) {
        Text text = Text.from((String) element.getValue());
        if (text != null) {
            ensureOpenContainer(result, containerStack).getContent().add(text);
        }
    } else if (Arrays.asList("rq", "em", "qt", "nd", "tl", "qs", "qac", "sls", "dc", "bk", "k", "add", "sig", "bd", "it", "bdit", "sc", "wj").contains(localName)) {
        PType v = (PType) element.getValue();
        String sfm = v.getSfm();
        if (sfm == null || sfm.isEmpty()) {
            sfm = localName;
        }
        AutoClosingFormatting nextContainer = new AutoClosingFormatting(USFM.AUTO_CLOSING_TAGS.get(sfm), false);
        ensureOpenContainer(result, containerStack).getContent().add(nextContainer);
        containerStack.add(nextContainer);
        parseElements(result, containerStack, v.getContent(), context);
        containerStack.remove(nextContainer);
    } else if (Arrays.asList("pn", "ord", "no", "ndx", "wh", "wg", "ior").contains(localName)) {
        // These elements carry plain string content only, so nothing is pushed on the stack.
        AutoClosingFormatting nextContainer = new AutoClosingFormatting(USFM.AUTO_CLOSING_TAGS.get(localName), false);
        ensureOpenContainer(result, containerStack).getContent().add(nextContainer);
        Text text = Text.from((String) element.getValue());
        if (text != null) {
            nextContainer.getContent().add(text);
        }
    } else {
        System.out.println("WARNING: Unexpected tag: " + localName);
    }
}

/** Looks up the paragraph kind for a USFM tag, warning about and falling back to a plain paragraph for unknown tags. */
private ParagraphKind paragraphKindFor(String tag) {
    ParagraphKind kind = USFM.PARAGRAPH_TAGS.get(tag);
    if (kind == null) {
        // Report the tag that failed to resolve (the looked-up kind is always null here).
        System.out.println("WARNING: Unsupported paragraph kind: " + tag);
        kind = ParagraphKind.PARAGRAPH_P;
    }
    return kind;
}

/** Returns the innermost open container, first creating one and adding it to the book if the stack is empty. */
private ParatextCharacterContentContainer ensureOpenContainer(ParatextBook result, List<ParatextCharacterContentContainer> containerStack) {
    if (containerStack.isEmpty()) {
        ParatextCharacterContent container = new ParatextCharacterContent();
        containerStack.add(container);
        result.getContent().add(container);
    }
    return containerStack.get(containerStack.size() - 1);
}
Also used : TableCellStart(biblemulticonverter.format.paratext.ParatextBook.TableCellStart) StyledString(biblemulticonverter.schema.usfx.StyledString) RefType(biblemulticonverter.schema.usfx.RefType) ParatextID(biblemulticonverter.format.paratext.ParatextBook.ParatextID) Book(biblemulticonverter.schema.usfx.Usfx.Book) NoteContents(biblemulticonverter.schema.usfx.NoteContents) AutoClosingFormatting(biblemulticonverter.format.paratext.ParatextCharacterContent.AutoClosingFormatting) PType(biblemulticonverter.schema.usfx.PType) Text(biblemulticonverter.format.paratext.ParatextCharacterContent.Text) StyledString(biblemulticonverter.schema.usfx.StyledString) JAXBElement(javax.xml.bind.JAXBElement) ChapterStart(biblemulticonverter.format.paratext.ParatextBook.ChapterStart) FootnoteXref(biblemulticonverter.format.paratext.ParatextCharacterContent.FootnoteXref) VerseIdentifier(biblemulticonverter.format.paratext.model.VerseIdentifier) VerseStart(biblemulticonverter.format.paratext.ParatextCharacterContent.VerseStart) ParatextCharacterContentPart(biblemulticonverter.format.paratext.ParatextCharacterContent.ParatextCharacterContentPart) ParagraphKind(biblemulticonverter.format.paratext.ParatextBook.ParagraphKind) Usfx(biblemulticonverter.schema.usfx.Usfx) ChapterIdentifier(biblemulticonverter.format.paratext.model.ChapterIdentifier) ParagraphStart(biblemulticonverter.format.paratext.ParatextBook.ParagraphStart)

Example 4 with Text

use of biblemulticonverter.format.paratext.ParatextCharacterContent.Text in project BibleMultiConverter by schierlm.

the class USFX method parseElements.

/**
 * Converts a mixed list of strings and USFX elements into book content.
 *
 * <p>First pass: whitespace touching a chapter ({@code c}), verse
 * ({@code v}) or verse end ({@code ve}) element is trimmed in place inside
 * {@code elements} - all trailing whitespace of a string directly before such
 * an element, and a single leading whitespace character of a string directly
 * after it. NOTE(review): the asymmetry (all vs. one character) is taken over
 * unchanged; confirm whether it is intentional.
 *
 * <p>Second pass: strings become {@link Text} parts of the innermost open
 * container (one is created on demand), elements are delegated to
 * {@code parseElement}, anything else is reported and skipped.
 *
 * @param result         book that receives the content
 * @param containerStack stack of open character content containers, innermost last
 * @param elements       content to convert; string entries may be replaced by trimmed copies
 * @param context        per-book import state, passed through to {@code parseElement}
 */
private void parseElements(ParatextBook result, List<ParatextCharacterContentContainer> containerStack, List<Serializable> elements, ImportBookContext context) {
    int pairCount = elements.size() - 1;
    for (int pos = 0; pos < pairCount; pos++) {
        Serializable current = elements.get(pos);
        Serializable following = elements.get(pos + 1);
        boolean textBeforeElement = current instanceof String && following instanceof JAXBElement;
        boolean elementBeforeText = current instanceof JAXBElement<?> && following instanceof String;
        if (textBeforeElement) {
            String markerName = ((JAXBElement<?>) following).getName().getLocalPart();
            if (Arrays.asList("c", "v", "ve").contains(markerName)) {
                elements.set(pos, current.toString().replaceAll("[\r\n\t ]+$", ""));
            }
        } else if (elementBeforeText) {
            String markerName = ((JAXBElement<?>) current).getName().getLocalPart();
            if (Arrays.asList("c", "v", "ve").contains(markerName)) {
                elements.set(pos + 1, following.toString().replaceAll("^[\r\n\t ]", ""));
            }
        }
    }
    for (Serializable item : elements) {
        if (item instanceof JAXBElement<?>) {
            parseElement(result, containerStack, (JAXBElement<?>) item, context);
            continue;
        }
        if (!(item instanceof String)) {
            System.out.println("WARNING: Skipping unsupported content inside of book " + item);
            continue;
        }
        Text text = Text.from((String) item);
        if (text != null) {
            // Open a fresh container when none is active (e.g. right after a paragraph start).
            if (containerStack.isEmpty()) {
                ParatextCharacterContent freshContainer = new ParatextCharacterContent();
                containerStack.add(freshContainer);
                result.getContent().add(freshContainer);
            }
            containerStack.get(containerStack.size() - 1).getContent().add(text);
        }
    }
}
Also used : Serializable(java.io.Serializable) Text(biblemulticonverter.format.paratext.ParatextCharacterContent.Text) StyledString(biblemulticonverter.schema.usfx.StyledString) JAXBElement(javax.xml.bind.JAXBElement)

Aggregations

Text (biblemulticonverter.format.paratext.ParatextCharacterContent.Text)4 AutoClosingFormatting (biblemulticonverter.format.paratext.ParatextCharacterContent.AutoClosingFormatting)2 FootnoteXref (biblemulticonverter.format.paratext.ParatextCharacterContent.FootnoteXref)2 VerseStart (biblemulticonverter.format.paratext.ParatextCharacterContent.VerseStart)2 StyledString (biblemulticonverter.schema.usfx.StyledString)2 JAXBElement (javax.xml.bind.JAXBElement)2 ChapterStart (biblemulticonverter.format.paratext.ParatextBook.ChapterStart)1 ParagraphKind (biblemulticonverter.format.paratext.ParatextBook.ParagraphKind)1 ParagraphStart (biblemulticonverter.format.paratext.ParatextBook.ParagraphStart)1 ParatextID (biblemulticonverter.format.paratext.ParatextBook.ParatextID)1 TableCellStart (biblemulticonverter.format.paratext.ParatextBook.TableCellStart)1 ParatextCharacterContentPart (biblemulticonverter.format.paratext.ParatextCharacterContent.ParatextCharacterContentPart)1 VerseEnd (biblemulticonverter.format.paratext.ParatextCharacterContent.VerseEnd)1 ChapterIdentifier (biblemulticonverter.format.paratext.model.ChapterIdentifier)1 VerseIdentifier (biblemulticonverter.format.paratext.model.VerseIdentifier)1 NoteContents (biblemulticonverter.schema.usfx.NoteContents)1 PType (biblemulticonverter.schema.usfx.PType)1 RefType (biblemulticonverter.schema.usfx.RefType)1 Usfx (biblemulticonverter.schema.usfx.Usfx)1 Book (biblemulticonverter.schema.usfx.Usfx.Book)1