Search in sources :

Example 6 with ChapterIdentifier

use of biblemulticonverter.format.paratext.model.ChapterIdentifier in project BibleMultiConverter by schierlm.

the class USX method doExportBook.

/**
 * Exports one Paratext book as a USX document (version attribute "2.5") and writes it to
 * {@code outFile}.
 *
 * <p>The book header attributes are emitted first as one paragraph each, then the book content is
 * walked with a visitor that appends chapters, paragraphs and tables to the USX root element.
 * Paragraph kinds that are not part of {@code USX_2_PARAGRAPH_KINDS} are replaced by an older
 * equivalent (with a logged warning) or rejected.
 *
 * @param book the book to export
 * @param outFile the file the XML document is written to
 * @throws Exception if marshalling, schema loading or writing the file fails
 */
@Override
protected void doExportBook(ParatextBook book, File outFile) throws Exception {
    ObjectFactory of = new ObjectFactory();
    Usx usx = of.createUsx();
    usx.setVersion("2.5");
    usx.setBook(of.createBook());
    usx.getBook().setStyle("id");
    usx.getBook().setCode(book.getId().getIdentifier());
    usx.getBook().setContent(book.getBibleName());
    // Every book attribute becomes its own paragraph whose style is the attribute key.
    for (Map.Entry<String, String> attr : book.getAttributes().entrySet()) {
        Para para = new Para();
        para.setStyle(ParaStyle.fromValue(attr.getKey()));
        para.getContent().add(attr.getValue());
        usx.getParaOrTableOrChapter().add(para);
    }
    book.accept(new ParatextBookContentVisitor<IOException>() {

        // Content list of the paragraph or table cell currently being filled; null if none is open.
        List<Object> currentContent = null;

        // Table that consecutive table rows are appended to; null when the last element was not a table row.
        Table currentTable = null;

        @Override
        public void visitChapterStart(ChapterIdentifier location) throws IOException {
            Chapter ch = new Chapter();
            ch.setStyle("c");
            ch.setNumber(BigInteger.valueOf(location.chapter));
            usx.getParaOrTableOrChapter().add(ch);
            // A new chapter closes whatever paragraph or table was open.
            currentContent = null;
            currentTable = null;
        }

        @Override
        public void visitChapterEnd(ChapterIdentifier location) throws IOException {
            // Chapter end does not exist in USX 2
        }

        @Override
        public void visitParagraphStart(ParagraphKind kind) throws IOException {
            if (kind == ParagraphKind.TABLE_ROW) {
                // Consecutive rows share one table; the table is created on the first row.
                if (currentTable == null) {
                    currentTable = new Table();
                    usx.getParaOrTableOrChapter().add(currentTable);
                }
                Row row = new Row();
                row.setStyle("tr");
                currentTable.getRow().add(row);
                currentContent = currentTable.getRow().get(currentTable.getRow().size() - 1).getVerseOrCell();
            } else if (USX_2_PARAGRAPH_KINDS.contains(kind)) {
                ParaStyle style = PARA_KIND_MAP.get(kind);
                if (style == null) {
                    throw new RuntimeException("Error could not get ParaStyle for ParagraphKind: " + kind);
                }
                Para para = new Para();
                para.setStyle(style);
                usx.getParaOrTableOrChapter().add(para);
                currentContent = para.getContent();
                // Any non-row paragraph ends the current table.
                currentTable = null;
            } else {
                visitUnsupportedParagraphStart(kind);
            }
        }

        /**
         * Handles paragraph kinds that are not in {@code USX_2_PARAGRAPH_KINDS} by starting a
         * replacement paragraph and logging the replacement; throws if no replacement is defined.
         */
        private void visitUnsupportedParagraphStart(ParagraphKind kind) throws IOException {
            if (kind == ParagraphKind.HEBREW_NOTE) {
                // See: USFM.visitUnsupportedParagraphStart
                visitParagraphStart(ParagraphKind.DESCRIPTIVE_TITLE);
                logger.logReplaceWarning(kind, ParagraphKind.DESCRIPTIVE_TITLE);
            } else if (kind.isSameBase(ParagraphKind.SEMANTIC_DIVISION)) {
                // See: USFM.visitUnsupportedParagraphStart
                visitParagraphStart(ParagraphKind.BLANK_LINE);
                logger.logReplaceWarning(kind, ParagraphKind.BLANK_LINE);
            } else if (kind == ParagraphKind.PARAGRAPH_PO || kind == ParagraphKind.PARAGRAPH_LH || kind == ParagraphKind.PARAGRAPH_LF) {
                // See: USFM.visitUnsupportedParagraphStart
                logger.logReplaceWarning(kind, ParagraphKind.PARAGRAPH_P);
                visitParagraphStart(ParagraphKind.PARAGRAPH_P);
            } else if (kind.getTag().startsWith(ParagraphKind.PARAGRAPH_LIM.getTag())) {
                // Replace by the PARAGRAPH_LI variant carrying the same number.
                ParagraphKind replacement = ParagraphKind.PARAGRAPH_LI.getWithNumber(kind.getNumber());
                logger.logReplaceWarning(kind, replacement);
                visitParagraphStart(replacement);
            } else {
                throw new RuntimeException("Could not export to USX 2 because an unhandled paragraph type `" + kind + "` from a newer USFM/USX version was found.");
            }
        }

        @Override
        public void visitTableCellStart(String tag) throws IOException {
            if (currentTable == null) {
                System.out.println("WARNING: Table cell outside of table");
                return;
            }
            // The cell is appended to the most recently added row of the current table.
            Row currentRow = currentTable.getRow().get(currentTable.getRow().size() - 1);
            Cell cell = new Cell();
            // Tags containing an "r" are end-aligned, all others start-aligned.
            cell.setAlign(tag.contains("r") ? CellAlign.END : CellAlign.START);
            cell.setStyle(CellStyle.fromValue(tag));
            currentRow.getVerseOrCell().add(cell);
            currentContent = cell.getContent();
        }

        @Override
        public void visitParatextCharacterContent(ParatextCharacterContent content) throws IOException {
            // Character content without an open paragraph gets an implicit normal paragraph.
            if (currentContent == null)
                visitParagraphStart(ParagraphKind.PARAGRAPH_P);
            content.accept(new USXCharacterContentVisitor(logger, currentContent));
        }
    });
    JAXBContext ctx = JAXBContext.newInstance(ObjectFactory.class.getPackage().getName());
    Marshaller m = ctx.createMarshaller();
    if (!Boolean.getBoolean("biblemulticonverter.skipxmlvalidation"))
        m.setSchema(getSchema());
    // The XML writer is told the document is UTF-8, so the bytes must really be encoded as UTF-8
    // (a plain FileWriter would use the platform default charset on older Java versions), and the
    // underlying file must be closed even if marshalling fails.
    try (java.io.Writer out = new java.io.OutputStreamWriter(new java.io.FileOutputStream(outFile), java.nio.charset.StandardCharsets.UTF_8)) {
        m.marshal(usx, new UnifiedScriptureXMLWriter(out, "UTF-8"));
    }
}
Also used : FileWriter(java.io.FileWriter) JAXBContext(javax.xml.bind.JAXBContext) ObjectFactory(biblemulticonverter.schema.usx.ObjectFactory) ParaStyle(biblemulticonverter.schema.usx.ParaStyle) Cell(biblemulticonverter.schema.usx.Cell) Marshaller(javax.xml.bind.Marshaller) Table(biblemulticonverter.schema.usx.Table) Para(biblemulticonverter.schema.usx.Para) UnifiedScriptureXMLWriter(biblemulticonverter.format.paratext.utilities.UnifiedScriptureXMLWriter) Chapter(biblemulticonverter.schema.usx.Chapter) IOException(java.io.IOException) ParagraphKind(biblemulticonverter.format.paratext.ParatextBook.ParagraphKind) Usx(biblemulticonverter.schema.usx.Usx) Row(biblemulticonverter.schema.usx.Row) ChapterIdentifier(biblemulticonverter.format.paratext.model.ChapterIdentifier) Map(java.util.Map) EnumMap(java.util.EnumMap)

Example 7 with ChapterIdentifier

use of biblemulticonverter.format.paratext.model.ChapterIdentifier in project BibleMultiConverter by schierlm.

the class USFM method doImportBook.

/**
 * Parses a single USFM file into a {@link ParatextBook}.
 *
 * <p>The file content is whitespace-normalized and then split at backslashes; every piece
 * consists of a tag followed by its text. Depending on the tag, a paragraph start, chapter start,
 * table cell, verse start, character formatting container, footnote/cross reference or book
 * header attribute is created. Remaining text is appended to the innermost open container.
 *
 * <p>If the file contains an {@code \ide} tag naming a charset different from {@code charset},
 * the file is re-read with that charset (only allowed when {@code charset} is UTF-8).
 *
 * @param inputFile the file to read; files not ending in {@code .usfm} or {@code .sfm} are skipped
 * @param charset the charset used to decode the file
 * @return the parsed book, or {@code null} if the file was skipped (wrong extension, missing
 *         {@code \id} tag or unknown book ID)
 * @throws Exception if the file cannot be read or its content is invalid
 */
private ParatextBook doImportBook(File inputFile, Charset charset) throws Exception {
    KNOWN_CHARACTER_TAGS.addAll(AUTO_CLOSING_TAGS.keySet());
    // Case conversions use Locale.ROOT so that results do not depend on the default locale
    // (e.g. the Turkish dotted/dotless i would otherwise break IDs such as "tit").
    String lowerCaseName = inputFile.getName().toLowerCase(java.util.Locale.ROOT);
    if (!lowerCaseName.endsWith(".usfm") && !lowerCaseName.endsWith(".sfm"))
        return null;
    // An artificial end marker guarantees that every tag is followed by another backslash.
    String data = TextUtilities.usfmWhitespaceNormalization(new String(Files.readAllBytes(inputFile.toPath()), charset), preserveSpacesAtEndOfLines) + "\\$EOF$";
    if (!data.startsWith("\\id ")) {
        System.out.println("WARNING: Skipping malformed file " + inputFile);
        return null;
    }
    int startPos = data.indexOf("\\", 2);
    int finalPos = data.length() - "\\$EOF$".length();
    // Text after "\id " is the book ID, optionally followed by a space and a free-form name.
    String[] idParts = data.substring(4, startPos).trim().split(" ", 2);
    ParatextID id = ParatextID.fromIdentifier(idParts[0].toUpperCase(java.util.Locale.ROOT));
    if (id == null) {
        System.out.println("WARNING: Skipping book with unknown ID: " + idParts[0]);
        return null;
    }
    ParatextBook result = new ParatextBook(id, idParts.length == 1 ? "" : idParts[1]);
    // Stack of open character content containers; the last element receives new content.
    List<ParatextCharacterContentContainer> containerStack = new ArrayList<>();
    boolean ignoreAutoClosingTags = Boolean.getBoolean("biblemulticonverter.usfm.ignoreautoclosingtags");
    VerseStart openVerse = null;
    ChapterStart openChapter = null;
    while (startPos < finalPos) {
        if (data.charAt(startPos) != '\\')
            throw new IllegalStateException();
        // Cut out everything between this backslash and the next one.
        int pos = data.indexOf('\\', startPos + 1);
        String textPart = data.substring(startPos + 1, pos);
        startPos = pos;
        // The tag ends at the first space or right after the first asterisk, whichever comes first.
        pos = Math.min(textPart.length(), 1 + Math.min((textPart + " ").indexOf(' '), (textPart + "*").indexOf('*')));
        String tag = textPart.substring(0, pos).trim().toLowerCase(java.util.Locale.ROOT);
        textPart = textPart.substring(pos);
        if (textPart.endsWith(" ")) {
            // Drop the trailing space unless the following tag is a known character tag.
            String nextTag = data.substring(startPos + 1, Math.min(data.length(), startPos + 10)) + " *\\";
            pos = Math.min(nextTag.indexOf('\\'), Math.min(nextTag.indexOf(' '), nextTag.indexOf('*')));
            if (!KNOWN_CHARACTER_TAGS.contains(nextTag.substring(0, pos))) {
                textPart = textPart.substring(0, textPart.length() - 1);
            }
        }
        // Tags that add to a container need an open character content block.
        if (containerStack.isEmpty() && (AUTO_CLOSING_TAGS.containsKey(tag) || tag.equals("v") || FOOTNOTE_XREF_TAGS.containsKey(tag))) {
            ParatextCharacterContent container = new ParatextCharacterContent();
            result.getContent().add(container);
            containerStack.add(container);
        }
        boolean closeCharacterAttributes = false;
        if (PARAGRAPH_TAGS.containsKey(tag)) {
            ParagraphKind kind = PARAGRAPH_TAGS.get(tag);
            // if (kind.getCategory() != ParatextBook.ParagraphKindCategory.TEXT) {
            // Close any open verse
            // openVerse = closeOpenVerse(result, openVerse, false);
            // }
            result.getContent().add(new ParagraphStart(kind));
            closeCharacterAttributes = true;
        } else if (tag.endsWith("*")) {
            String rawTag = tag.substring(0, tag.length() - 1);
            // Implicitly close auto-closing formatting until the matching one is on top.
            while (!containerStack.isEmpty() && containerStack.get(containerStack.size() - 1) instanceof AutoClosingFormatting) {
                AutoClosingFormatting acc = (AutoClosingFormatting) containerStack.get(containerStack.size() - 1);
                if (acc.getUsedTag().equals(rawTag))
                    break;
                containerStack.remove(containerStack.size() - 1);
            }
            boolean found = false;
            if (AUTO_CLOSING_TAGS.containsKey(rawTag)) {
                if (!containerStack.isEmpty() && containerStack.get(containerStack.size() - 1) instanceof AutoClosingFormatting) {
                    AutoClosingFormatting acc = (AutoClosingFormatting) containerStack.get(containerStack.size() - 1);
                    found = acc.getUsedTag().equals(rawTag);
                }
            } else if (FOOTNOTE_XREF_TAGS.containsKey(rawTag)) {
                if (!containerStack.isEmpty() && containerStack.get(containerStack.size() - 1) instanceof FootnoteXref) {
                    FootnoteXref fx = (FootnoteXref) containerStack.get(containerStack.size() - 1);
                    found = fx.getKind().getTag().equals(rawTag);
                }
            } else {
                System.out.println("WARNING: Skipping unknown end tag \\" + tag);
            }
            if (found) {
                containerStack.remove(containerStack.size() - 1);
            } else {
                System.out.println("WARNING: Skipping mismatched end tag \\" + tag);
            }
        } else if (AUTO_CLOSING_TAGS.containsKey(tag)) {
            // A non-nested ("+"-less) tag closes all currently open auto-closing formatting.
            if (!tag.startsWith("+") && !ignoreAutoClosingTags) {
                while (!containerStack.isEmpty() && containerStack.get(containerStack.size() - 1) instanceof AutoClosingFormatting) {
                    containerStack.remove(containerStack.size() - 1);
                }
            }
            AutoClosingFormatting nextContainer = new AutoClosingFormatting(AUTO_CLOSING_TAGS.get(tag), tag.startsWith("+"));
            containerStack.get(containerStack.size() - 1).getContent().add(nextContainer);
            containerStack.add(nextContainer);
            // Attributes ("text|attr" syntax) are only parsed when the matching end tag follows directly.
            if (nextContainer.getKind().getDefaultAttributes() != null && data.startsWith("\\" + tag + "*", startPos) && textPart.contains("|")) {
                String[] defaultAttributes = nextContainer.getKind().getDefaultAttributes();
                String[] parts = textPart.split("\\|");
                for (int i = 1; i < parts.length; i++) {
                    if (parts[i].contains("=")) {
                        // Named attributes: key="value" key2="value2" or a single unquoted key=value.
                        String attList = parts[i];
                        while (attList.contains("=")) {
                            pos = attList.indexOf('=');
                            String key = attList.substring(0, pos).trim();
                            attList = attList.substring(pos + 1).trim();
                            if (attList.startsWith("\"")) {
                                pos = attList.indexOf('"', 1);
                                if (pos == -1) {
                                    // No closing quote: take the remainder as the value instead of failing
                                    // with an index error.
                                    System.out.println("WARNING: Unterminated quoted value for attribute " + key + " of \\" + tag);
                                    nextContainer.getAttributes().put(key, attList.substring(1));
                                    attList = "";
                                } else {
                                    nextContainer.getAttributes().put(key, attList.substring(1, pos));
                                    attList = attList.substring(pos + 1).trim();
                                }
                            } else {
                                nextContainer.getAttributes().put(key, attList);
                                attList = "";
                            }
                        }
                    } else if (i - 1 < defaultAttributes.length) {
                        // Unnamed value: assign it to the default attribute at the same position.
                        nextContainer.getAttributes().put(defaultAttributes[i - 1], parts[i]);
                    }
                }
                textPart = parts[0];
                if (textPart.endsWith(" ")) {
                    textPart = textPart.substring(0, textPart.length() - 1);
                }
            }
        } else if (tag.equals("v")) {
            ImportUtilities.closeOpenVerse(result, openVerse);
            String[] parts = textPart.split(" ", 2);
            ChapterStart chapter = result.findLastBookContent(ChapterStart.class);
            if (chapter == null) {
                throw new IllegalStateException("Verse \\v found before chapter start milestone");
            }
            // A verse number in USFM 2 may be in the format 6-7, 6a or even 6-7a.
            // Attempt to parse these numbers by first adding the book and chapter and then parsing it as a whole.
            VerseIdentifier location = VerseIdentifier.fromStringOrThrow(openChapter.getLocation() + ":" + parts[0]);
            openVerse = new VerseStart(location, parts[0]);
            containerStack.get(containerStack.size() - 1).getContent().add(openVerse);
            textPart = parts.length == 1 ? "" : parts[1];
        } else if (tag.equals("c")) {
            ImportUtilities.closeOpenVerse(result, openVerse);
            openVerse = null;
            // There is not really a good way to accurately determine where the end of a chapter should be placed
            // based on USFM 2 content. Maybe a title above this chapter marker was already intended to be part of
            // this chapter. This is basically a best guess. This should not really matter when converting from
            // USFM 2 to USX 2 or USFX (which is based on USFM 2), however when up-converting to USX 3 this might
            // lead to unexpected results.
            ImportUtilities.closeOpenChapter(result, openChapter);
            String[] parts = textPart.split(" ", 2);
            if (!parts[0].matches("[0-9]+"))
                throw new NumberFormatException("Invalid chapter number in \\c " + textPart);
            openChapter = new ChapterStart(new ChapterIdentifier(id, Integer.parseInt(parts[0])));
            result.getContent().add(openChapter);
            closeCharacterAttributes = true;
            textPart = parts.length == 1 ? "" : parts[1];
        } else if (tag.matches("t[hc]r?[0-9]+")) {
            result.getContent().add(new TableCellStart(tag));
            closeCharacterAttributes = true;
        } else if (FOOTNOTE_XREF_TAGS.containsKey(tag)) {
            // The first word after the tag is passed to the footnote as its second argument.
            String[] parts = textPart.split(" ", 2);
            FootnoteXref nextContainer = new FootnoteXref(FOOTNOTE_XREF_TAGS.get(tag), parts[0]);
            containerStack.get(containerStack.size() - 1).getContent().add(nextContainer);
            containerStack.add(nextContainer);
            textPart = parts.length == 1 ? "" : parts[1];
        } else if (tag.equals("id")) {
            System.out.println("WARNING: Skipping duplicate \\id tag");
            textPart = "";
        } else if (tag.equals("ide")) {
            Charset correctCharset;
            try {
                if (textPart.matches("[0-9]+ - .*")) {
                    // "<codepage> - <description>": 65001 is UTF-8, everything else a Windows code page.
                    int codepage = Integer.parseInt(textPart.replaceAll(" - .*", ""));
                    correctCharset = codepage == 65001 ? StandardCharsets.UTF_8 : Charset.forName("windows-" + codepage);
                } else {
                    correctCharset = Charset.forName(textPart);
                }
            } catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
                System.out.println("WARNING: Unknown charset " + textPart + " specified, falling back to ISO-8859-1");
                correctCharset = StandardCharsets.ISO_8859_1;
            }
            if (!correctCharset.equals(charset)) {
                // Only a file read with UTF-8 may be re-read with the declared charset.
                if (!charset.equals(StandardCharsets.UTF_8)) {
                    throw new IOException("Two charsets specified: " + charset + " and " + correctCharset);
                }
                return doImportBook(inputFile, correctCharset);
            } else {
                result.getAttributes().put(tag, textPart);
            }
            textPart = "";
        } else if (BOOK_HEADER_ATTRIBUTE_TAGS.contains(tag)) {
            result.getAttributes().put(tag, textPart);
            textPart = "";
        } else {
            System.out.println("WARNING: Skipping unknown tag \\" + tag);
        }
        if (closeCharacterAttributes) {
            containerStack.clear();
        }
        // " // " is replaced by a plain space and "~" by a non-breaking space.
        textPart = textPart.replace(" // ", " ").replace("~", "\u00A0");
        ParatextCharacterContent.Text text = ParatextCharacterContent.Text.from(textPart);
        if (text != null) {
            if (containerStack.isEmpty()) {
                ParatextCharacterContent container = new ParatextCharacterContent();
                containerStack.add(container);
                result.getContent().add(container);
            }
            containerStack.get(containerStack.size() - 1).getContent().add(text);
        }
    }
    ImportUtilities.closeOpenVerse(result, openVerse);
    ImportUtilities.closeOpenChapter(result, openChapter);
    return result;
}
Also used : TableCellStart(biblemulticonverter.format.paratext.ParatextBook.TableCellStart) ArrayList(java.util.ArrayList) IllegalCharsetNameException(java.nio.charset.IllegalCharsetNameException) ParatextID(biblemulticonverter.format.paratext.ParatextBook.ParatextID) AutoClosingFormatting(biblemulticonverter.format.paratext.ParatextCharacterContent.AutoClosingFormatting) Charset(java.nio.charset.Charset) IOException(java.io.IOException) ChapterStart(biblemulticonverter.format.paratext.ParatextBook.ChapterStart) FootnoteXref(biblemulticonverter.format.paratext.ParatextCharacterContent.FootnoteXref) VerseIdentifier(biblemulticonverter.format.paratext.model.VerseIdentifier) VerseStart(biblemulticonverter.format.paratext.ParatextCharacterContent.VerseStart) ParatextCharacterContentContainer(biblemulticonverter.format.paratext.ParatextBook.ParatextCharacterContentContainer) ParagraphKind(biblemulticonverter.format.paratext.ParatextBook.ParagraphKind) UnsupportedCharsetException(java.nio.charset.UnsupportedCharsetException) ChapterIdentifier(biblemulticonverter.format.paratext.model.ChapterIdentifier) ParagraphStart(biblemulticonverter.format.paratext.ParatextBook.ParagraphStart)

Example 8 with ChapterIdentifier

use of biblemulticonverter.format.paratext.model.ChapterIdentifier in project BibleMultiConverter by schierlm.

the class AbstractParatextFormat method importParatextBook.

/**
 * Converts a {@link ParatextBook} into a {@link Book} of the internal data model.
 *
 * <p>The long name is taken from the {@code toc1} attribute (falling back to the English name of
 * the book ID) and the short name from {@code toc2} (falling back to the long name). The book
 * content is then walked with a visitor that keeps its state in a {@code ParatextImportContext}.
 * Note that {@code book.fixTrailingWhitespace()} is called on the passed-in book before visiting.
 *
 * @param book the Paratext book to convert
 * @param bookAbbrs abbreviations by book ID; the entry for this book's ID is used as abbreviation
 * @return the converted book, with {@code finished()} called on every prolog and verse
 */
protected final Book importParatextBook(ParatextBook book, Map<ParatextID, String> bookAbbrs) {
    String longName = book.getAttributes().get("toc1");
    if (longName == null || longName.isEmpty())
        longName = book.getId().getEnglishName();
    String shortName = book.getAttributes().get("toc2");
    if (shortName == null || shortName.isEmpty())
        shortName = longName;
    final Book bk = new Book(bookAbbrs.get(book.getId()), book.getId().getId(), shortName, longName);
    // Books whose internal ID has a negative Zefania ID put all of their text into the prolog.
    final boolean forceProlog = book.getId().getId().getZefID() < 0;
    final ParatextImportContext ctx = new ParatextImportContext();
    ctx.bookAbbrs = bookAbbrs;
    book.fixTrailingWhitespace();
    book.accept(new ParatextBookContentVisitor<RuntimeException>() {

        @Override
        public void visitChapterStart(ChapterIdentifier location) throws RuntimeException {
            // Headlines collected so far that were never attached to anything are dropped.
            if (ctx.cnum != -1 && !ctx.headlines.isEmpty()) {
                System.out.println("WARNING: Ignoring unreferenced headlines");
                ctx.headlines.clear();
            }
            int newChapter = location.chapter;
            if (ctx.cnum == 0 && newChapter == 1) {
                // we are in prolog (chapter already exists)
                ctx.cnum = newChapter;
            } else if (newChapter >= 1 && newChapter > ctx.cnum) {
                if (ctx.cnum == -1)
                    ctx.cnum = 0;
                // Fill skipped chapter numbers with empty chapters so list positions match chapter numbers.
                while (ctx.cnum < newChapter - 1) {
                    bk.getChapters().add(new Chapter());
                    ctx.cnum++;
                }
                ctx.currentChapter = new Chapter();
                bk.getChapters().add(ctx.currentChapter);
                ctx.cnum = newChapter;
            } else {
                // Chapter numbers that do not increase are ignored; content continues in the current chapter.
                System.out.println("WARNING: Ignoring chapter number " + newChapter + ", current chapter is " + ctx.cnum);
            }
            // Reset all per-paragraph and per-verse state.
            ctx.currentVisitor = null;
            ctx.currentVisitorExtraCSS = null;
            ctx.currentVerse = null;
            ctx.currentParagraph = ParatextImportContext.CurrentParagraph.NONE;
            ctx.currentParagraphExtraCSS = null;
        }

        @Override
        public void visitChapterEnd(ChapterIdentifier location) throws RuntimeException {
        // Not supported in the internal format
        }

        @Override
        public void visitParagraphStart(ParagraphKind kind) throws RuntimeException {
            // With exportAllTags, skipped paragraph kinds are kept as a CSS marker around U+FEFF.
            if (exportAllTags && kind.getCategory() == ParagraphKindCategory.SKIP && ctx.currentVisitor != null) {
                ctx.currentVisitor.visitCSSFormatting("-bmc-usfm-tag: " + kind.getTag()).visitText("\uFEFF");
            }
            // Close the paragraph that is currently open, if any.
            if (ctx.currentParagraph != ParatextImportContext.CurrentParagraph.NONE) {
                if (ctx.currentParagraph == ParatextImportContext.CurrentParagraph.PROLOG || (ctx.currentParagraph == ParatextImportContext.CurrentParagraph.NORMAL && ctx.currentVisitor != null)) {
                    ctx.currentVisitor.visitLineBreak(LineBreakKind.PARAGRAPH);
                }
                // Only a NORMAL paragraph keeps its visitor across the paragraph boundary.
                if (ctx.currentParagraph != ParatextImportContext.CurrentParagraph.NORMAL) {
                    ctx.currentVisitor = null;
                    ctx.currentVisitorExtraCSS = null;
                }
                ctx.currentParagraph = ParatextImportContext.CurrentParagraph.NONE;
                ctx.currentParagraphExtraCSS = null;
            }
            if (kind.getCategory() == ParagraphKindCategory.SKIP) {
            // do nothing
            } else if (kind.getCategory() == ParagraphKindCategory.HEADLINE) {
                Headline hl = null;
                // Joinable headline kinds are appended (separated by a space) to the previous pending
                // headline if that has the same depth or this kind has depth 0.
                if (kind.isJoinHeadlines() && !ctx.headlines.isEmpty()) {
                    hl = ctx.headlines.get(ctx.headlines.size() - 1);
                    if (hl.getDepth() == kind.getHeadlineDepth() || kind.getHeadlineDepth() == 0) {
                        hl.getAppendVisitor().visitText(" ");
                    } else {
                        hl = null;
                    }
                }
                if (hl == null) {
                    hl = new Headline(kind.getHeadlineDepth());
                    ctx.headlines.add(hl);
                }
                ctx.currentParagraph = ParatextImportContext.CurrentParagraph.HEADLINE;
                ctx.currentVisitor = hl.getAppendVisitor();
                if (kind.getExtraFormatting() != null) {
                    ctx.currentVisitor = ctx.currentVisitor.visitFormattingInstruction(kind.getExtraFormatting());
                }
                if (exportAllTags) {
                    ctx.currentParagraphExtraCSS = "-bmc-usfm-tag: " + kind.getTag();
                    ctx.currentVisitor = ctx.currentVisitor.visitCSSFormatting(ctx.currentParagraphExtraCSS);
                } else {
                    ctx.currentParagraphExtraCSS = null;
                }
                ctx.currentVisitorExtraCSS = ctx.currentParagraphExtraCSS;
            } else {
                // BLANK_LINE, TABLE_ROW, TEXT
                if (kind.isProlog() || forceProlog) {
                    // Prolog text before the first chapter marker creates the first chapter (cnum 0).
                    if (ctx.cnum == -1) {
                        ctx.cnum = 0;
                        ctx.currentChapter = new Chapter();
                        bk.getChapters().add(ctx.currentChapter);
                    }
                    if (ctx.currentChapter.getProlog() == null) {
                        ctx.currentChapter.setProlog(new FormattedText());
                    }
                    if (!ctx.currentChapter.getVerses().isEmpty()) {
                        System.out.println("WARNING: Adding to prolog after verses have been added!");
                    }
                    ctx.currentVisitor = ctx.currentChapter.getProlog().getAppendVisitor();
                    ctx.currentParagraph = ParatextImportContext.CurrentParagraph.PROLOG;
                    // NOTE(review): presumably writes the pending headlines into the prolog visitor - confirm in ParatextImportContext.
                    ctx.flushHeadlines();
                    if (exportAllTags) {
                        ctx.currentParagraphExtraCSS = "-bmc-usfm-tag: " + kind.getTag();
                        ctx.currentVisitor = ctx.currentVisitor.visitCSSFormatting(ctx.currentParagraphExtraCSS);
                    } else {
                        ctx.currentParagraphExtraCSS = null;
                    }
                    ctx.currentVisitorExtraCSS = ctx.currentParagraphExtraCSS;
                } else {
                    // Normal verse text: only the paragraph CSS is recorded here; no visitor is assigned in this branch.
                    ctx.currentParagraph = ParatextImportContext.CurrentParagraph.NORMAL;
                    if (kind == ParagraphKind.DESCRIPTIVE_TITLE) {
                        ctx.currentParagraphExtraCSS = "font-style: italic; -bmc-psalm-title: true;" + (exportAllTags ? " -bmc-usfm-tag: " + kind.getTag() : "");
                    } else if (kind != ParagraphKind.PARAGRAPH_P && exportAllTags) {
                        ctx.currentParagraphExtraCSS = "-bmc-usfm-tag: " + kind.getTag();
                    } else {
                        ctx.currentParagraphExtraCSS = null;
                    }
                }
            }
        }

        @Override
        public void visitTableCellStart(String tag) throws RuntimeException {
            ctx.ensureParagraph();
            if (exportAllTags && ctx.currentVisitor != null) {
                ctx.currentVisitor.visitCSSFormatting("-bmc-usfm-tag: " + tag).visitText("\uFEFF");
            }
            // Every cell except the first one of a row (tag number 1) starts on a new indented line,
            // unless the current paragraph is a headline.
            if (!tag.matches("t[hc]r?1") && ctx.currentParagraph != ParatextImportContext.CurrentParagraph.HEADLINE && ctx.currentVisitor != null) {
                ctx.currentVisitor.visitLineBreak(LineBreakKind.NEWLINE_WITH_INDENT);
            }
        }

        @Override
        public void visitParatextCharacterContent(ParatextCharacterContent content) throws RuntimeException {
            ctx.ensureParagraph();
            content.accept(new ParatextImportVisitor(ctx));
        }
    });
    // Headlines still pending after the last content element cannot be attached anywhere.
    if (!ctx.headlines.isEmpty()) {
        System.out.println("WARNING: Ignoring unreferenced headlines");
        ctx.headlines.clear();
    }
    // Mark every prolog and verse as finished.
    for (Chapter ch : bk.getChapters()) {
        if (ch.getProlog() != null)
            ch.getProlog().finished();
        for (Verse v : ch.getVerses()) v.finished();
    }
    return bk;
}
Also used : Chapter(biblemulticonverter.data.Chapter) FormattedText(biblemulticonverter.data.FormattedText) Book(biblemulticonverter.data.Book) ParagraphKind(biblemulticonverter.format.paratext.ParatextBook.ParagraphKind) Headline(biblemulticonverter.data.FormattedText.Headline) ChapterIdentifier(biblemulticonverter.format.paratext.model.ChapterIdentifier) Verse(biblemulticonverter.data.Verse)

Example 9 with ChapterIdentifier

use of biblemulticonverter.format.paratext.model.ChapterIdentifier in project BibleMultiConverter by schierlm.

the class ParatextDump method doImportAllBooks.

/**
 * Reads all books from a Paratext dump file.
 *
 * <p>The file is read as UTF-8, one record per line. Each line is split at tabs into at most
 * three fields; the first field selects the record type ({@code BOOK}, {@code BOOKATTR},
 * {@code CHAPTER}, {@code CHAPTER-END}, {@code PARAGRAPH}, {@code TABLECELL} or
 * {@code CHARCONTENTSTART}). A {@code CHARCONTENTSTART} record is followed by character content
 * lines up to a line consisting of {@code CHARCONTENTEND}.
 *
 * @param inputFile the dump file to read
 * @return the books in the order their {@code BOOK} records appear in the file
 * @throws IOException if the file cannot be read, contains an unknown record type, contains a
 *         record before the first {@code BOOK} record, or ends inside a character content block
 * @throws NullPointerException if a {@code PARAGRAPH} record names an unknown paragraph tag
 */
@Override
protected List<ParatextBook> doImportAllBooks(File inputFile) throws Exception {
    List<ParatextBook> result = new ArrayList<>();
    ParatextBook currentBook = null;
    Map<String, ParagraphKind> allParagraphKinds = ParagraphKind.allTags();
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile), StandardCharsets.UTF_8))) {
        String line;
        while ((line = br.readLine()) != null) {
            String[] parts = line.split("\t", 3);
            // Every record except BOOK is added to the current book, so one must exist already.
            if (currentBook == null && !parts[0].equals("BOOK")) {
                throw new IOException("Record found before first BOOK record: " + line);
            }
            switch(parts[0]) {
                case "BOOK":
                    result.add(currentBook = new ParatextBook(ParatextID.fromIdentifier(parts[1]), parts[2]));
                    break;
                case "BOOKATTR":
                    currentBook.getAttributes().put(parts[1], parts[2]);
                    break;
                case "CHAPTER":
                    // Chapter starts store only the number; the book ID comes from the current book.
                    currentBook.getContent().add(new ChapterStart(new ChapterIdentifier(currentBook.getId(), Integer.parseInt(parts[1]))));
                    break;
                case "CHAPTER-END":
                    // Chapter ends store a complete location string.
                    currentBook.getContent().add(new ParatextBook.ChapterEnd(ChapterIdentifier.fromLocationString(parts[1])));
                    break;
                case "PARAGRAPH":
                    currentBook.getContent().add(new ParagraphStart(Objects.requireNonNull(allParagraphKinds.get(parts[1]))));
                    break;
                case "TABLECELL":
                    currentBook.getContent().add(new TableCellStart(parts[1]));
                    break;
                case "CHARCONTENTSTART":
                    ParatextCharacterContent cc = new ParatextCharacterContent();
                    currentBook.getContent().add(cc);
                    while ((line = br.readLine()) != null && !line.equals("CHARCONTENTEND")) {
                        importCharContent(cc.getContent(), br, line);
                    }
                    // A truncated file must be reported as a format error, not as a NullPointerException.
                    if (line == null) {
                        throw new IOException("Unexpected end of file: CHARCONTENTSTART without CHARCONTENTEND");
                    }
                    break;
                default:
                    throw new IOException(line);
            }
        }
    }
    return result;
}
Also used : InputStreamReader(java.io.InputStreamReader) TableCellStart(biblemulticonverter.format.paratext.ParatextBook.TableCellStart) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ChapterStart(biblemulticonverter.format.paratext.ParatextBook.ChapterStart) FileInputStream(java.io.FileInputStream) ParagraphKind(biblemulticonverter.format.paratext.ParatextBook.ParagraphKind) BufferedReader(java.io.BufferedReader) ChapterIdentifier(biblemulticonverter.format.paratext.model.ChapterIdentifier) ParagraphStart(biblemulticonverter.format.paratext.ParatextBook.ParagraphStart)

Example 10 with ChapterIdentifier

use of biblemulticonverter.format.paratext.model.ChapterIdentifier in project BibleMultiConverter by schierlm.

the class AbstractParatextFormatTest method test_on_import_paratext_verse_numbers_are_transformed_to_internal_verse_numbers.

/**
 * Imports a one-chapter book containing a plain verse number and a Paratext-style verse range
 * and checks the verse numbers of the resulting internal chapter.
 */
@Test
public void test_on_import_paratext_verse_numbers_are_transformed_to_internal_verse_numbers() {
    // Abbreviation lookup handed to the importer
    Map<ParatextBook.ParatextID, String> abbreviations = new EnumMap<>(ParatextBook.ParatextID.class);
    abbreviations.put(ParatextBook.ParatextID.ID_1CO, "1co");

    // Source book: chapter 1 with a single paragraph holding both verses
    ParatextBook source = new ParatextBook(ParatextBook.ParatextID.ID_1CO, null);
    ParatextCharacterContent verses = new ParatextCharacterContent();
    source.getContent().add(new ParatextBook.ChapterStart(new ChapterIdentifier(source.getId(), 1)));
    source.getContent().add(new ParatextBook.ParagraphStart(ParatextBook.ParagraphKind.PARAGRAPH_P));
    source.getContent().add(verses);
    // A verse number that exists in both models
    addDummyVerse(verses, new VerseIdentifier(source.getId(), 1, "5", null), "5");
    // A verse range with letter suffixes, which only Paratext can express
    addDummyVerse(verses, new VerseIdentifier(source.getId(), 1, "6b", "7a"), "6b-7a");
    source.getContent().add(new ParatextBook.ChapterEnd(new ChapterIdentifier(source.getId(), 1)));

    AbstractParatextFormat importer = new TestParatextFormat();
    Book imported = importer.importParatextBook(source, abbreviations);

    Chapter firstChapter = imported.getChapters().get(0);
    assertEquals("5", firstChapter.getVerses().get(0).getNumber());
    assertEquals("6b", firstChapter.getVerses().get(1).getNumber());
}
Also used : Chapter(biblemulticonverter.data.Chapter) VerseIdentifier(biblemulticonverter.format.paratext.model.VerseIdentifier) Book(biblemulticonverter.data.Book) ChapterIdentifier(biblemulticonverter.format.paratext.model.ChapterIdentifier) EnumMap(java.util.EnumMap) Test(org.junit.Test)

Aggregations

ChapterIdentifier (biblemulticonverter.format.paratext.model.ChapterIdentifier)10 IOException (java.io.IOException)7 ParagraphKind (biblemulticonverter.format.paratext.ParatextBook.ParagraphKind)6 ChapterStart (biblemulticonverter.format.paratext.ParatextBook.ChapterStart)4 ParagraphStart (biblemulticonverter.format.paratext.ParatextBook.ParagraphStart)4 JAXBContext (javax.xml.bind.JAXBContext)4 ParatextID (biblemulticonverter.format.paratext.ParatextBook.ParatextID)3 TableCellStart (biblemulticonverter.format.paratext.ParatextBook.TableCellStart)3 AutoClosingFormatting (biblemulticonverter.format.paratext.ParatextCharacterContent.AutoClosingFormatting)3 FootnoteXref (biblemulticonverter.format.paratext.ParatextCharacterContent.FootnoteXref)3 VerseIdentifier (biblemulticonverter.format.paratext.model.VerseIdentifier)3 FileInputStream (java.io.FileInputStream)3 EnumMap (java.util.EnumMap)3 Map (java.util.Map)3 Book (biblemulticonverter.data.Book)2 Chapter (biblemulticonverter.data.Chapter)2 VerseStart (biblemulticonverter.format.paratext.ParatextCharacterContent.VerseStart)2 UnifiedScriptureXMLWriter (biblemulticonverter.format.paratext.utilities.UnifiedScriptureXMLWriter)2 Cell (biblemulticonverter.schema.usx.Cell)2 Chapter (biblemulticonverter.schema.usx.Chapter)2