Search in sources :

Example 31 with FormattedText

Use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.

The doImport method of the class AbstractParatextFormat.

/**
 * Imports a Bible from the given input by loading its Paratext books with
 * {@code doImportBooks} and converting every book into a {@link Book}.
 *
 * <p>The Bible name is the common suffix of the bible names of all books; as soon
 * as one book has an empty bible name the common name becomes empty. If the
 * resulting name is {@code null} or empty, {@code "Imported Bible"} is used.
 *
 * <p>For every book, the long name, short name and abbreviation are taken from the
 * {@code toc1}, {@code toc2} and {@code toc3} attributes, with fallbacks to the
 * English book name, the long name and a value derived from the OSIS ID
 * respectively. The book content is then walked with a visitor that creates
 * chapters, prologs and headlines; character content is delegated to a
 * {@code ParatextImportVisitor}.
 *
 * @param inputFile the file (or directory) handed on to {@code doImportBooks}
 * @return the converted Bible
 * @throws Exception declared by this method; whatever {@code doImportBooks} throws is propagated
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    List<ParatextBook> books = doImportBooks(inputFile);
    String bibleName = null;
    for (ParatextBook book : books) {
        if (bibleName == null || book.getBibleName().isEmpty()) {
            // first book, or a book without a name (which makes the common suffix empty)
            bibleName = book.getBibleName();
        } else {
            String bookBibleName = book.getBibleName();
            // use common suffix
            // first cut the longer string down to the length of the shorter one, keeping its tail
            if (bookBibleName.length() > bibleName.length()) {
                bookBibleName = bookBibleName.substring(bookBibleName.length() - bibleName.length());
            } else if (bibleName.length() > bookBibleName.length()) {
                bibleName = bibleName.substring(bibleName.length() - bookBibleName.length());
            }
            // both strings have equal length now; scan backwards to the first mismatch
            for (int i = bibleName.length() - 1; i >= 0; i--) {
                if (bibleName.charAt(i) != bookBibleName.charAt(i)) {
                    bibleName = bibleName.substring(i + 1);
                    break;
                }
            }
        }
    }
    Bible bible = new Bible((bibleName == null || bibleName.isEmpty()) ? "Imported Bible" : bibleName);
    for (ParatextBook book : books) {
        // book names: toc1 -> long name, toc2 -> short name, toc3 -> abbreviation
        String longName = book.getAttributes().get("toc1");
        if (longName == null || longName.isEmpty())
            longName = book.getId().getEnglishName();
        String shortName = book.getAttributes().get("toc2");
        if (shortName == null || shortName.isEmpty())
            shortName = longName;
        String abbr = book.getAttributes().get("toc3"), fallbackAbbr = book.getId().getId().getOsisID().replace("x-", "").replace("-", "");
        if (abbr == null)
            abbr = fallbackAbbr;
        abbr = abbr.replace(" ", "");
        if (!Utils.compilePattern(Utils.BOOK_ABBR_REGEX).matcher(abbr).matches()) {
            System.out.println("WARNING: Unsupported book abbreviation " + abbr + ", using " + fallbackAbbr + " instead");
            abbr = fallbackAbbr;
        }
        final Book bk = new Book(abbr, book.getId().getId(), shortName, longName);
        bible.getBooks().add(bk);
        // books with a negative Zefania ID have all of their text paragraphs treated as prolog
        final boolean forceProlog = book.getId().getId().getZefID() < 0;
        final ParatextImportContext ctx = new ParatextImportContext();
        ctx.nt = book.getId().getId().isNT();
        book.accept(new ParatextBookContentVisitor<RuntimeException>() {

            /**
             * Starts a new chapter. Chapter numbers that do not advance are ignored
             * with a warning; skipped chapter numbers are filled with empty chapters.
             */
            @Override
            public void visitChapterStart(int newChapter) throws RuntimeException {
                if (ctx.cnum != -1 && !ctx.headlines.isEmpty()) {
                    System.out.println("WARNING: Ignoring unreferenced headlines");
                    ctx.headlines.clear();
                }
                if (ctx.cnum == 0 && newChapter == 1) {
                    // we are in prolog (chapter already exists)
                    ctx.cnum = newChapter;
                } else if (newChapter >= 1 && newChapter > ctx.cnum) {
                    // Pad skipped chapter numbers with empty chapters. Count the chapters
                    // actually present instead of ctx.cnum: after a book-level prolog the
                    // list already holds one chapter while ctx.cnum is still 0, so counting
                    // from ctx.cnum would add one chapter too many when the first chapter
                    // marker is greater than 1.
                    while (bk.getChapters().size() < newChapter - 1) {
                        bk.getChapters().add(new Chapter());
                    }
                    ctx.currentChapter = new Chapter();
                    bk.getChapters().add(ctx.currentChapter);
                    ctx.cnum = newChapter;
                } else {
                    System.out.println("WARNING: Ignoring chapter number " + newChapter + ", current chapter is " + ctx.cnum);
                }
                // a new chapter always starts outside of any verse or paragraph
                ctx.currentVisitor = null;
                ctx.currentVerse = null;
                ctx.currentParagraph = ParatextImportContext.CurrentParagraph.NONE;
            }

            /**
             * Closes the paragraph that is currently open (if any) and opens a new
             * one according to the category of the given paragraph kind.
             */
            @Override
            public void visitParagraphStart(ParagraphKind kind) throws RuntimeException {
                if (ctx.currentParagraph != ParatextImportContext.CurrentParagraph.NONE) {
                    // terminate the previous paragraph; a NORMAL paragraph only has a
                    // visitor to write the break to once some content was started
                    if (ctx.currentParagraph == ParatextImportContext.CurrentParagraph.PROLOG || (ctx.currentParagraph == ParatextImportContext.CurrentParagraph.NORMAL && ctx.currentVisitor != null)) {
                        ctx.currentVisitor.visitLineBreak(LineBreakKind.PARAGRAPH);
                    }
                    ctx.currentParagraph = ParatextImportContext.CurrentParagraph.NONE;
                }
                if (kind.getCategory() == ParagraphKindCategory.SKIP) {
                // do nothing
                } else if (kind.getCategory() == ParagraphKindCategory.HEADLINE) {
                    Headline hl = null;
                    if (kind.isJoinHeadlines() && !ctx.headlines.isEmpty()) {
                        // try to continue the most recent pending headline
                        hl = ctx.headlines.get(ctx.headlines.size() - 1);
                        if (hl.getDepth() == kind.getHeadlineDepth() || kind.getHeadlineDepth() == 0) {
                            hl.getAppendVisitor().visitText(" ");
                        } else {
                            // depth differs, so start a separate headline instead
                            hl = null;
                        }
                    }
                    if (hl == null) {
                        hl = new Headline(kind.getHeadlineDepth());
                        ctx.headlines.add(hl);
                    }
                    ctx.currentParagraph = ParatextImportContext.CurrentParagraph.HEADLINE;
                    ctx.currentVisitor = hl.getAppendVisitor();
                    if (kind.getExtraFormatting() != null) {
                        ctx.currentVisitor = ctx.currentVisitor.visitFormattingInstruction(kind.getExtraFormatting());
                    }
                } else {
                    // BLANK_LINE, TABLE_ROW, TEXT
                    if (kind.isProlog() || forceProlog) {
                        if (ctx.cnum == -1) {
                            // prolog before the first chapter marker: create the chapter that holds it
                            ctx.cnum = 0;
                            ctx.currentChapter = new Chapter();
                            bk.getChapters().add(ctx.currentChapter);
                        }
                        if (ctx.currentChapter.getProlog() == null) {
                            ctx.currentChapter.setProlog(new FormattedText());
                        }
                        if (!ctx.currentChapter.getVerses().isEmpty()) {
                            System.out.println("WARNING: Adding to prolog after verses have been added!");
                        }
                        ctx.currentVisitor = ctx.currentChapter.getProlog().getAppendVisitor();
                        ctx.currentParagraph = ParatextImportContext.CurrentParagraph.PROLOG;
                        ctx.flushHeadlines();
                    } else {
                        ctx.currentParagraph = ParatextImportContext.CurrentParagraph.NORMAL;
                    }
                }
            }

            /**
             * Starts a table cell. Every cell whose tag does not match
             * {@code t[hc]r?1} is preceded by an indented line break, unless the
             * current paragraph is a headline or there is no current visitor.
             */
            @Override
            public void visitTableCellStart(String tag) throws RuntimeException {
                ctx.ensureParagraph();
                if (!tag.matches("t[hc]r?1") && ctx.currentParagraph != ParatextImportContext.CurrentParagraph.HEADLINE && ctx.currentVisitor != null) {
                    ctx.currentVisitor.visitLineBreak(LineBreakKind.NEWLINE_WITH_INDENT);
                }
            }

            /** Hands character content over to a {@code ParatextImportVisitor} working on the shared context. */
            @Override
            public void visitParatextCharacterContent(ParatextCharacterContent content) throws RuntimeException {
                ctx.ensureParagraph();
                content.accept(new ParatextImportVisitor(ctx));
            }
        });
        // headlines still pending at the end of the book were never attached to any text
        if (!ctx.headlines.isEmpty()) {
            System.out.println("WARNING: Ignoring unreferenced headlines");
            ctx.headlines.clear();
        }
        // mark all prologs and verses of this book as finished
        for (Chapter ch : bk.getChapters()) {
            if (ch.getProlog() != null)
                ch.getProlog().finished();
            for (Verse v : ch.getVerses()) v.finished();
        }
    }
    return bible;
}
Also used : Bible(biblemulticonverter.data.Bible) Chapter(biblemulticonverter.data.Chapter) FormattedText(biblemulticonverter.data.FormattedText) Book(biblemulticonverter.data.Book) ParagraphKind(biblemulticonverter.format.paratext.ParatextBook.ParagraphKind) Headline(biblemulticonverter.data.FormattedText.Headline) Verse(biblemulticonverter.data.Verse)

Aggregations

FormattedText (biblemulticonverter.data.FormattedText)31 Chapter (biblemulticonverter.data.Chapter)25 Book (biblemulticonverter.data.Book)24 Verse (biblemulticonverter.data.Verse)22 Bible (biblemulticonverter.data.Bible)14 BookID (biblemulticonverter.data.BookID)10 Headline (biblemulticonverter.data.FormattedText.Headline)9 ArrayList (java.util.ArrayList)9 MetadataBook (biblemulticonverter.data.MetadataBook)8 IOException (java.io.IOException)8 Visitor (biblemulticonverter.data.FormattedText.Visitor)7 VirtualVerse (biblemulticonverter.data.VirtualVerse)7 File (java.io.File)6 EnumMap (java.util.EnumMap)5 HashMap (java.util.HashMap)4 Matcher (java.util.regex.Matcher)4 Element (org.w3c.dom.Element)3 Node (org.w3c.dom.Node)3 ExtraAttributePriority (biblemulticonverter.data.FormattedText.ExtraAttributePriority)2 FormattingInstructionKind (biblemulticonverter.data.FormattedText.FormattingInstructionKind)2