Search in sources:

Example 21 with Headline

use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.

the class MyBibleZone method doImport.

/**
 * Imports a Bible from an SQLite database file (the metadata key prefix and
 * the file name handling suggest the MyBible.zone module format).
 *
 * <p>The method reads, in this order, the tables {@code info} (bible name and
 * metadata), {@code books}, {@code introductions} (optional), {@code verses}
 * and {@code stories} (optional, used as verse subheadings). If the input file
 * name ends with {@code .SQLite3} and a sibling file ending in
 * {@code .commentaries.SQLite3} exists, that file is opened as well and handed
 * to {@code convertFromVerse} for every verse.
 *
 * <p>A database is only reopened in write mode when one of the indexes needed
 * for ordered reading is missing; {@code checkIndex} is then called with the
 * matching {@code CREATE INDEX} statement.
 *
 * @param inputFile the SQLite file to read
 * @return the imported bible; a metadata book is added first if any metadata
 *         value was read
 * @throws Exception if the database cannot be opened or read, or if converting
 *         a verse fails (the verse text is used as the exception message)
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    SqlJetDb db = SqlJetDb.open(inputFile, false);
    SqlJetDb footnoteDB = null;
    File footnoteFile = new File(inputFile.getParentFile(), inputFile.getName().replace(".SQLite3", ".commentaries.SQLite3"));
    if (inputFile.getName().endsWith(".SQLite3") && footnoteFile.exists()) {
        footnoteDB = SqlJetDb.open(footnoteFile, false);
        if (!footnoteDB.getTable("commentaries").getIndexesNames().contains("commentaries_index")) {
            // index is missing: reopen writable so that it can be created
            footnoteDB.close();
            footnoteDB = SqlJetDb.open(footnoteFile, true);
            checkIndex(footnoteDB, "commentaries", "commentaries_index", "CREATE INDEX commentaries_index on commentaries(book_number, chapter_number_from, verse_number_from)");
        }
        footnoteDB.beginTransaction(SqlJetTransactionMode.READ_ONLY);
    }
    // The index name tested here has to be the one created below and used for
    // ordering the verses later ("verses_index"); testing any other name makes
    // the condition always true and forces a needless reopen in write mode.
    if (!db.getTable("verses").getIndexesNames().contains("verses_index") || (db.getSchema().getTable("stories") != null && !db.getTable("stories").getIndexesNames().contains("stories_index"))) {
        db.close();
        db = SqlJetDb.open(inputFile, true);
        checkIndex(db, "verses", "verses_index", "CREATE UNIQUE INDEX verses_index on verses (book_number, chapter, verse)");
        if (db.getSchema().getTable("stories") != null)
            // the stories index includes order_if_several only if that column exists
            if (db.getSchema().getTable("stories").getColumn("order_if_several") == null)
                checkIndex(db, "stories", "stories_index", "CREATE UNIQUE INDEX stories_index on stories(book_number, chapter, verse)");
            else
                checkIndex(db, "stories", "stories_index", "CREATE UNIQUE INDEX stories_index on stories(book_number, chapter, verse, order_if_several)");
    }
    db.beginTransaction(SqlJetTransactionMode.READ_ONLY);

    // --- info table: bible name and metadata ---
    String bibleName = null;
    MetadataBook mb = new MetadataBook();
    ISqlJetCursor cursor = db.getTable("info").open();
    while (!cursor.eof()) {
        String fn = cursor.getString("name");
        String fv = cursor.getString("value");
        if (fn.equals("description")) {
            bibleName = fv;
        } else if (!fv.isEmpty()) {
            // collapse line break runs, strip spaces around them, drop a trailing line break
            fv = fv.replaceAll("[\r\n]+", "\n").replaceAll(" *\n *", "\n").replaceAll("\n$", "");
            try {
                mb.setValue("MyBible.zone@" + fn.replace('_', '.'), fv);
            } catch (IllegalArgumentException ex) {
                System.out.println("WARNING: Skipping malformed metadata property " + fn);
            }
        }
        cursor.next();
    }
    cursor.close();
    if (bibleName == null) {
        System.out.println("WARNING: No bible name in info table");
        bibleName = inputFile.getName();
    }
    Bible result = new Bible(bibleName.trim());
    if (!mb.getKeys().isEmpty()) {
        mb.finished();
        result.getBooks().add(mb.getBook());
    }

    // --- books table: map book numbers to Book objects ---
    Map<Integer, Book> bookIDMap = new HashMap<>();
    cursor = db.getTable("books").open();
    while (!cursor.eof()) {
        int num = (int) cursor.getInteger("book_number");
        String col = cursor.getString("book_color");
        String shortName = cursor.getString("short_name").trim().replace(" ", "").replaceAll("[^A-Z0-9a-zäöü]++", "");
        if (!shortName.isEmpty())
            shortName = shortName.substring(0, 1).toUpperCase() + shortName.substring(1);
        String longName = cursor.getString("long_name").trim();
        BookID bid = null;
        for (MyBibleZoneBook bi : BOOK_INFO) {
            if (bi.bookNumber == num) {
                bid = bi.bookID;
                if (!col.equals(bi.bookColor))
                    System.out.println("WARNING: Book " + bid.getOsisID() + " uses color " + col + " and not " + bi.bookColor);
            }
        }
        if (bid == null) {
            System.out.println("WARNING: Book number " + num + " unknown; skipping: " + shortName + "/" + longName);
            // generate dummy entry not stored in result object
            bookIDMap.put(num, new Book("Xxx", BookID.BOOK_Gen, "X", "X"));
        } else {
            // too short abbreviations are replaced by one derived from the OSIS id
            if (shortName.length() < 2)
                shortName = bid.getOsisID().replaceAll("[^A-Z0-9a-zäöü]++", "");
            Book bk = new Book(shortName, bid, longName, longName);
            result.getBooks().add(bk);
            bookIDMap.put(num, bk);
        }
        cursor.next();
    }
    cursor.close();

    // --- introductions table (optional): stored as prolog of the first chapter ---
    if (db.getSchema().getTable("introductions") != null) {
        cursor = db.getTable("introductions").open();
        while (!cursor.eof()) {
            int num = (int) cursor.getInteger("book_number");
            String intro = cursor.getString("introduction");
            Book bk;
            if (num == 0) {
                // book number 0 becomes a separate introduction book, placed
                // first or directly after the metadata book
                bk = new Book("Intro", BookID.INTRODUCTION, "_Introduction_", "_Introduction_");
                if (!result.getBooks().isEmpty() && result.getBooks().get(0).getId().equals(BookID.METADATA)) {
                    result.getBooks().add(1, bk);
                } else {
                    result.getBooks().add(0, bk);
                }
            } else {
                bk = bookIDMap.get(num);
            }
            if (bk == null) {
                System.out.println("WARNING: Skipping introduction for nonexisting book " + num);
            } else {
                FormattedText ft = new FormattedText();
                convertFromHTML(intro, ft.getAppendVisitor());
                ft.finished();
                if (bk.getChapters().isEmpty())
                    bk.getChapters().add(new Chapter());
                bk.getChapters().get(0).setProlog(ft);
            }
            cursor.next();
        }
        cursor.close();
    }

    // --- verses table, read in index order ---
    cursor = db.getTable("verses").order("verses_index");
    while (!cursor.eof()) {
        int b = (int) cursor.getInteger("book_number");
        int c = (int) cursor.getInteger("chapter");
        int v = (int) cursor.getInteger("verse");
        String text = cursor.getString("text");
        if (text == null)
            text = "";
        text = text.trim();
        if (!text.isEmpty()) {
            Book bk = bookIDMap.get(b);
            if (bk == null) {
                System.out.println("WARNING: Verse for unknown book " + b + " skipped");
            } else {
                // create missing chapters up to and including chapter c
                while (bk.getChapters().size() < c) bk.getChapters().add(new Chapter());
                Chapter ch = bk.getChapters().get(c - 1);
                Verse vv = new Verse("" + v);
                try {
                    String rest = convertFromVerse(text, vv.getAppendVisitor(), footnoteDB, new int[] { b, c, v });
                    if (!rest.isEmpty()) {
                        System.out.println("WARNING: Treating tags as plaintext: " + rest);
                        vv.getAppendVisitor().visitText(rest.replace('\t', ' ').replaceAll("  +", " "));
                    }
                } catch (RuntimeException ex) {
                    // rethrow with the offending verse text as message
                    throw new RuntimeException(text, ex);
                }
                ch.getVerses().add(vv);
                vv.finished();
            }
        }
        cursor.next();
    }
    cursor.close();

    // --- stories table (optional): subheadings placed before existing verses ---
    if (db.getSchema().getTable("stories") != null) {
        cursor = db.getTable("stories").order("stories_index");
        Map<Verse, List<FormattedText.Headline>> subheadings = new HashMap<>();
        Map<Verse, Chapter> subheadingChapters = new HashMap<>();
        while (!cursor.eof()) {
            int b = (int) cursor.getInteger("book_number");
            int c = (int) cursor.getInteger("chapter");
            int v = (int) cursor.getInteger("verse");
            String title = cursor.getString("title").trim();
            Book bk = bookIDMap.get(b);
            if (bk == null) {
                System.out.println("WARNING: Subheading for unknown book " + b + " skipped");
            } else if (bk.getChapters().size() < c) {
                System.out.println("WARNING: Subheading for unknown chapter " + b + " " + c + " skipped");
            } else {
                Chapter ch = bk.getChapters().get(c - 1);
                Verse vv = null;
                for (Verse vvv : ch.getVerses()) {
                    if (vvv.getNumber().equals("" + v))
                        vv = vvv;
                }
                if (vv == null) {
                    System.out.println("WARNING: Subheading for unknown verse " + b + " " + c + ":" + v + " skipped");
                } else {
                    List<FormattedText.Headline> hls = subheadings.get(vv);
                    if (hls == null) {
                        hls = new ArrayList<>();
                        subheadings.put(vv, hls);
                        subheadingChapters.put(vv, ch);
                    }
                    Headline hl = new Headline(1);
                    // text inside <x>...</x> is emitted as bold text; an
                    // unclosed <x> is dropped with a warning
                    while (title.contains("<x>")) {
                        int pos = title.indexOf("<x>");
                        hl.getAppendVisitor().visitText(title.substring(0, pos));
                        title = title.substring(pos + 3);
                        pos = title.indexOf("</x>");
                        if (pos == -1)
                            System.out.println("WARNING: Unclosed cross reference: " + title);
                        else {
                            String ref = title.substring(0, pos);
                            title = title.substring(pos + 4);
                            hl.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText(ref);
                        }
                    }
                    hl.getAppendVisitor().visitText(title);
                    hl.finished();
                    hls.add(hl);
                }
            }
            cursor.next();
        }
        cursor.close();
        // rebuild each affected verse: headlines first, then the original content
        for (Verse vv : subheadings.keySet()) {
            Chapter cc = subheadingChapters.get(vv);
            Verse vnew = new Verse(vv.getNumber());
            for (Headline hl : subheadings.get(vv)) {
                hl.accept(vnew.getAppendVisitor().visitHeadline(hl.getDepth()));
            }
            vv.accept(vnew.getAppendVisitor());
            vnew.finished();
            int pos = cc.getVerses().indexOf(vv);
            cc.getVerses().set(pos, vnew);
        }
    }
    if (footnoteDB != null) {
        footnoteDB.commit();
        footnoteDB.close();
    }
    db.commit();
    db.close();
    return result;
}
Also used : MetadataBook(biblemulticonverter.data.MetadataBook) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) SqlJetDb(org.tmatesoft.sqljet.core.table.SqlJetDb) Bible(biblemulticonverter.data.Bible) Chapter(biblemulticonverter.data.Chapter) ArrayList(java.util.ArrayList) FormattedText(biblemulticonverter.data.FormattedText) BookID(biblemulticonverter.data.BookID) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) Headline(biblemulticonverter.data.FormattedText.Headline) ISqlJetCursor(org.tmatesoft.sqljet.core.table.ISqlJetCursor) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Example 22 with Headline

use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.

the class AbstractParatextFormat method doImport.

/**
 * Imports a bible from the books returned by {@code doImportBooks(inputFile)}.
 *
 * <p>The bible name is the common suffix of the bible names of all books
 * (falling back to {@code "Imported Bible"} if that is null or empty). Each
 * Paratext book is then converted to a {@link Book} by walking its content
 * with a visitor that tracks the current chapter, paragraph kind and pending
 * headlines in a {@code ParatextImportContext}.
 *
 * @param inputFile input passed on unchanged to {@code doImportBooks}
 * @return the imported bible, with all prologs and verses finished
 * @throws Exception if {@code doImportBooks} fails
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    List<ParatextBook> books = doImportBooks(inputFile);
    String bibleName = null;
    for (ParatextBook book : books) {
        // first book, or a book with an empty bible name: take its name as is
        // (an empty name therefore resets the name collected so far)
        if (bibleName == null || book.getBibleName().isEmpty()) {
            bibleName = book.getBibleName();
        } else {
            String bookBibleName = book.getBibleName();
            // use common suffix
            // (first cut the longer name down to the length of the shorter one,
            // then scan backwards for the first differing character)
            if (bookBibleName.length() > bibleName.length()) {
                bookBibleName = bookBibleName.substring(bookBibleName.length() - bibleName.length());
            } else if (bibleName.length() > bookBibleName.length()) {
                bibleName = bibleName.substring(bibleName.length() - bookBibleName.length());
            }
            for (int i = bibleName.length() - 1; i >= 0; i--) {
                if (bibleName.charAt(i) != bookBibleName.charAt(i)) {
                    bibleName = bibleName.substring(i + 1);
                    break;
                }
            }
        }
    }
    Bible bible = new Bible((bibleName == null || bibleName.isEmpty()) ? "Imported Bible" : bibleName);
    for (ParatextBook book : books) {
        // book names: toc1 -> long name, toc2 -> short name, toc3 -> abbreviation,
        // each with a fallback if the attribute is missing
        String longName = book.getAttributes().get("toc1");
        if (longName == null || longName.isEmpty())
            longName = book.getId().getEnglishName();
        String shortName = book.getAttributes().get("toc2");
        if (shortName == null || shortName.isEmpty())
            shortName = longName;
        String abbr = book.getAttributes().get("toc3"), fallbackAbbr = book.getId().getId().getOsisID().replace("x-", "").replace("-", "");
        if (abbr == null)
            abbr = fallbackAbbr;
        abbr = abbr.replace(" ", "");
        if (!Utils.compilePattern(Utils.BOOK_ABBR_REGEX).matcher(abbr).matches()) {
            System.out.println("WARNING: Unsupported book abbreviation " + abbr + ", using " + fallbackAbbr + " instead");
            abbr = fallbackAbbr;
        }
        final Book bk = new Book(abbr, book.getId().getId(), shortName, longName);
        bible.getBooks().add(bk);
        // books with a negative Zefania id get all their text paragraphs stored
        // as prolog (NOTE(review): presumably these are non-biblical books - confirm)
        final boolean forceProlog = book.getId().getId().getZefID() < 0;
        final ParatextImportContext ctx = new ParatextImportContext();
        ctx.nt = book.getId().getId().isNT();
        book.accept(new ParatextBookContentVisitor<RuntimeException>() {

            // Starts a new chapter. The code below treats ctx.cnum == -1 as
            // "no chapter yet" and ctx.cnum == 0 as "prolog chapter created"
            // (NOTE(review): initial value of cnum is set outside this method - confirm).
            @Override
            public void visitChapterStart(int newChapter) throws RuntimeException {
                if (ctx.cnum != -1 && !ctx.headlines.isEmpty()) {
                    System.out.println("WARNING: Ignoring unreferenced headlines");
                    ctx.headlines.clear();
                }
                if (ctx.cnum == 0 && newChapter == 1) {
                    // we are in prolog (chapter already exists)
                    ctx.cnum = newChapter;
                } else if (newChapter >= 1 && newChapter > ctx.cnum) {
                    if (ctx.cnum == -1)
                        ctx.cnum = 0;
                    // fill the gap with empty chapters if chapter numbers were skipped
                    while (ctx.cnum < newChapter - 1) {
                        bk.getChapters().add(new Chapter());
                        ctx.cnum++;
                    }
                    ctx.currentChapter = new Chapter();
                    bk.getChapters().add(ctx.currentChapter);
                    ctx.cnum = newChapter;
                } else {
                    System.out.println("WARNING: Ignoring chapter number " + newChapter + ", current chapter is " + ctx.cnum);
                }
                // a chapter start always ends the current verse and paragraph
                ctx.currentVisitor = null;
                ctx.currentVerse = null;
                ctx.currentParagraph = ParatextImportContext.CurrentParagraph.NONE;
            }

            // Closes the previous paragraph (if any) and sets up the visitor
            // and paragraph state for the new one, depending on its category.
            @Override
            public void visitParagraphStart(ParagraphKind kind) throws RuntimeException {
                if (ctx.currentParagraph != ParatextImportContext.CurrentParagraph.NONE) {
                    // end the previous paragraph with a paragraph break, unless it
                    // was a headline or a normal paragraph without any visitor
                    if (ctx.currentParagraph == ParatextImportContext.CurrentParagraph.PROLOG || (ctx.currentParagraph == ParatextImportContext.CurrentParagraph.NORMAL && ctx.currentVisitor != null)) {
                        ctx.currentVisitor.visitLineBreak(LineBreakKind.PARAGRAPH);
                    }
                    ctx.currentParagraph = ParatextImportContext.CurrentParagraph.NONE;
                }
                if (kind.getCategory() == ParagraphKindCategory.SKIP) {
                // do nothing
                } else if (kind.getCategory() == ParagraphKindCategory.HEADLINE) {
                    Headline hl = null;
                    // joinable headline kinds are appended (separated by a space) to
                    // the last pending headline if the depth matches or is 0
                    if (kind.isJoinHeadlines() && !ctx.headlines.isEmpty()) {
                        hl = ctx.headlines.get(ctx.headlines.size() - 1);
                        if (hl.getDepth() == kind.getHeadlineDepth() || kind.getHeadlineDepth() == 0) {
                            hl.getAppendVisitor().visitText(" ");
                        } else {
                            hl = null;
                        }
                    }
                    if (hl == null) {
                        hl = new Headline(kind.getHeadlineDepth());
                        ctx.headlines.add(hl);
                    }
                    ctx.currentParagraph = ParatextImportContext.CurrentParagraph.HEADLINE;
                    ctx.currentVisitor = hl.getAppendVisitor();
                    if (kind.getExtraFormatting() != null) {
                        ctx.currentVisitor = ctx.currentVisitor.visitFormattingInstruction(kind.getExtraFormatting());
                    }
                } else {
                    // BLANK_LINE, TABLE_ROW, TEXT
                    if (kind.isProlog() || forceProlog) {
                        // prolog text before any chapter start: create the first chapter now
                        if (ctx.cnum == -1) {
                            ctx.cnum = 0;
                            ctx.currentChapter = new Chapter();
                            bk.getChapters().add(ctx.currentChapter);
                        }
                        if (ctx.currentChapter.getProlog() == null) {
                            ctx.currentChapter.setProlog(new FormattedText());
                        }
                        if (!ctx.currentChapter.getVerses().isEmpty()) {
                            System.out.println("WARNING: Adding to prolog after verses have been added!");
                        }
                        ctx.currentVisitor = ctx.currentChapter.getProlog().getAppendVisitor();
                        ctx.currentParagraph = ParatextImportContext.CurrentParagraph.PROLOG;
                        // NOTE(review): presumably writes the pending headlines to the
                        // current visitor - confirm against ParatextImportContext
                        ctx.flushHeadlines();
                    } else {
                        ctx.currentParagraph = ParatextImportContext.CurrentParagraph.NORMAL;
                    }
                }
            }

            // Table cells whose tag does not match t[hc]r?1 (i.e. anything but a
            // column-1 tag) are preceded by an indented line break, except
            // inside headlines or when there is no current visitor.
            @Override
            public void visitTableCellStart(String tag) throws RuntimeException {
                ctx.ensureParagraph();
                if (!tag.matches("t[hc]r?1") && ctx.currentParagraph != ParatextImportContext.CurrentParagraph.HEADLINE && ctx.currentVisitor != null) {
                    ctx.currentVisitor.visitLineBreak(LineBreakKind.NEWLINE_WITH_INDENT);
                }
            }

            // Character content is delegated to a ParatextImportVisitor that
            // shares this book's import context.
            @Override
            public void visitParatextCharacterContent(ParatextCharacterContent content) throws RuntimeException {
                ctx.ensureParagraph();
                content.accept(new ParatextImportVisitor(ctx));
            }
        });
        // headlines still pending at the end of the book have nothing to attach to
        if (!ctx.headlines.isEmpty()) {
            System.out.println("WARNING: Ignoring unreferenced headlines");
            ctx.headlines.clear();
        }
        // finish all formatted texts of this book
        for (Chapter ch : bk.getChapters()) {
            if (ch.getProlog() != null)
                ch.getProlog().finished();
            for (Verse v : ch.getVerses()) v.finished();
        }
    }
    return bible;
}
Also used : Bible(biblemulticonverter.data.Bible) Chapter(biblemulticonverter.data.Chapter) FormattedText(biblemulticonverter.data.FormattedText) Book(biblemulticonverter.data.Book) ParagraphKind(biblemulticonverter.format.paratext.ParatextBook.ParagraphKind) Headline(biblemulticonverter.data.FormattedText.Headline) Verse(biblemulticonverter.data.Verse)

Example 23 with Headline

use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.

the class VirtualVerse method validate.

/**
 * Validates the headlines and verses of this virtual verse.
 *
 * <p>Headline depths have to be strictly increasing; a headline of depth 9
 * counts as depth 8 for the headline that follows it, so several depth 9
 * headlines may follow each other. Verse numbers have to be unique. Every
 * headline and verse is validated itself as well.
 *
 * @param bible passed on to the nested validations
 * @param book passed on to the nested validations
 * @param bookAbbr book abbreviation, used to build location strings
 * @param cnumber chapter number, used to build location strings
 * @param danglingReferences passed on to the nested validations
 * @param dictionaryEntries passed on to the nested validations
 * @throws IllegalStateException if the headline depth order is invalid or a
 *         verse number occurs twice
 */
public void validate(Bible bible, BookID book, String bookAbbr, int cnumber, List<String> danglingReferences, Map<String, Set<String>> dictionaryEntries) {
    final String where = bookAbbr + " " + cnumber + ":v" + getNumber();

    // headlines: depths must strictly increase
    int previousDepth = 0;
    for (Headline hl : headlines) {
        final int depth = hl.getDepth();
        if (depth <= previousDepth) {
            throw new IllegalStateException("Invalid headline depth order at " + where + ": " + depth + " after " + previousDepth);
        }
        // depth 9 is remembered as 8, so that another depth 9 headline may follow
        previousDepth = (depth == 9) ? 8 : depth;
        hl.validate(bible, book, where + ":Headline", danglingReferences, dictionaryEntries);
    }

    // verses: numbers must be unique
    final Set<String> seenNumbers = new HashSet<String>();
    for (Verse part : verses) {
        final boolean firstOccurrence = seenNumbers.add(part.getNumber());
        if (!firstOccurrence) {
            throw new IllegalStateException("Duplicate verse number");
        }
        part.validate(bible, book, where + ":" + part.getNumber(), danglingReferences, dictionaryEntries);
    }
}
Also used : Headline(biblemulticonverter.data.FormattedText.Headline) HashSet(java.util.HashSet)

Example 24 with Headline

use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.

the class Chapter method createVirtualVerses.

/**
 * Builds the list of virtual verses for this chapter.
 *
 * <p>In a first pass every verse is split at its headlines: each headline
 * that follows content starts a new fragment, so that headlines always
 * stand at the start of a fragment. In a second pass the fragments are
 * grouped: a fragment with headlines always starts a new virtual verse, a
 * fragment with a nonnumeric verse number is appended to the current one,
 * and a numeric fragment without headlines is appended or started as new
 * depending on its number.
 *
 * @return the virtual verses in order; verses for which
 *         {@code getElementTypes(1)} is empty are left out
 */
public List<VirtualVerse> createVirtualVerses() {
    // pass 1: split up verses to separate headlines
    final List<VirtualVerse> fragments = new ArrayList<VirtualVerse>();
    BitSet numbersInUse = new BitSet(verses.size());
    for (final Verse source : verses) {
        int parsed;
        try {
            parsed = Integer.parseInt(source.getNumber());
            numbersInUse.set(parsed);
        } catch (NumberFormatException ex) {
            // nonnumeric verse numbers are marked with MAX_VALUE
            parsed = Integer.MAX_VALUE;
        }
        final int fragmentNumber = parsed;
        source.accept(new VisitorAdapter<RuntimeException>(null) {

            // fragment that currently receives headlines and content
            VirtualVerse fragment = openFragment();

            // whether anything but headlines was visited since the fragment was opened
            boolean contentSeen = false;

            // registers a new fragment that holds one empty copy of the source verse
            private VirtualVerse openFragment() {
                VirtualVerse opened = new VirtualVerse(fragmentNumber);
                fragments.add(opened);
                opened.getVerses().add(new Verse(source.getNumber()));
                return opened;
            }

            @Override
            public Visitor<RuntimeException> visitHeadline(int depth) {
                // a headline after content has to go into a fresh fragment
                if (contentSeen) {
                    fragment = openFragment();
                    contentSeen = false;
                }
                Headline headline = new Headline(depth);
                fragment.getHeadlines().add(headline);
                return headline.getAppendVisitor();
            }

            @Override
            public int visitElementTypes(String elementTypes) throws RuntimeException {
                return 0;
            }

            @Override
            public void visitStart() {
                contentSeen = true;
            }

            @Override
            public boolean visitEnd() throws RuntimeException {
                contentSeen = true;
                return false;
            }

            @Override
            protected void beforeVisit() {
                contentSeen = true;
            }

            @Override
            protected Visitor<RuntimeException> getVisitor() {
                return fragment.getVerses().get(0).getAppendVisitor();
            }
        });
    }

    // pass 2: group verses sensibly
    List<VirtualVerse> grouped = new ArrayList<VirtualVerse>();
    VirtualVerse target = null;
    int expected = 1;
    for (VirtualVerse piece : fragments) {
        for (Headline pieceHeadline : piece.getHeadlines()) {
            pieceHeadline.finished();
        }
        for (Verse pieceVerse : piece.getVerses()) {
            pieceVerse.finished();
        }
        final int number = piece.getNumber();
        final boolean nonNumeric = number == Integer.MAX_VALUE;
        boolean startNew;
        if (target == null || !piece.getHeadlines().isEmpty()) {
            // first fragment or fragment with headlines: always a new virtual verse
            startNew = true;
            if (!nonNumeric && number > expected) {
                expected = number;
            }
        } else if (nonNumeric) {
            startNew = false;
        } else if (number < expected) {
            // numeric, no headlines, number already passed: append
            startNew = false;
        } else if (number > expected + 1 && numbersInUse.nextSetBit(expected) < number) {
            // a smaller number that is still in use comes later: append,
            // and do not consider this number as in use any more
            startNew = false;
            numbersInUse.clear(number);
        } else {
            startNew = true;
            expected = number;
        }
        if (startNew) {
            target = new VirtualVerse(expected);
            target.getHeadlines().addAll(piece.getHeadlines());
            grouped.add(target);
            expected++;
        }
        // only verses that report element types are kept
        for (Verse candidate : piece.getVerses()) {
            if (candidate.getElementTypes(1).length() > 0) {
                target.getVerses().add(candidate);
            }
        }
    }
    return grouped;
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) ArrayList(java.util.ArrayList) BitSet(java.util.BitSet) Headline(biblemulticonverter.data.FormattedText.Headline)

Aggregations

Headline (biblemulticonverter.data.FormattedText.Headline)24 Verse (biblemulticonverter.data.Verse)20 Chapter (biblemulticonverter.data.Chapter)18 VirtualVerse (biblemulticonverter.data.VirtualVerse)18 Book (biblemulticonverter.data.Book)17 MetadataBook (biblemulticonverter.data.MetadataBook)11 FormattedText (biblemulticonverter.data.FormattedText)10 ArrayList (java.util.ArrayList)10 BookID (biblemulticonverter.data.BookID)8 Bible (biblemulticonverter.data.Bible)6 BufferedWriter (java.io.BufferedWriter)6 File (java.io.File)6 FileOutputStream (java.io.FileOutputStream)6 IOException (java.io.IOException)6 BIBLEBOOK (biblemulticonverter.schema.zef2005.BIBLEBOOK)5 CAPTION (biblemulticonverter.schema.zef2005.CAPTION)5 CHAPTER (biblemulticonverter.schema.zef2005.CHAPTER)5 VERS (biblemulticonverter.schema.zef2005.VERS)5 OutputStreamWriter (java.io.OutputStreamWriter)5 EnumMap (java.util.EnumMap)5