Use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.
From the class MyBibleZone, method doImport.
/**
 * Imports a Bible from the SQLite database {@code inputFile}.
 * <p>
 * The tables are read in this order: {@code info} (Bible name and metadata),
 * {@code books}, the optional {@code introductions} (stored as prolog of the
 * first chapter of the book), {@code verses}, and the optional
 * {@code stories} (subheadings, which are prepended to the verse they refer
 * to). If the input file name ends with {@code .SQLite3} and a sibling
 * {@code *.commentaries.SQLite3} file exists, that file is opened as well
 * and passed to {@code convertFromVerse} for every verse.
 * <p>
 * If an expected index is missing, the affected database is closed and
 * reopened with the write flag set before {@code checkIndex} is called with
 * the matching {@code CREATE INDEX} statement.
 *
 * @param inputFile the SQLite database file to import
 * @return the imported Bible
 * @throws Exception if a database cannot be opened or read
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    SqlJetDb db = SqlJetDb.open(inputFile, false);
    SqlJetDb footnoteDB = null;
    File footnoteFile = new File(inputFile.getParentFile(), inputFile.getName().replace(".SQLite3", ".commentaries.SQLite3"));
    if (inputFile.getName().endsWith(".SQLite3") && footnoteFile.exists()) {
        footnoteDB = SqlJetDb.open(footnoteFile, false);
        if (!footnoteDB.getTable("commentaries").getIndexesNames().contains("commentaries_index")) {
            footnoteDB.close();
            footnoteDB = SqlJetDb.open(footnoteFile, true);
            checkIndex(footnoteDB, "commentaries", "commentaries_index", "CREATE INDEX commentaries_index on commentaries(book_number, chapter_number_from, verse_number_from)");
        }
        footnoteDB.beginTransaction(SqlJetTransactionMode.READ_ONLY);
    }

    // Only reopen the database for writing when an index is really missing.
    // The index name checked here must be the one that is created below and
    // used for ordering the verses cursor ("verses_index").
    if (!db.getTable("verses").getIndexesNames().contains("verses_index") || (db.getSchema().getTable("stories") != null && !db.getTable("stories").getIndexesNames().contains("stories_index"))) {
        db.close();
        db = SqlJetDb.open(inputFile, true);
        checkIndex(db, "verses", "verses_index", "CREATE UNIQUE INDEX verses_index on verses (book_number, chapter, verse)");
        if (db.getSchema().getTable("stories") != null) {
            // the stories index covers order_if_several only if that column exists
            if (db.getSchema().getTable("stories").getColumn("order_if_several") == null) {
                checkIndex(db, "stories", "stories_index", "CREATE UNIQUE INDEX stories_index on stories(book_number, chapter, verse)");
            } else {
                checkIndex(db, "stories", "stories_index", "CREATE UNIQUE INDEX stories_index on stories(book_number, chapter, verse, order_if_several)");
            }
        }
    }
    db.beginTransaction(SqlJetTransactionMode.READ_ONLY);

    // info table: "description" becomes the Bible name, every other
    // non-empty value is stored as metadata
    String bibleName = null;
    MetadataBook mb = new MetadataBook();
    ISqlJetCursor cursor = db.getTable("info").open();
    while (!cursor.eof()) {
        String fn = cursor.getString("name");
        String fv = cursor.getString("value");
        if (fn.equals("description")) {
            bibleName = fv;
        } else if (!fv.isEmpty()) {
            // collapse line break runs, strip spaces around them and drop a trailing line break
            fv = fv.replaceAll("[\r\n]+", "\n").replaceAll(" *\n *", "\n").replaceAll("\n$", "");
            try {
                mb.setValue("MyBible.zone@" + fn.replace('_', '.'), fv);
            } catch (IllegalArgumentException ex) {
                System.out.println("WARNING: Skipping malformed metadata property " + fn);
            }
        }
        cursor.next();
    }
    cursor.close();
    if (bibleName == null) {
        System.out.println("WARNING: No bible name in info table");
        bibleName = inputFile.getName();
    }
    Bible result = new Bible(bibleName.trim());
    if (!mb.getKeys().isEmpty()) {
        mb.finished();
        result.getBooks().add(mb.getBook());
    }

    // books table: map every book number to a Book object
    Map<Integer, Book> bookIDMap = new HashMap<>();
    cursor = db.getTable("books").open();
    while (!cursor.eof()) {
        int num = (int) cursor.getInteger("book_number");
        String col = cursor.getString("book_color");
        String shortName = cursor.getString("short_name").trim().replace(" ", "").replaceAll("[^A-Z0-9a-zäöü]++", "");
        if (!shortName.isEmpty()) {
            shortName = shortName.substring(0, 1).toUpperCase() + shortName.substring(1);
        }
        String longName = cursor.getString("long_name").trim();
        BookID bid = null;
        for (MyBibleZoneBook bi : BOOK_INFO) {
            if (bi.bookNumber == num) {
                bid = bi.bookID;
                if (!col.equals(bi.bookColor)) {
                    System.out.println("WARNING: Book " + bid.getOsisID() + " uses color " + col + " and not " + bi.bookColor);
                }
            }
        }
        if (bid == null) {
            System.out.println("WARNING: Book number " + num + " unknown; skipping: " + shortName + "/" + longName);
            // generate dummy entry not stored in result object
            bookIDMap.put(num, new Book("Xxx", BookID.BOOK_Gen, "X", "X"));
        } else {
            if (shortName.length() < 2) {
                shortName = bid.getOsisID().replaceAll("[^A-Z0-9a-zäöü]++", "");
            }
            Book bk = new Book(shortName, bid, longName, longName);
            result.getBooks().add(bk);
            bookIDMap.put(num, bk);
        }
        cursor.next();
    }
    cursor.close();

    // introductions table (optional): book number 0 becomes a separate
    // introduction book placed directly after the metadata book (if any)
    if (db.getSchema().getTable("introductions") != null) {
        cursor = db.getTable("introductions").open();
        while (!cursor.eof()) {
            int num = (int) cursor.getInteger("book_number");
            String intro = cursor.getString("introduction");
            Book bk;
            if (num == 0) {
                bk = new Book("Intro", BookID.INTRODUCTION, "_Introduction_", "_Introduction_");
                if (!result.getBooks().isEmpty() && result.getBooks().get(0).getId().equals(BookID.METADATA)) {
                    result.getBooks().add(1, bk);
                } else {
                    result.getBooks().add(0, bk);
                }
            } else {
                bk = bookIDMap.get(num);
            }
            if (bk == null) {
                System.out.println("WARNING: Skipping introduction for nonexisting book " + num);
            } else {
                FormattedText ft = new FormattedText();
                convertFromHTML(intro, ft.getAppendVisitor());
                ft.finished();
                if (bk.getChapters().isEmpty()) {
                    bk.getChapters().add(new Chapter());
                }
                bk.getChapters().get(0).setProlog(ft);
            }
            cursor.next();
        }
        cursor.close();
    }

    // verses table, read in index order; verses without text are skipped
    cursor = db.getTable("verses").order("verses_index");
    while (!cursor.eof()) {
        int b = (int) cursor.getInteger("book_number");
        int c = (int) cursor.getInteger("chapter");
        int v = (int) cursor.getInteger("verse");
        String text = cursor.getString("text");
        if (text == null) {
            text = "";
        }
        text = text.trim();
        if (!text.isEmpty()) {
            Book bk = bookIDMap.get(b);
            if (bk == null) {
                System.out.println("WARNING: Verse for unknown book " + b + " skipped");
            } else {
                // create missing chapters up to and including chapter c
                while (bk.getChapters().size() < c) {
                    bk.getChapters().add(new Chapter());
                }
                Chapter ch = bk.getChapters().get(c - 1);
                Verse vv = new Verse("" + v);
                try {
                    String rest = convertFromVerse(text, vv.getAppendVisitor(), footnoteDB, new int[] { b, c, v });
                    if (!rest.isEmpty()) {
                        System.out.println("WARNING: Treating tags as plaintext: " + rest);
                        vv.getAppendVisitor().visitText(rest.replace('\t', ' ').replaceAll(" +", " "));
                    }
                } catch (RuntimeException ex) {
                    // rethrow with the verse text as message to ease debugging
                    throw new RuntimeException(text, ex);
                }
                ch.getVerses().add(vv);
                vv.finished();
            }
        }
        cursor.next();
    }
    cursor.close();

    // stories table (optional): collect the subheadings per verse first,
    // then rebuild every affected verse with its headlines in front
    if (db.getSchema().getTable("stories") != null) {
        cursor = db.getTable("stories").order("stories_index");
        Map<Verse, List<FormattedText.Headline>> subheadings = new HashMap<>();
        Map<Verse, Chapter> subheadingChapters = new HashMap<>();
        while (!cursor.eof()) {
            int b = (int) cursor.getInteger("book_number");
            int c = (int) cursor.getInteger("chapter");
            int v = (int) cursor.getInteger("verse");
            String title = cursor.getString("title").trim();
            Book bk = bookIDMap.get(b);
            if (bk == null) {
                System.out.println("WARNING: Subheading for unknown book " + b + " skipped");
            } else if (bk.getChapters().size() < c) {
                System.out.println("WARNING: Subheading for unknown chapter " + b + " " + c + " skipped");
            } else {
                Chapter ch = bk.getChapters().get(c - 1);
                Verse vv = null;
                for (Verse vvv : ch.getVerses()) {
                    if (vvv.getNumber().equals("" + v)) {
                        vv = vvv;
                    }
                }
                if (vv == null) {
                    System.out.println("WARNING: Subheading for unknown verse " + b + " " + c + ":" + v + " skipped");
                } else {
                    List<FormattedText.Headline> hls = subheadings.get(vv);
                    if (hls == null) {
                        hls = new ArrayList<>();
                        subheadings.put(vv, hls);
                        subheadingChapters.put(vv, ch);
                    }
                    Headline hl = new Headline(1);
                    // text inside <x>...</x> is emitted in bold, the rest as plain text
                    while (title.contains("<x>")) {
                        int pos = title.indexOf("<x>");
                        hl.getAppendVisitor().visitText(title.substring(0, pos));
                        title = title.substring(pos + 3);
                        pos = title.indexOf("</x>");
                        if (pos == -1) {
                            System.out.println("WARNING: Unclosed cross reference: " + title);
                        } else {
                            String ref = title.substring(0, pos);
                            title = title.substring(pos + 4);
                            hl.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText(ref);
                        }
                    }
                    hl.getAppendVisitor().visitText(title);
                    hl.finished();
                    hls.add(hl);
                }
            }
            cursor.next();
        }
        cursor.close();
        for (Verse vv : subheadings.keySet()) {
            Chapter cc = subheadingChapters.get(vv);
            Verse vnew = new Verse(vv.getNumber());
            for (Headline hl : subheadings.get(vv)) {
                hl.accept(vnew.getAppendVisitor().visitHeadline(hl.getDepth()));
            }
            vv.accept(vnew.getAppendVisitor());
            vnew.finished();
            int pos = cc.getVerses().indexOf(vv);
            cc.getVerses().set(pos, vnew);
        }
    }

    if (footnoteDB != null) {
        footnoteDB.commit();
        footnoteDB.close();
    }
    db.commit();
    db.close();
    return result;
}
Use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.
From the class AbstractParatextFormat, method doImport.
/**
 * Imports the Paratext books returned by {@code doImportBooks} and converts
 * them into a {@link Bible}.
 * <p>
 * The Bible name is the common suffix of the Bible names of all books, or
 * {@code "Imported Bible"} if that is missing or empty. For every book, the
 * attributes {@code toc1}, {@code toc2} and {@code toc3} supply long name,
 * short name and abbreviation, each with a fallback; the content is then
 * converted by a visitor that tracks its state in a
 * {@code ParatextImportContext}.
 *
 * @param inputFile the file handed on to {@code doImportBooks}
 * @return the converted Bible
 * @throws Exception if importing the books fails
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    List<ParatextBook> paratextBooks = doImportBooks(inputFile);

    // reduce the Bible names of all books to their common suffix
    String commonName = null;
    for (ParatextBook paratextBook : paratextBooks) {
        String candidate = paratextBook.getBibleName();
        if (commonName == null || candidate.isEmpty()) {
            commonName = candidate;
            continue;
        }
        // cut the longer name so that both have the same length
        int excess = candidate.length() - commonName.length();
        if (excess > 0) {
            candidate = candidate.substring(excess);
        } else if (excess < 0) {
            commonName = commonName.substring(-excess);
        }
        // scan from the end for the last position where the names differ
        int idx = commonName.length() - 1;
        while (idx >= 0 && commonName.charAt(idx) == candidate.charAt(idx)) {
            idx--;
        }
        if (idx >= 0) {
            commonName = commonName.substring(idx + 1);
        }
    }

    String title;
    if (commonName == null || commonName.isEmpty()) {
        title = "Imported Bible";
    } else {
        title = commonName;
    }
    Bible result = new Bible(title);

    for (ParatextBook paratextBook : paratextBooks) {
        // long name: toc1, else the English name of the book
        String longName = paratextBook.getAttributes().get("toc1");
        if (longName == null || longName.isEmpty()) {
            longName = paratextBook.getId().getEnglishName();
        }
        // short name: toc2, else the long name
        String shortName = paratextBook.getAttributes().get("toc2");
        if (shortName == null || shortName.isEmpty()) {
            shortName = longName;
        }
        // abbreviation: toc3 without spaces, else derived from the OSIS ID
        String fallbackAbbr = paratextBook.getId().getId().getOsisID().replace("x-", "").replace("-", "");
        String abbr = paratextBook.getAttributes().get("toc3");
        if (abbr == null) {
            abbr = fallbackAbbr;
        }
        abbr = abbr.replace(" ", "");
        boolean abbrValid = Utils.compilePattern(Utils.BOOK_ABBR_REGEX).matcher(abbr).matches();
        if (!abbrValid) {
            System.out.println("WARNING: Unsupported book abbreviation " + abbr + ", using " + fallbackAbbr + " instead");
            abbr = fallbackAbbr;
        }

        final Book targetBook = new Book(abbr, paratextBook.getId().getId(), shortName, longName);
        result.getBooks().add(targetBook);
        // books with a negative Zefania ID store all their text as prolog
        final boolean prologOnly = paratextBook.getId().getId().getZefID() < 0;
        final ParatextImportContext context = new ParatextImportContext();
        context.nt = paratextBook.getId().getId().isNT();

        paratextBook.accept(new ParatextBookContentVisitor<RuntimeException>() {

            @Override
            public void visitChapterStart(int chapterNumber) throws RuntimeException {
                if (context.cnum != -1 && !context.headlines.isEmpty()) {
                    System.out.println("WARNING: Ignoring unreferenced headlines");
                    context.headlines.clear();
                }
                boolean leavingProlog = context.cnum == 0 && chapterNumber == 1;
                if (leavingProlog) {
                    // we are in prolog (chapter already exists)
                    context.cnum = chapterNumber;
                } else if (chapterNumber >= 1 && chapterNumber > context.cnum) {
                    if (context.cnum == -1) {
                        context.cnum = 0;
                    }
                    // fill the gap with empty chapters
                    for (; context.cnum < chapterNumber - 1; context.cnum++) {
                        targetBook.getChapters().add(new Chapter());
                    }
                    context.currentChapter = new Chapter();
                    targetBook.getChapters().add(context.currentChapter);
                    context.cnum = chapterNumber;
                } else {
                    System.out.println("WARNING: Ignoring chapter number " + chapterNumber + ", current chapter is " + context.cnum);
                }
                context.currentVisitor = null;
                context.currentVerse = null;
                context.currentParagraph = ParatextImportContext.CurrentParagraph.NONE;
            }

            @Override
            public void visitParagraphStart(ParagraphKind kind) throws RuntimeException {
                // close the previous paragraph, if one is open
                ParatextImportContext.CurrentParagraph previous = context.currentParagraph;
                boolean breakNeeded = previous == ParatextImportContext.CurrentParagraph.PROLOG
                        || (previous == ParatextImportContext.CurrentParagraph.NORMAL && context.currentVisitor != null);
                if (breakNeeded) {
                    context.currentVisitor.visitLineBreak(LineBreakKind.PARAGRAPH);
                }
                context.currentParagraph = ParatextImportContext.CurrentParagraph.NONE;

                ParagraphKindCategory category = kind.getCategory();
                if (category == ParagraphKindCategory.SKIP) {
                    // do nothing
                    return;
                }

                if (category == ParagraphKindCategory.HEADLINE) {
                    Headline target = null;
                    if (kind.isJoinHeadlines() && !context.headlines.isEmpty()) {
                        // continue the last headline if the depth is compatible
                        Headline last = context.headlines.get(context.headlines.size() - 1);
                        if (last.getDepth() == kind.getHeadlineDepth() || kind.getHeadlineDepth() == 0) {
                            last.getAppendVisitor().visitText(" ");
                            target = last;
                        }
                    }
                    if (target == null) {
                        target = new Headline(kind.getHeadlineDepth());
                        context.headlines.add(target);
                    }
                    context.currentParagraph = ParatextImportContext.CurrentParagraph.HEADLINE;
                    context.currentVisitor = target.getAppendVisitor();
                    if (kind.getExtraFormatting() != null) {
                        context.currentVisitor = context.currentVisitor.visitFormattingInstruction(kind.getExtraFormatting());
                    }
                    return;
                }

                // BLANK_LINE, TABLE_ROW, TEXT
                if (!kind.isProlog() && !prologOnly) {
                    context.currentParagraph = ParatextImportContext.CurrentParagraph.NORMAL;
                    return;
                }
                if (context.cnum == -1) {
                    // first content of the book: create the chapter that holds the prolog
                    context.cnum = 0;
                    context.currentChapter = new Chapter();
                    targetBook.getChapters().add(context.currentChapter);
                }
                if (context.currentChapter.getProlog() == null) {
                    context.currentChapter.setProlog(new FormattedText());
                }
                if (!context.currentChapter.getVerses().isEmpty()) {
                    System.out.println("WARNING: Adding to prolog after verses have been added!");
                }
                context.currentVisitor = context.currentChapter.getProlog().getAppendVisitor();
                context.currentParagraph = ParatextImportContext.CurrentParagraph.PROLOG;
                context.flushHeadlines();
            }

            @Override
            public void visitTableCellStart(String tag) throws RuntimeException {
                context.ensureParagraph();
                if (tag.matches("t[hc]r?1")) {
                    return;
                }
                boolean inHeadline = context.currentParagraph == ParatextImportContext.CurrentParagraph.HEADLINE;
                if (!inHeadline && context.currentVisitor != null) {
                    context.currentVisitor.visitLineBreak(LineBreakKind.NEWLINE_WITH_INDENT);
                }
            }

            @Override
            public void visitParatextCharacterContent(ParatextCharacterContent content) throws RuntimeException {
                context.ensureParagraph();
                content.accept(new ParatextImportVisitor(context));
            }
        });

        if (!context.headlines.isEmpty()) {
            System.out.println("WARNING: Ignoring unreferenced headlines");
            context.headlines.clear();
        }
        // finish all prologs and verses of the converted book
        for (Chapter chapter : targetBook.getChapters()) {
            if (chapter.getProlog() != null) {
                chapter.getProlog().finished();
            }
            for (Verse verse : chapter.getVerses()) {
                verse.finished();
            }
        }
    }
    return result;
}
Use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.
From the class VirtualVerse, method validate.
/**
 * Validates the headlines and verses of this virtual verse.
 * <p>
 * Headline depths have to be strictly increasing; a headline of depth 9 is
 * recorded as depth 8, so that another depth 9 headline may follow it.
 * Verse numbers have to be unique within this virtual verse.
 *
 * @param bible the Bible, passed on to the headline and verse validation
 * @param book the book ID, passed on to the headline and verse validation
 * @param bookAbbr book abbreviation used to build the location string
 * @param cnumber chapter number used to build the location string
 * @param danglingReferences passed on to the headline and verse validation
 * @param dictionaryEntries passed on to the headline and verse validation
 * @throws IllegalStateException if the headline depth order is invalid or a
 *             verse number occurs more than once
 */
public void validate(Bible bible, BookID book, String bookAbbr, int cnumber, List<String> danglingReferences, Map<String, Set<String>> dictionaryEntries) {
    final String location = bookAbbr + " " + cnumber + ":v" + getNumber();

    int previousDepth = 0;
    for (Headline headline : headlines) {
        int depth = headline.getDepth();
        if (depth <= previousDepth) {
            throw new IllegalStateException("Invalid headline depth order at " + location + ": " + depth + " after " + previousDepth);
        }
        if (depth == 9) {
            previousDepth = 8;
        } else {
            previousDepth = depth;
        }
        headline.validate(bible, book, location + ":Headline", danglingReferences, dictionaryEntries);
    }

    Set<String> seenNumbers = new HashSet<String>();
    for (Verse verse : verses) {
        String number = verse.getNumber();
        if (seenNumbers.contains(number)) {
            throw new IllegalStateException("Duplicate verse number");
        }
        seenNumbers.add(number);
        verse.validate(bible, book, location + ":" + number, danglingReferences, dictionaryEntries);
    }
}
Use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.
From the class Chapter, method createVirtualVerses.
/**
 * Builds the list of virtual verses for this chapter.
 * <p>
 * In a first pass, every verse is split at its headlines: each headline that
 * follows verse content starts a new temporary virtual verse, so that
 * headlines always sit at the beginning of a temporary virtual verse. In a
 * second pass, the temporary virtual verses are grouped: a new virtual verse
 * is started for the first entry, for every entry that carries headlines and
 * for numeric verses that continue the numbering; everything else is
 * appended to the current virtual verse.
 *
 * @return the virtual verses of this chapter, in order
 */
public List<VirtualVerse> createVirtualVerses() {
    // split up verses to separate headlines
    final List<VirtualVerse> tempVerses = new ArrayList<VirtualVerse>();
    BitSet numericVerseNumbers = new BitSet(verses.size());
    for (final Verse verse : verses) {
        int num;
        try {
            num = Integer.parseInt(verse.getNumber());
            numericVerseNumbers.set(num);
        } catch (NumberFormatException ex) {
            // ignore nonnumeric verse numbers
            num = Integer.MAX_VALUE;
        }
        final int vnum = num;
        verse.accept(new VisitorAdapter<RuntimeException>(null) {
            VirtualVerse vv = new VirtualVerse(vnum);

            // whether non-headline content has been seen since vv was created
            boolean hasContent = false;

            {
                tempVerses.add(vv);
                vv.getVerses().add(new Verse(verse.getNumber()));
            }

            @Override
            public Visitor<RuntimeException> visitHeadline(int depth) {
                Headline h = new Headline(depth);
                if (hasContent) {
                    // headline after content: continue in a fresh virtual verse
                    vv = new VirtualVerse(vnum);
                    tempVerses.add(vv);
                    vv.getVerses().add(new Verse(verse.getNumber()));
                    hasContent = false;
                }
                vv.getHeadlines().add(h);
                return h.getAppendVisitor();
            }

            @Override
            public int visitElementTypes(String elementTypes) throws RuntimeException {
                return 0;
            }

            @Override
            public void visitStart() {
                // Deliberately does not mark content. Presumably this callback
                // runs before the first element of the verse (TODO confirm
                // against FormattedText.accept); marking content here would
                // make a headline at the very start of a verse open a second
                // virtual verse and leave the first one empty.
            }

            @Override
            public boolean visitEnd() throws RuntimeException {
                hasContent = true;
                return false;
            }

            @Override
            protected void beforeVisit() {
                hasContent = true;
            }

            @Override
            protected Visitor<RuntimeException> getVisitor() {
                // content is appended to the verse of the current virtual verse
                return vv.getVerses().get(0).getAppendVisitor();
            }
        });
    }

    // group verses sensibly
    List<VirtualVerse> result = new ArrayList<VirtualVerse>();
    VirtualVerse current = null;
    int nextverse = 1;
    for (VirtualVerse vv : tempVerses) {
        for (Headline h : vv.getHeadlines()) {
            h.finished();
        }
        for (Verse v : vv.getVerses()) {
            v.finished();
        }
        boolean makeNew;
        if (current == null || vv.getHeadlines().size() > 0) {
            makeNew = true;
            if (vv.getNumber() != Integer.MAX_VALUE && vv.getNumber() > nextverse) {
                nextverse = vv.getNumber();
            }
        } else if (vv.getNumber() == Integer.MAX_VALUE) {
            // nonnumeric verse without headlines: always appended
            makeNew = false;
        } else {
            // numeric verse without headlines; may be both as new verse and
            // as appended one;
            // decide based on verse number
            int vnum = vv.getNumber();
            if (vnum < nextverse) {
                makeNew = false;
            } else if (vnum > nextverse + 1 && numericVerseNumbers.nextSetBit(nextverse) < vnum) {
                // a verse number between nextverse and vnum still exists in
                // this chapter, so this verse is out of order: append it
                makeNew = false;
                numericVerseNumbers.clear(vnum);
            } else {
                makeNew = true;
                nextverse = vnum;
            }
        }
        if (makeNew) {
            current = new VirtualVerse(nextverse);
            current.getHeadlines().addAll(vv.getHeadlines());
            result.add(current);
            nextverse++;
        }
        // only verses that report element types are copied over
        for (Verse vvv : vv.getVerses()) {
            if (vvv.getElementTypes(1).length() > 0) {
                current.getVerses().add(vvv);
            }
        }
    }
    return result;
}
Aggregations