use of biblemulticonverter.data.Bible in project BibleMultiConverter by schierlm.
the class SWORD method doImport.
/**
 * Converts a SWORD module into a {@link Bible}.
 * <p>
 * The import runs in three phases:
 * <ol>
 * <li>Collect every verse key of the module. Besides the keys returned by
 * {@code book.getGlobalKeyList()}, neighbouring verses that the iterator did
 * not return but for which {@code book.contains()} is true are added as well
 * (a warning is printed for each of them).</li>
 * <li>Render each verse as SAX events into a DOM {@code <verse>} element,
 * strip the wrapping {@code <div><title/>...</div>} and hand the result to
 * {@code OSISHelper.handleVerse}. Verse number 0 is stored as the chapter
 * prolog; empty verses/prologs are dropped again.</li>
 * <li>Remove trailing empty chapters and books left without chapters.</li>
 * </ol>
 *
 * @param book the SWORD module to import
 * @return the converted bible
 * @throws IllegalStateException if a SWORD book has no {@code BookID} mapping
 *             or a verse other than 0 is reported for chapter 0
 * @throws RuntimeException if the rendered OSIS does not have the expected
 *             {@code <div><title/>...</div>} wrapper
 * @throws Exception on any XML/SAX or module access failure
 */
protected Bible doImport(Book book) throws Exception {
    OSISHelper helper = new OSISHelper();
    Bible result = new Bible(book.getName());
    TransformerHandler th = ((SAXTransformerFactory) SAXTransformerFactory.newInstance()).newTransformerHandler();
    Map<BookID, biblemulticonverter.data.Book> parsedBooks = new EnumMap<>(BookID.class);
    Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();

    // Phase 1: build the complete, ordered verse list.
    List<Verse> allVerses = new ArrayList<>();
    Verse nextCandidate = null;
    for (Iterator<?> iter = book.getGlobalKeyList().iterator(); iter.hasNext(); ) {
        Verse v = (Verse) iter.next();
        // Walk from the verse after the previously returned one up to v and
        // pick up anything the iterator skipped in between.
        while (nextCandidate != null && !nextCandidate.equals(v)) {
            if (book.contains(nextCandidate)) {
                System.out.println("WARNING: Verse (after) skipped by iterator: " + nextCandidate);
                allVerses.add(nextCandidate);
            }
            Verse following = nextCandidate.getVersification().add(nextCandidate, 1);
            // Same guard as in the end-of-module loop below: when add() no
            // longer advances (presumably the end of the versification), v
            // cannot be reached any more; stop instead of looping forever.
            if (nextCandidate.equals(following))
                break;
            nextCandidate = following;
        }
        // Walk backwards from v until an already known verse (or a verse seen
        // in this backwards walk, i.e. subtract() no longer moves) is hit.
        Verse prevCandidate = v.getVersification().subtract(v, 1);
        List<Verse> versesSkippedBefore = new ArrayList<>();
        while (prevCandidate != null && !allVerses.contains(prevCandidate) && !versesSkippedBefore.contains(prevCandidate)) {
            versesSkippedBefore.add(0, prevCandidate);
            prevCandidate = prevCandidate.getVersification().subtract(prevCandidate, 1);
        }
        for (Verse vv : versesSkippedBefore) {
            if (book.contains(vv)) {
                System.out.println("WARNING: Verse (before) skipped by iterator: " + vv);
                allVerses.add(vv);
            }
        }
        allVerses.add(v);
        nextCandidate = v.getVersification().add(v, 1);
    }
    // Verses after the last one returned by the iterator.
    while (nextCandidate != null) {
        if (book.contains(nextCandidate)) {
            System.out.println("WARNING: Verse (at end) skipped by iterator: " + nextCandidate);
            allVerses.add(nextCandidate);
        }
        Verse nextNextCandidate = nextCandidate.getVersification().add(nextCandidate, 1);
        if (nextCandidate.equals(nextNextCandidate))
            break;
        nextCandidate = nextNextCandidate;
    }

    // Phase 2: convert every collected verse.
    for (Verse v : allVerses) {
        BookID bkid = biblemulticonverter.sword.BookMapping.MAPPING.get(v.getBook());
        if (bkid == null)
            throw new IllegalStateException("No book mapping for SWORD book " + v.getBook());
        biblemulticonverter.data.Book bk = parsedBooks.get(bkid);
        if (bk == null) {
            bk = new biblemulticonverter.data.Book(bkid.getOsisID().replace("x-Intr", "Intr"), bkid, bkid.getEnglishName(), bkid.getEnglishName());
            parsedBooks.put(bkid, bk);
            bk.getChapters().add(new Chapter());
            result.getBooks().add(bk);
        }
        int chapterNum = v.getChapter(), verseNum = v.getVerse();
        while (bk.getChapters().size() < chapterNum)
            bk.getChapters().add(new Chapter());
        // Chapter 0 content is stored in the first chapter.
        Chapter chapter = bk.getChapters().get(chapterNum == 0 ? 0 : chapterNum - 1);
        FormattedText verse;
        if (verseNum == 0) {
            // Verse 0 becomes the prolog; an existing prolog (e.g. from
            // chapter 0) is kept and the new content is appended to it.
            verse = new FormattedText();
            if (chapter.getProlog() != null) {
                chapter.getProlog().accept(verse.getAppendVisitor());
            }
            chapter.setProlog(verse);
        } else {
            if (chapterNum == 0)
                throw new IllegalStateException("Verse " + verseNum + " in chapter 0 is invalid");
            verse = new biblemulticonverter.data.Verse("" + verseNum);
            chapter.getVerses().add((biblemulticonverter.data.Verse) verse);
        }
        Element root = doc.createElement("verse");
        th.setResult(new DOMResult(root));
        new BookData(book, v).getSAXEventProvider().provideSAXEvents(th);
        // Expected shape: <verse><div><title/>content...</div></verse>.
        // Drop the div and its leading title, keep the remaining children.
        if (root.getChildNodes().getLength() == 1 && root.getFirstChild() instanceof Element && root.getFirstChild().getNodeName().equals("div") && root.getFirstChild().getChildNodes().getLength() >= 1 && root.getFirstChild().getFirstChild().getNodeName().equals("title")) {
            Element div = (Element) root.getFirstChild();
            root.removeChild(div);
            div.removeChild(div.getFirstChild());
            while (div.getFirstChild() != null) {
                Node child = div.getFirstChild();
                div.removeChild(child);
                root.appendChild(child);
            }
        } else {
            throw new RuntimeException("Unexpected OSIS structure!");
        }
        helper.handleVerse(root, verse);
        if (verse.getElementTypes(1).length() == 0) {
            System.out.println("WARNING: Empty verse " + bk.getAbbr() + " " + chapterNum + ":" + verseNum);
            if (verse instanceof biblemulticonverter.data.Verse)
                chapter.getVerses().remove(verse);
            else
                chapter.setProlog(null);
        }
    }

    // Phase 3: remove trailing empty chapters and books without chapters.
    for (biblemulticonverter.data.Book bk : parsedBooks.values()) {
        while (!bk.getChapters().isEmpty()) {
            Chapter ch = bk.getChapters().get(bk.getChapters().size() - 1);
            if (ch.getProlog() == null && ch.getVerses().isEmpty()) {
                bk.getChapters().remove(ch);
            } else {
                break;
            }
        }
        if (bk.getChapters().isEmpty()) {
            result.getBooks().remove(bk);
        }
    }
    return result;
}
use of biblemulticonverter.data.Bible in project BibleMultiConverter by schierlm.
the class NeUeParser method doImport.
/**
 * Imports the NeÜ bible from a directory of HTML files.
 * <p>
 * Processing order:
 * <ol>
 * <li>Main file ({@code NeUe.htm}, or {@code index.htm} if the former does not
 * exist): version metadata ("Textstand"), table of contents (checked against
 * {@code METADATA} and {@code JESUS_CHRONIK}) and the translator's preface.</li>
 * <li>One HTML file per entry of {@code METADATA}: book prolog, headlines,
 * chapters, verses and footnotes. Missing files are skipped with a message.</li>
 * <li>Appendix: overview table from {@code bibel.html}, a link to the drawing
 * of Ezekiel's temple and the "Jesus-Chronik" files.</li>
 * </ol>
 * The parser is line based and strict: any line it does not recognise results
 * in an {@link IOException} whose message is the offending line.
 *
 * @param inputDirectory directory containing the downloaded HTML files
 * @return the parsed bible
 * @throws IOException if a file does not have the expected structure
 * @throws Exception on any other failure
 */
@Override
public Bible doImport(File inputDirectory) throws Exception {
    Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
    MetadataBook metadata = new MetadataBook();
    metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
    metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
    metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
    metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
    metadata.setValue(MetadataBookKey.language, "GER");
    bible.getBooks().add(metadata.getBook());
    String mainFile = "NeUe.htm";
    if (!new File(inputDirectory, mainFile).exists())
        mainFile = "index.htm";
    try (BufferedReader br = createReader(inputDirectory, mainFile)) {
        // Header: look for the "Textstand: " line, which carries the version.
        String line = br.readLine().trim();
        while (!line.startsWith("<p class=\"u3\">")) {
            if (line.contains("Textstand: ")) {
                line = line.substring(line.indexOf("Textstand: ") + 11);
                line = line.substring(0, line.indexOf('<'));
                metadata.setValue(MetadataBookKey.version, line);
                metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
                metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
                metadata.finished();
            }
            line = br.readLine().trim();
        }
        // Table of contents: every link must match the next expected entry of
        // METADATA (bible books) or JESUS_CHRONIK (chronicle pages).
        Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a> (?:</p>)?");
        int bookIndex = 0, jcIndex = 0;
        while (!line.startsWith("<a name=\"vorwort\">")) {
            if (line.equals("<br>")) {
                line = br.readLine().trim();
                if (line.startsWith("»» "))
                    line = line.substring("»» ".length());
            }
            Matcher m = tocPattern.matcher(line);
            if (m.matches()) {
                String url = m.group(1);
                String shortName = replaceEntities(m.group(2));
                if (url.endsWith(".html#bb")) {
                    String filename = url.substring(0, url.length() - 8);
                    BookMetadata bm = METADATA[bookIndex];
                    if (!bm.filename.equals(filename))
                        throw new IOException(filename + "/" + bm.filename);
                    bm.shortname = shortName;
                    bookIndex++;
                } else if (url.startsWith("0")) {
                    if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
                        throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
                    jcIndex++;
                } else {
                    throw new IOException(url);
                }
            } else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p> </p>") && !line.equals("<p><a name=\"bb\"> </a></p>")) {
                throw new IOException(line);
            }
            line = br.readLine().trim();
        }
        if (bookIndex != METADATA.length)
            throw new IOException(bookIndex + " != " + METADATA.length);
        // No chronicle links at all: treat the chronicle as absent.
        if (jcIndex == 0)
            JESUS_CHRONIK = new String[0];
        if (jcIndex != JESUS_CHRONIK.length)
            throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
        // Preface ("Vorwort")
        Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
        bible.getBooks().add(vorwort);
        Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
        boolean needParagraph = false;
        if (line.endsWith("</a><br>"))
            line = br.readLine().trim();
        while (!line.startsWith("<div align=\"right\">")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            if (line.startsWith("<h2>")) {
                // The <h2> must repeat the long name of the preface book.
                if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
                    throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
            } else if (line.startsWith("<h4>")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<h4 id=")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<div class=\"fn\">")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
            } else if (line.startsWith("<p>")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                if (line.endsWith("<br />"))
                    line += br.readLine().trim();
                parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
            } else if (line.equals("<ul>")) {
                // Lists in the preface are skipped entirely.
                while (!line.equals("</ul>")) {
                    line = br.readLine();
                    if (line == null)
                        throw new IOException("Unexpected end of file inside <ul>");
                }
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<p> </p>");
        }
        vorwort.getChapters().get(0).getProlog().finished();
    }
    // Bible books, one HTML file each.
    for (BookMetadata bm : METADATA) {
        if (!new File(inputDirectory, bm.filename + ".html").exists()) {
            System.out.println("*** Skipping " + bm.filename + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
            String line = br.readLine().trim();
            line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "»»");
            if (!line.equals("<p><a name=\"bb\"> </a></p>") && !line.equals("<p><a id=\"bb\"> </a></p>"))
                throw new IOException(line);
            line = skipLines(br);
            if (line.equals("<p> </p>"))
                line = br.readLine().trim();
            Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
            bible.getBooks().add(bk);
            line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p> </p>");
            // Book prolog: headline (class u0), one or more intro blocks
            // (class e) and a closing bold+italic line (class u1).
            FormattedText prolog = new FormattedText();
            prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
            line = skipLines(br);
            boolean firstProlog = true;
            while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
                if (firstProlog) {
                    firstProlog = false;
                } else {
                    prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                }
                parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
                line = skipLines(br);
            }
            if (firstProlog)
                throw new IOException(line);
            prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
            parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
            prolog.finished();
            line = skipLines(br);
            if (!line.startsWith("<h"))
                throw new IOException(line);
            // The first headline level found becomes depth 1.
            char minHeadline = line.charAt(2);
            // Headlines are collected here until the next verse starts.
            List<Headline> headlines = new ArrayList<>();
            boolean inParagraph = false;
            Chapter currentChapter = null;
            Verse currentVerse = null;
            // Footnote bodies still to be filled, and the "chapter,verse"
            // prefix each footnote text is expected to start with.
            List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
            List<String> footnoteVerses = new ArrayList<>();
            while (!line.equals("<hr>")) {
                if (line.startsWith("<p> </p>")) {
                    // Drop a leading filler paragraph. The offset is derived
                    // from the literal so it cannot get out of sync with it.
                    line = line.substring("<p> </p>".length()).trim();
                    if (line.length() == 0)
                        line = skipLines(br);
                    continue;
                }
                // Part of the current physical line that has to be handled in
                // a later iteration (a line may hold several logical items).
                String restLine = null;
                List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
                // Strip id attributes from the leading tag.
                while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*"))
                    line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
                // Both prefixes are 16 characters long; treat them as plain <p>.
                if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
                    line = "<p>" + line.substring(16);
                }
                if (line.matches(".*</p>.+")) {
                    int pos = line.indexOf("</p>");
                    restLine = line.substring(pos + 4).trim();
                    line = line.substring(0, pos + 4);
                }
                if (!inParagraph && line.startsWith("<p>")) {
                    inParagraph = true;
                    line = line.substring(3).trim();
                    if (line.length() == 0) {
                        line = skipLines(br);
                        continue;
                    }
                }
                // Split in front of a verse marker / poetry paragraph that is
                // not at the very start of the line.
                if (line.indexOf("<span class=\"vers\">", 1) != -1) {
                    int pos = line.indexOf("<span class=\"vers\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                if (line.indexOf("<p class=\"poet\">", 1) != -1) {
                    int pos = line.indexOf("<p class=\"poet\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                // Remove trailing filler; cut exactly the matched suffix.
                while (line.endsWith(" "))
                    line = line.substring(0, line.length() - " ".length());
                if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
                    Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
                    String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
                    if (headline.contains("*"))
                        throw new IOException(headline);
                    hl.getAppendVisitor().visitText(replaceEntities(headline));
                    headlines.add(hl);
                } else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
                    // Start of a new verse.
                    int pos = line.indexOf("</span>");
                    if (pos == -1)
                        throw new IOException(line);
                    String vs = line.substring(19, pos).trim();
                    if (vs.endsWith(" ")) {
                        vs = cutAffix(vs, "", " ");
                    }
                    if (vs.matches("[0-9]+(,[0-9]+)?")) {
                        currentVerse = new Verse(vs);
                    } else {
                        throw new IOException(vs);
                    }
                    line = line.substring(pos + 7);
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    // Remove leading filler; cut exactly the matched prefix.
                    if (line.startsWith(" ")) {
                        line = line.substring(" ".length());
                    }
                    // Pending headlines belong to the verse that follows them.
                    for (Headline h : headlines) {
                        h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
                    }
                    headlines.clear();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                    currentChapter.getVerses().add(currentVerse);
                } else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
                    // Start of a new chapter; numbers must be consecutive.
                    int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
                    currentChapter = new Chapter();
                    currentVerse = null;
                    bk.getChapters().add(currentChapter);
                    if (chap != bk.getChapters().size())
                        throw new IOException(chap + "/" + bk.getChapters().size());
                    // The book prolog is attached to the first chapter only.
                    if (prolog != null) {
                        currentChapter.setProlog(prolog);
                        prolog = null;
                    }
                } else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
                    // Footnote text; must belong to the oldest open footnote.
                    String content = cutAffix(line, "<div class=\"fn\">", "</div>");
                    if (footnoteVerses.size() == 0)
                        throw new IOException(line);
                    String prefix = footnoteVerses.remove(0) + ":";
                    if (!content.startsWith(prefix)) {
                        throw new IOException(prefix + " / " + content);
                    }
                    parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
                } else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith(" ") || line.startsWith("<span class=\"u2\">"))) {
                    // Continuation of the current verse.
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    // Unrecognised line: print the following line as context.
                    System.err.println("Next line: " + br.readLine());
                    throw new IOException(line);
                }
                if (!newFootnotes.isEmpty()) {
                    footnotes.addAll(newFootnotes);
                    for (int i = 0; i < newFootnotes.size(); i++) {
                        if (currentVerse.getNumber().contains(",")) {
                            footnoteVerses.add(currentVerse.getNumber());
                        } else {
                            footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
                        }
                    }
                }
                if (restLine != null)
                    line = restLine;
                else
                    line = skipLines(br);
            }
            if (!headlines.isEmpty())
                throw new IOException("" + headlines.size());
            if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
                throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
            for (Chapter ch : bk.getChapters()) {
                for (Verse vv : ch.getVerses()) {
                    vv.trimWhitespace();
                    vv.finished();
                }
            }
        }
    }
    // Appendix ("Anhang")
    Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
    bible.getBooks().add(anhang);
    Visitor<RuntimeException> vv = getPrologVisitor(anhang);
    vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
    try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
        String line = br.readLine().trim();
        while (!line.startsWith("<a name=\"at\">")) {
            line = br.readLine().trim();
        }
        while (!line.equals("</body>")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            line = line.replaceAll("> +<", "><");
            line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
            if (line.startsWith("<h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
            } else if (line.startsWith("<h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
            } else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
                // Table row describing one book: parts[0] is the file name,
                // parts[1] the link text of the first cell.
                String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
                line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
                if (line.contains("<td><br /><br /><a href")) {
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                    line = line.replace("<td><br /><br /><a href", "<td><a href");
                }
                String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
                Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
                BookMetadata m = null;
                for (BookMetadata bm : METADATA) {
                    if (bm.filename.equals(parts[0])) {
                        m = bm;
                        break;
                    }
                }
                // Report an unknown file name the same way as other
                // structure errors instead of failing with an NPE below.
                if (m == null)
                    throw new IOException("Unknown book file in appendix: " + parts[0]);
                bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
                bold.visitText(" " + replaceEntities(title));
                vv.visitLineBreak(LineBreakKind.NEWLINE);
                line = br.readLine().trim();
                while (!line.endsWith("</td>"))
                    line += " " + br.readLine().trim();
                vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
                vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                line = br.readLine().trim();
                if (!line.equals("</tr>"))
                    throw new IOException(line);
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p> </p><p> </p><p> </p><p> </p>", "<p> </p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
        }
    }
    // Ezekiel's temple ("Hesekiels Tempel"): link offline, inline image online.
    vv.visitHeadline(1).visitText("Hesekiels Tempel");
    Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
    vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
    vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");
    // Jesus-Chronik
    if (JESUS_CHRONIK.length > 0)
        vv.visitHeadline(1).visitText("Die Jesus-Chronik");
    for (String name : JESUS_CHRONIK) {
        if (!new File(inputDirectory, name + ".html").exists()) {
            System.out.println("*** Skipping " + name + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
            String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
            // Open footnotes and the prefix each footnote text must start with.
            List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
            List<String> footnotePrefixes = new ArrayList<>();
            while (!line.startsWith("</body>")) {
                line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
                if (line.startsWith("<h2>")) {
                    parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
                } else if (line.startsWith("<div class=\"fn\">")) {
                    // One div may hold several footnotes separated by <br />.
                    while (!line.endsWith("</div>"))
                        line += " " + br.readLine().trim();
                    String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
                    for (String fn : fns) {
                        fn = fn.trim();
                        String pfx = footnotePrefixes.remove(0);
                        Visitor<RuntimeException> fnv = footnoteList.remove(0);
                        if (!fn.startsWith(pfx))
                            throw new IOException(pfx + " / " + fn);
                        parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
                    }
                } else if (line.startsWith("<p><div class=\"rot\">")) {
                    String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
                    // Bold part becomes a headline, the rest an italic line.
                    int pos = line.indexOf("</b><br />");
                    parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
                    String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
                    if (!xref.isEmpty())
                        parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p>")) {
                    parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("©")) {
                    while (!line.endsWith("</div>"))
                        line += " " + br.readLine().trim();
                    parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<div class=\"e\">")) {
                    while (!line.endsWith("</div>"))
                        line += " " + br.readLine().trim();
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    throw new IOException(line);
                }
                line = skipLines(br);
            }
            if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
                throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
        }
    }
    anhang.getChapters().get(0).getProlog().trimWhitespace();
    anhang.getChapters().get(0).getProlog().finished();
    return bible;
}
use of biblemulticonverter.data.Bible in project BibleMultiConverter by schierlm.
the class OSIS method doImport.
/**
 * Imports an OSIS XML file.
 * <p>
 * The file is validated against the bundled OSIS 2.1.1 schema, header fields
 * (title, description, rights, date, title page description) are turned into
 * a metadata book when a description or rights statement is present, and
 * every {@code <div type="book">} is converted with {@code parseBook}.
 * Books written in milestoned form ({@code sID}/{@code eID} pairs) are first
 * rewritten so that the start tag contains everything up to its end tag.
 *
 * @param inputFile the OSIS file
 * @return the imported bible
 * @throws IllegalStateException if a milestoned book has no end tag or start
 *             and end tag cannot be made siblings
 * @throws Exception on validation, parsing or XPath errors
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    ValidateXML.validateFileBeforeParsing(SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(ObjectFactory.class.getResource("/osisCore.2.1.1.xsd")), inputFile);
    printedWarnings.clear();
    DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    XPath xpath = javax.xml.xpath.XPathFactory.newInstance().newXPath();
    Document osisDoc = docBuilder.parse(inputFile);
    String name = xpath.evaluate("/osis/osisText/header/work/title/text()", osisDoc);
    if (name.isEmpty())
        name = "OSIS Bible";
    Bible result = new Bible(name);
    String description = xpath.evaluate("/osis/osisText/header/work/description/text()", osisDoc);
    String rights = xpath.evaluate("/osis/osisText/header/work/rights/text()", osisDoc);
    if (!description.isEmpty() || !rights.isEmpty()) {
        String date = xpath.evaluate("/osis/osisText/header/work/date/text()", osisDoc);
        String titleDesc = xpath.evaluate("/osis/osisText/titlePage/description/text()", osisDoc);
        MetadataBook mb = new MetadataBook();
        // Whitespace runs (including line breaks) are collapsed to one space.
        if (!description.isEmpty())
            mb.setValue(MetadataBookKey.description, description.replaceAll("[\r\n\t ]+", " ").trim());
        if (!rights.isEmpty())
            mb.setValue(MetadataBookKey.rights, rights.replaceAll("[\r\n\t ]+", " ").trim());
        if (!date.isEmpty())
            mb.setValue(MetadataBookKey.date, date);
        if (!titleDesc.isEmpty())
            mb.setValue("description@titlePage", titleDesc.replaceAll("[\r\n\t ]+", " ").trim());
        mb.finished();
        result.getBooks().add(mb.getBook());
    }
    NodeList osisBooks = (NodeList) xpath.evaluate("/osis/osisText//div[@type='book']", osisDoc, XPathConstants.NODESET);
    for (int bookIndex = 0; bookIndex < osisBooks.getLength(); bookIndex++) {
        Element osisBook = (Element) osisBooks.item(bookIndex);
        if (!osisBook.getAttribute("sID").isEmpty()) {
            // Milestoned book: locate the matching end tag. The ID is passed
            // as an XPath variable instead of being pasted into the
            // expression, so quotes inside the ID cannot break or alter it.
            final String startID = osisBook.getAttribute("sID");
            XPath milestoneXPath = javax.xml.xpath.XPathFactory.newInstance().newXPath();
            milestoneXPath.setXPathVariableResolver(new javax.xml.xpath.XPathVariableResolver() {
                @Override
                public Object resolveVariable(javax.xml.namespace.QName variableName) {
                    return "sID".equals(variableName.getLocalPart()) ? startID : null;
                }
            });
            Element osisBookEnd = (Element) milestoneXPath.evaluate("//div[@eID=$sID]", osisDoc, XPathConstants.NODE);
            if (osisBookEnd == null) {
                throw new IllegalStateException("No milestoned div found with eID " + startID);
            }
            if (!osisBookEnd.getParentNode().isSameNode(osisBook.getParentNode())) {
                // Start and end tag are not siblings: find their closest
                // common ancestor and let convertToMilestoned() restructure it.
                List<Node> candidates = new ArrayList<>();
                Node commonParent = osisBookEnd;
                while (commonParent != null) {
                    candidates.add(commonParent);
                    commonParent = commonParent.getParentNode();
                }
                commonParent = osisBook;
                search: while (commonParent != null) {
                    for (Node candidate : candidates) {
                        if (commonParent.isSameNode(candidate)) {
                            break search;
                        }
                    }
                    commonParent = commonParent.getParentNode();
                }
                if (commonParent == null)
                    throw new IllegalStateException("Unable to find common parent of milestoned div start and end tag");
                convertToMilestoned((Element) commonParent);
                if (!osisBookEnd.getParentNode().isSameNode(osisBook.getParentNode())) {
                    throw new IllegalStateException("Unable to normalize XML so that milestoned div start and end tags are siblings");
                }
            }
            // Move everything between start and end tag into the start tag,
            // then drop the end tag.
            while (osisBook.getNextSibling() != null && !osisBook.getNextSibling().isSameNode(osisBookEnd)) {
                osisBook.appendChild(osisBook.getNextSibling());
            }
            osisBookEnd.getParentNode().removeChild(osisBookEnd);
        }
        String bookOsisID = osisBook.getAttribute("osisID");
        BookID bookID = BookID.fromOsisId(bookOsisID);
        // Default title is the English book name; a leading
        // <title type="main"> with content overrides it.
        String title = bookID.getEnglishName();
        Node titleElem = osisBook.getFirstChild();
        while (titleElem instanceof Text)
            titleElem = titleElem.getNextSibling();
        if (titleElem instanceof Element && titleElem.getNodeName().equals("title")) {
            Element titleElement = (Element) titleElem;
            if (titleElement.getAttribute("type").equals("main") && titleElement.getChildNodes().getLength() > 0)
                title = titleElement.getTextContent();
        }
        Book bibleBook = new Book(bookOsisID, bookID, title, title);
        result.getBooks().add(bibleBook);
        parseBook(bookOsisID, osisBook, bibleBook);
    }
    return result;
}
use of biblemulticonverter.data.Bible in project BibleMultiConverter by schierlm.
the class RoundtripHTML method doImport.
/**
 * Imports a bible previously exported as roundtrip HTML.
 * <p>
 * {@code metadata.js} supplies the bible name and, per book, the fields
 * {@code abbr}, {@code osis}, {@code short}, {@code long} and
 * {@code chapters}. For every chapter the matching HTML file is then read:
 * its prolog, verses and footnotes sections are parsed with
 * {@code parseLine}. Footnote bodies are filled in the order in which the
 * footnotes were encountered in prolog and verses.
 *
 * @param inputDir directory containing {@code metadata.js} and the chapter
 *            files
 * @return the imported bible
 * @throws IOException if a file ends prematurely or contains an unexpected
 *             line (the message is the offending text)
 * @throws Exception on any other failure
 */
@Override
public Bible doImport(File inputDir) throws Exception {
    Bible bible;
    // metadata
    try (BufferedReader br = createReader(inputDir, "metadata.js")) {
        String line = readRequiredLine(br, "metadata.js");
        // The second line carries no data and is skipped.
        br.readLine();
        // First line: 13 characters in front of the escaped name, 2 after it.
        bible = new Bible(line.substring(13, line.length() - 2).replace("\\\"", "\"").replace("\\\\", "\\"));
        Map<String, Object> fieldMap = new HashMap<>();
        while ((line = br.readLine()) != null) {
            if (line.startsWith("}")) {
                // End of one book record: build the book from the fields read.
                Book bk = new Book((String) fieldMap.get("abbr"), BookID.fromOsisId((String) fieldMap.get("osis")), (String) fieldMap.get("short"), (String) fieldMap.get("long"));
                int chapterCount = (Integer) fieldMap.get("chapters");
                for (int i = 0; i < chapterCount; i++) {
                    bk.getChapters().add(new Chapter());
                }
                bible.getBooks().add(bk);
                continue;
            }
            int pos = line.indexOf(":");
            if (pos == -1)
                throw new IOException(line);
            String key = line.substring(0, pos);
            String value = line.substring(pos + 1);
            if (value.endsWith(","))
                value = value.substring(0, value.length() - 1);
            // Values are quoted strings, booleans or integers.
            if (value.startsWith("\"") && value.endsWith("\"")) {
                fieldMap.put(key, value.substring(1, value.length() - 1).replace("\\\"", "\"").replace("\\\\", "\\"));
            } else if (value.equals("true") || value.equals("false")) {
                fieldMap.put(key, Boolean.parseBoolean(value));
            } else {
                fieldMap.put(key, Integer.parseInt(value));
            }
        }
    }
    // chapters
    for (Book bk : bible.getBooks()) {
        int cnumber = 0;
        for (Chapter ch : bk.getChapters()) {
            cnumber++;
            String chapterFile = getTypeDir(bk.getId()) + "/" + bk.getAbbr() + "_" + cnumber + ".html";
            try (BufferedReader br = createReader(inputDir, chapterFile)) {
                String line;
                // Footnote visitors collected while parsing prolog and verses.
                List<FormattedText.Visitor<RuntimeException>> footnotes = new ArrayList<>();
                while ((line = br.readLine()) != null) {
                    if (line.equals("<div class=\"biblehtmlcontent prolog\">")) {
                        // Prolog: exactly one content line followed by </div>.
                        line = readRequiredLine(br, chapterFile);
                        FormattedText prolog = new FormattedText();
                        int end = parseLine(prolog.getAppendVisitor(), line, 0, footnotes);
                        ch.setProlog(prolog);
                        if (end != line.length())
                            throw new IOException(line.substring(end));
                        line = readRequiredLine(br, chapterFile);
                        if (!line.equals("</div>"))
                            throw new IOException(line);
                    } else if (line.equals("<div class=\"biblehtmlcontent verses\" id=\"verses\">")) {
                        // Verses: one <div class="v" id="vN">...</div> per
                        // line until the closing </div> of the section.
                        while (true) {
                            line = readRequiredLine(br, chapterFile);
                            if (line.equals("</div>"))
                                break;
                            if (!line.startsWith("<div class=\"v\" id=\"v") || !line.endsWith("</div>"))
                                throw new IOException(line);
                            // Strip the 20 character prefix and "</div>".
                            line = line.substring(20, line.length() - 6);
                            int pos = line.indexOf("\">");
                            if (pos == -1)
                                throw new IOException(line);
                            Verse v = new Verse(line.substring(0, pos));
                            int end = parseLine(v.getAppendVisitor(), line, pos + 2, footnotes);
                            if (end != line.length())
                                throw new IOException(line.substring(end));
                            ch.getVerses().add(v);
                        }
                    } else if (line.equals("<div class=\"biblehtmlcontent footnotes\">")) {
                        // Footnotes: one numbered line per collected footnote.
                        for (int i = 0; i < footnotes.size(); i++) {
                            line = readRequiredLine(br, chapterFile);
                            String prefix = "<div class=\"fn\"><sup class=\"fnt\"><a name=\"fn" + (i + 1) + "\" href=\"#fnm" + (i + 1) + "\">" + (i + 1) + "</a></sup> ";
                            if (!line.startsWith(prefix) || !line.endsWith("</div>"))
                                throw new IOException(line);
                            line = line.substring(prefix.length(), line.length() - 6);
                            int end = parseLine(footnotes.get(i), line, 0, null);
                            if (end != line.length())
                                throw new IOException(line.substring(end));
                        }
                        line = readRequiredLine(br, chapterFile);
                        if (!line.equals("</div>"))
                            throw new IOException(line);
                    }
                }
                if (ch.getProlog() != null)
                    ch.getProlog().finished();
                for (Verse v : ch.getVerses())
                    v.finished();
            }
        }
    }
    return bible;
}

/**
 * Reads the next line and fails with an {@link IOException} instead of
 * returning {@code null} when the end of the file has been reached.
 *
 * @param br reader to read from
 * @param fileName file name used in the error message
 * @return the line read, never {@code null}
 * @throws IOException if the end of the file has been reached or reading fails
 */
private static String readRequiredLine(BufferedReader br, String fileName) throws IOException {
    String line = br.readLine();
    if (line == null)
        throw new IOException("Unexpected end of file in " + fileName);
    return line;
}
use of biblemulticonverter.data.Bible in project BibleMultiConverter by schierlm.
the class RoundtripXML method parseBible.
/**
 * Builds a {@link Bible} from its unmarshalled roundtrip XML form.
 * <p>
 * Books, chapters and verses are copied in document order. A trailing
 * {@code -<digits>} suffix of the book id is removed before it is looked up
 * with {@code BookID.fromOsisId}. Prolog and verse content is converted by
 * {@code parseContent}; each prolog and verse is marked finished once it has
 * been filled.
 *
 * @param sBible the unmarshalled root element
 * @return the converted bible
 * @throws Exception if content conversion fails
 */
protected Bible parseBible(JAXBElement<BibleType> sBible) throws Exception {
    BibleType source = sBible.getValue();
    Bible target = new Bible(source.getName());
    for (BibleType.Book srcBook : source.getBook()) {
        String osisId = srcBook.getId().replaceAll("-[0-9]+$", "");
        Book book = new Book(srcBook.getAbbr(), BookID.fromOsisId(osisId), srcBook.getShortName(), srcBook.getLongName());
        target.getBooks().add(book);
        for (BibleType.Book.Chapter srcChapter : srcBook.getChapter()) {
            Chapter chapter = new Chapter();
            book.getChapters().add(chapter);
            // The prolog is optional.
            if (srcChapter.getProlog() != null) {
                FormattedText prolog = new FormattedText();
                chapter.setProlog(prolog);
                parseContent(prolog.getAppendVisitor(), srcChapter.getProlog().getContent());
                prolog.finished();
            }
            for (BibleType.Book.Chapter.Verse srcVerse : srcChapter.getVerse()) {
                Verse verse = new Verse(srcVerse.getNumber());
                chapter.getVerses().add(verse);
                parseContent(verse.getAppendVisitor(), srcVerse.getContent());
                verse.finished();
            }
        }
    }
    return target;
}
Aggregations