use of biblemulticonverter.data.Book in project BibleMultiConverter by schierlm.
the class EquipdEPUB method doExport.
/**
 * Exports the given bible as an EPUB archive written to {@code exportArgs[0] + ".epub"}.
 *
 * <p>Only books whose ZefID is between 1 and 66 (inclusive) are exported; every other
 * book is skipped and a warning is printed. For each exported book an index page
 * ({@code <file>.0.xhtml}) with links to all chapters is written, followed by one XHTML
 * file per chapter whose content is produced by {@code writeChapter}.
 *
 * @param bible the bible to export
 * @param exportArgs {@code [0]}: output file name without the {@code .epub} suffix;
 *        {@code [1]} (optional): {@code -headlinesAfter} or {@code -noHeadlines}; any
 *        other value only triggers a warning
 * @throws Exception if the archive cannot be written or a chapter cannot be rendered
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    final Set<String> unsupportedFeatures = new HashSet<>();
    String uuid = UUID.randomUUID().toString() + "-" + System.currentTimeMillis() / 1000;
    // Tri-state handed through to writeChapter: false (default), true for
    // "-headlinesAfter", null for "-noHeadlines".
    Boolean headlinesAfter = false;
    if (exportArgs.length > 1) {
        if (exportArgs[1].equals("-headlinesAfter"))
            headlinesAfter = true;
        else if (exportArgs[1].equals("-noHeadlines"))
            headlinesAfter = null;
        else
            System.out.println("WARNING: Unsupported argument: " + exportArgs[1]);
    }
    try (ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(exportArgs[0] + ".epub"))) {
        // The EPUB container format requires "mimetype" to be the first entry and to be
        // stored uncompressed. ZipOutputStream deflates by default, so the entry is
        // switched to STORED, which in turn requires size and CRC to be set up front.
        byte[] mimetypeBytes = "application/epub+zip".getBytes(StandardCharsets.US_ASCII);
        java.util.zip.CRC32 mimetypeCrc = new java.util.zip.CRC32();
        mimetypeCrc.update(mimetypeBytes);
        ZipEntry mimetypeEntry = new ZipEntry("mimetype");
        mimetypeEntry.setMethod(ZipEntry.STORED);
        mimetypeEntry.setSize(mimetypeBytes.length);
        mimetypeEntry.setCompressedSize(mimetypeBytes.length);
        mimetypeEntry.setCrc(mimetypeCrc.getValue());
        zos.putNextEntry(mimetypeEntry);
        zos.write(mimetypeBytes);

        zos.putNextEntry(new ZipEntry("META-INF/container.xml"));
        zos.write(("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<container"
                + " version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n <rootfiles>\n"
                + " <rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\n"
                + " </rootfiles>\n</container>").getBytes(StandardCharsets.US_ASCII));

        // Package document: metadata, manifest (one item per book index page and chapter)
        // and spine (same items in the same order).
        zos.putNextEntry(new ZipEntry("OEBPS/content.opf"));
        StringBuilder sb = new StringBuilder();
        sb.append("<?xml version=\"1.0\"?>\n<package version=\"2.0\" xmlns=\"http://www.idpf.org/2007/opf\""
                + " unique-identifier=\"uuid\">\n <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\""
                + " xmlns:opf=\"http://www.idpf.org/2007/opf\">\n <dc:title>" + xml(bible.getName())
                + "</dc:title>\n <dc:creator opf:role=\"aut\"></dc:creator>\n <dc:language>en</dc:language>\n"
                + " <dc:rights>Public Domain</dc:rights>\n <dc:publisher></dc:publisher>\n"
                + " <dc:identifier id=\"uuid\">" + uuid + "</dc:identifier>\n </metadata>\n <manifest>\n"
                + " <item id=\"ncx\" href=\"toc.ncx\" media-type=\"application/x-dtbncx+xml\" />\n"
                + " <item id=\"style\" href=\"global.css\" media-type=\"text/css\" />\n");
        int counter = 1;
        for (Book book : bible.getBooks()) {
            int zefID = book.getId().getZefID();
            if (zefID < 1 || zefID > 66)
                continue;
            String fileName = String.format("%02d.%s", zefID, BOOK_NAMES[zefID]);
            // i == 0 is the book's index page, 1..n are its chapters.
            for (int i = 0; i <= book.getChapters().size(); i++) {
                sb.append(" <item id=\"chapter" + counter + "\" href=\"" + fileName + "." + i + ".xhtml\" media-type=\"application/xhtml+xml\" />\n");
                counter++;
            }
        }
        sb.append(" </manifest>\n <spine toc=\"ncx\">\n");
        for (int i = 1; i < counter; i++) {
            sb.append(" <itemref idref=\"chapter" + i + "\" />\n");
        }
        sb.append(" </spine>\n</package>");
        zos.write(sb.toString().getBytes(StandardCharsets.UTF_8));

        zos.putNextEntry(new ZipEntry("OEBPS/global.css"));
        zos.write(("span.vn {\n font-weight: bold;\n}\n"
                + "div.ci a {\n display: block;\n float: left;\n margin: 0 15px 15px 0;\n}\n"
                + "").getBytes(StandardCharsets.US_ASCII));

        // Navigation document: one nav point per exported book, pointing at its index page.
        zos.putNextEntry(new ZipEntry("OEBPS/toc.ncx"));
        sb.setLength(0);
        sb.append("<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<ncx"
                + " xmlns=\"http://www.daisy.org/z3986/2005/ncx/\" version=\"2005-1\">\n <head>\n"
                + " <meta content=\"urn:uuid:" + uuid + "\" name=\"dtb:uuid\" />\n <meta content=\"2\""
                + " name=\"dtb:depth\" />\n <meta content=\"0\" name=\"dtb:totalPageCount\" />\n"
                + " <meta content=\"0\" name=\"dtb:maxPageNumber\" />\n </head>\n <docTitle>\n <text>"
                + xml(bible.getName()) + "</text>\n </docTitle>\n <navMap>\n");
        counter = 1;
        for (Book book : bible.getBooks()) {
            int zefID = book.getId().getZefID();
            if (zefID < 1 || zefID > 66)
                continue;
            String fileName = String.format("%02d.%s", zefID, BOOK_NAMES[zefID]);
            sb.append(" <navPoint id=\"navPoint" + counter + "\" playOrder=\"" + counter + "\">\n <navLabel><text>" + xml(book.getShortName()) + "</text></navLabel>\n <content src=\"" + fileName + ".0.xhtml\" />\n" + " </navPoint>\n");
            counter++;
        }
        sb.append(" </navMap>\n</ncx>");
        zos.write(sb.toString().getBytes(StandardCharsets.UTF_8));

        // Content: per book an index page followed by one file per chapter.
        for (Book book : bible.getBooks()) {
            int zefID = book.getId().getZefID();
            if (zefID < 1 || zefID > 66) {
                System.out.println("WARNING: Skipping book " + book.getAbbr());
                continue;
            }
            String fileName = String.format("%02d.%s", zefID, BOOK_NAMES[zefID]);
            zos.putNextEntry(new ZipEntry("OEBPS/" + fileName + ".0.xhtml"));
            sb.setLength(0);
            sb.append("<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<html dir=\"ltr\" xmlns=\"http://www.w3.org/1999/xhtml\" "
                    + "xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"en\">\n<head>\n <meta http-equiv=\"Content-Type\""
                    + " content=\"text/html; charset=UTF-8\" />\n <title>" + xml(book.getShortName()) + " </title>\n"
                    + " <link rel=\"stylesheet\" href=\"global.css\" type=\"text/css\" />\n</head>\n<body>\n\n<h2>"
                    + xml(book.getShortName()) + " </h2>\n\n<div class=\"ci\">\n");
            for (int i = 1; i <= book.getChapters().size(); i++) {
                sb.append("<a href=\"" + fileName + "." + i + ".xhtml\">" + i + "</a>");
            }
            sb.append("\n</div>\n\n</body>\n</html>");
            zos.write(sb.toString().getBytes(StandardCharsets.UTF_8));
            for (int i = 1; i <= book.getChapters().size(); i++) {
                zos.putNextEntry(new ZipEntry("OEBPS/" + fileName + "." + i + ".xhtml"));
                StringWriter sw = new StringWriter();
                writeChapter(sw, unsupportedFeatures, book, i, headlinesAfter);
                zos.write(sw.toString().getBytes(StandardCharsets.UTF_8));
            }
        }
    }
    if (!unsupportedFeatures.isEmpty()) {
        System.out.println("WARNING: Skipped unsupported features: " + unsupportedFeatures);
    }
}
use of biblemulticonverter.data.Book in project BibleMultiConverter by schierlm.
the class HeatMapHTML method build.
/**
 * Writes a heat map HTML page for the given bible to {@code outputFile}.
 *
 * <p>Books without an entry in {@code rawDataPerBook} and chapters without verses are
 * left out. The remaining chapters are grouped into a section tree (total, testament,
 * book, chapter; single-chapter books have no chapter level) which is rendered by
 * {@code appendHTML} after a legend table listing all categorizations.
 *
 * <p>Labels can be overridden with the system properties {@code heatmap.label.ot},
 * {@code heatmap.label.nt}, {@code heatmap.label.total}, {@code heatmap.label.section}
 * and {@code heatmap.label.infotext}.
 *
 * @param outputFile file the HTML page is written to (UTF-8)
 * @param bible bible whose books, chapters and verse numbers define the sections
 * @param title page title and heading
 * @param categorizations categorizations shown in the legend and as table columns
 * @param rawDataPerBook raw data per book, indexed by chapter in its first dimension
 * @throws Exception if the file cannot be written
 */
private void build(File outputFile, Bible bible, String title, Categorization[] categorizations, Map<BookID, int[][][]> rawDataPerBook) throws Exception {
    List<Section> otBooks = new ArrayList<>(), ntBooks = new ArrayList<>();
    for (Book book : bible.getBooks()) {
        int[][][] rawData = rawDataPerBook.get(book.getId());
        if (rawData == null)
            continue;
        List<Section> bookList = book.getId().isNT() ? ntBooks : otBooks;
        int chapCount = book.getChapters().size();
        if (chapCount == 1) {
            // Single-chapter book: the book itself is the leaf section.
            if (!book.getChapters().get(0).getVerses().isEmpty()) {
                String[] verseNumbers = new String[book.getChapters().get(0).getVerses().size()];
                for (int i = 0; i < verseNumbers.length; i++) {
                    verseNumbers[i] = book.getChapters().get(0).getVerses().get(i).getNumber();
                }
                bookList.add(new Section(book.getShortName(), rawData[0], verseNumbers, categorizations));
            }
        } else {
            List<Section> chapterList = new ArrayList<>();
            for (int i = 0; i < chapCount; i++) {
                if (!book.getChapters().get(i).getVerses().isEmpty()) {
                    String[] verseNumbers = new String[book.getChapters().get(i).getVerses().size()];
                    for (int j = 0; j < verseNumbers.length; j++) {
                        verseNumbers[j] = book.getChapters().get(i).getVerses().get(j).getNumber();
                    }
                    chapterList.add(new Section(book.getShortName() + " " + (i + 1), rawData[i], verseNumbers, categorizations));
                }
            }
            if (!chapterList.isEmpty())
                bookList.add(new Section(book.getShortName(), chapterList));
        }
    }
    List<Section> testaments = new ArrayList<>();
    if (!otBooks.isEmpty())
        testaments.add(new Section(System.getProperty("heatmap.label.ot", "Old Testament"), otBooks));
    // Uses its own property key; reading "heatmap.label.ot" here would give both
    // testaments the same label as soon as the OT label is overridden.
    if (!ntBooks.isEmpty())
        testaments.add(new Section(System.getProperty("heatmap.label.nt", "New Testament"), ntBooks));
    Section total = new Section(System.getProperty("heatmap.label.total", "Total"), testaments);
    String infoText = System.getProperty("heatmap.label.infotext", "");
    try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), StandardCharsets.UTF_8))) {
        // Page header including the script that expands/collapses table rows by id prefix.
        bw.write("<html>\n"
                + "<head>\n"
                + "<meta charset=\"UTF-8\">\n"
                + "<title>" + html(title) + "</title>\n"
                + "<style type=\"text/css\">\n"
                + "body { font-family: Verdana, Arial, Helvetica, sans-serif; }\n"
                + "table {border-collapse: collapse; font-size: 6pt; }\n"
                + "td, th {border: 1px solid #777; }\n"
                + "span.versedetails {display: inline-block; min-width: 2em; text-align: center; border: 1px solid black; margin: 1px; }\n"
                + "span.versedetails b {margin: 0px 2px;}\n"
                + "</style>\n"
                + "<script>\n"
                + "function toggle(elem) {\n"
                + " var label = '';\n"
                + " var trs = document.getElementsByTagName('tr');\n"
                + " for (var i=0; i < trs.length; i++) {\n"
                + " var id = trs[i].id;\n"
                + " if (id == elem) {\n"
                + " label = trs[i].getElementsByTagName('a')[0].innerHTML;\n"
                + " if (label == '[+]')\n"
                + " trs[i].getElementsByTagName('a')[0].innerHTML = '[-]';\n"
                + " else\n"
                + " trs[i].getElementsByTagName('a')[0].innerHTML = '[+]';\n"
                + " } else if (id.length > elem.length && id.substring(0, elem.length+1) == elem+'_') {\n"
                + " if (label=='[-]') {\n"
                + " trs[i].style.display='none';\n"
                + " } else if (id.substring(elem.length+1).indexOf('_') == -1) {\n"
                + " trs[i].style.display='';\n"
                + " }\n"
                + " }\n"
                + " }\n"
                + "}\n"
                + "window.onload = function() {\n"
                + " document.getElementById('root').style.display='';\n"
                + " toggle('root');\n"
                + "}\n"
                + "</script>\n"
                + "</head>\n"
                + "<body>\n"
                + "<h1>" + html(title) + "</h1>\n"
                + (infoText.isEmpty() ? "" : "<p>" + html(infoText) + "</p>\n")
                + "<table>");
        // Legend: one header row per categorization, one row per category color/name.
        for (Categorization c : categorizations) {
            bw.write("<tr><th colspan=\"2\">" + html(c.title) + "</th></tr>");
            for (int i = 0; i < c.colors.length; i++) {
                bw.write("<tr><th style=\"background-color: " + c.colors[i] + "\">#" + (i + 1) + "</th><td>" + html(c.names[i]) + "</td></tr>");
            }
        }
        bw.write("</table>\n<br />\n");
        // Data table header: section column plus one column group per categorization.
        bw.write("<table><tr><th>" + html(System.getProperty("heatmap.label.section", "Section")) + "</th>");
        for (Categorization c : categorizations) {
            bw.write("<th>" + html(c.title) + "</th>");
            for (int i = 0; i < c.colors.length; i++) {
                bw.write("<th style=\"background-color: " + c.colors[i] + "\"><i>" + html(c.names[i]) + "</i></th>");
            }
        }
        bw.write("</tr>\n");
        appendHTML(bw, 0, "root", total, categorizations);
        bw.write("</table>");
    }
}
use of biblemulticonverter.data.Book in project BibleMultiConverter by schierlm.
the class NeUeParser method doImport.
/**
 * Imports the NeUe bible from a directory of HTML files.
 *
 * <p>The import runs in these steps: fill a metadata book with fixed values, read the
 * main page ({@code NeUe.htm}, falling back to {@code index.htm}) for the text version,
 * the table of contents and the preface, parse one HTML file per entry of
 * {@code METADATA} into a book, and finally build an appendix book from
 * {@code bibel.html}, a fixed link/image block and the files listed in
 * {@code JESUS_CHRONIK}.
 *
 * <p>The parser is strict: any line it does not recognize makes it throw an
 * {@link IOException} whose message is the offending line (or the mismatching values).
 * Book and chronicle files that do not exist are skipped with a console message.
 *
 * <p>Side effects visible in this method: {@code bm.shortname} of the {@code METADATA}
 * entries is overwritten from the table of contents, and {@code JESUS_CHRONIK} is
 * replaced by an empty array when the table of contents lists no chronicle entries.
 *
 * @param inputDirectory directory containing the HTML files
 * @return the imported bible
 * @throws Exception if a file cannot be read or has an unexpected structure
 */
@Override
public Bible doImport(File inputDirectory) throws Exception {
Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
MetadataBook metadata = new MetadataBook();
metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
metadata.setValue(MetadataBookKey.language, "GER");
bible.getBooks().add(metadata.getBook());
// Main page: prefer NeUe.htm, fall back to index.htm if it does not exist.
String mainFile = "NeUe.htm";
if (!new File(inputDirectory, mainFile).exists())
mainFile = "index.htm";
try (BufferedReader br = createReader(inputDirectory, mainFile)) {
String line = br.readLine().trim();
// Header part: scan up to the first "u3" paragraph and pick up the text version.
while (!line.startsWith("<p class=\"u3\">")) {
if (line.contains("Textstand: ")) {
// 11 == "Textstand: ".length(); the version is the text up to the next tag.
line = line.substring(line.indexOf("Textstand: ") + 11);
line = line.substring(0, line.indexOf('<'));
metadata.setValue(MetadataBookKey.version, line);
// The date is the import date, not a date taken from the file.
metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
// The revision keeps only the digits of the version string.
metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
metadata.finished();
}
line = br.readLine().trim();
}
// Table of contents: every link must match the next expected METADATA file name or
// the next expected JESUS_CHRONIK entry, in order.
Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a> (?:</p>)?");
int bookIndex = 0, jcIndex = 0;
while (!line.startsWith("<a name=\"vorwort\">")) {
if (line.equals("<br>")) {
line = br.readLine().trim();
if (line.startsWith("»» "))
line = line.substring("»» ".length());
}
Matcher m = tocPattern.matcher(line);
if (m.matches()) {
String url = m.group(1);
String shortName = replaceEntities(m.group(2));
if (url.endsWith(".html#bb")) {
// 8 == ".html#bb".length()
String filename = url.substring(0, url.length() - 8);
BookMetadata bm = METADATA[bookIndex];
if (!bm.filename.equals(filename))
throw new IOException(filename + "/" + bm.filename);
// The short name shown in the table of contents is stored for later use.
bm.shortname = shortName;
bookIndex++;
} else if (url.startsWith("0")) {
if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
jcIndex++;
} else {
throw new IOException(url);
}
} else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p> </p>") && !line.equals("<p><a name=\"bb\"> </a></p>")) {
// Anything that is neither a link nor one of the known filler lines is an error.
throw new IOException(line);
}
line = br.readLine().trim();
}
if (bookIndex != METADATA.length)
throw new IOException(bookIndex + " != " + METADATA.length);
// No chronicle links at all is accepted: the chronicle list is emptied in that case.
if (jcIndex == 0)
JESUS_CHRONIK = new String[0];
if (jcIndex != JESUS_CHRONIK.length)
throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
// Preface (Vorwort)
Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
bible.getBooks().add(vorwort);
Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
// True once a paragraph was written, so the next one is preceded by a paragraph break.
boolean needParagraph = false;
if (line.endsWith("</a><br>"))
line = br.readLine().trim();
while (!line.startsWith("<div align=\"right\">")) {
line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
if (line.startsWith("<h2>")) {
// The h2 heading is only verified against the preface's long name, not emitted.
if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
} else if (line.startsWith("<h4>")) {
parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
needParagraph = false;
} else if (line.startsWith("<h4 id=")) {
parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
needParagraph = false;
} else if (line.startsWith("<div class=\"fn\">")) {
if (needParagraph)
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
needParagraph = true;
parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
} else if (line.startsWith("<p>")) {
if (needParagraph)
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
needParagraph = true;
// A paragraph ending in a line break continues on the next input line.
if (line.endsWith("<br />"))
line += br.readLine().trim();
parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
} else if (line.equals("<ul>")) {
// Lists are skipped entirely.
// NOTE(review): readLine() returns null at end of input; a missing "</ul>"
// would end in a NullPointerException here - confirm inputs always close it.
while (!line.equals("</ul>")) {
line = br.readLine();
}
} else {
throw new IOException(line);
}
line = skipLines(br, "<p> </p>");
}
vorwort.getChapters().get(0).getProlog().finished();
}
// Bible books: one HTML file per METADATA entry.
for (BookMetadata bm : METADATA) {
if (!new File(inputDirectory, bm.filename + ".html").exists()) {
System.out.println("*** Skipping " + bm.filename + " - file not found ***");
continue;
}
try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
String line = br.readLine().trim();
// Skip the page header and navigation up to the "bb" anchor paragraph.
line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "»»");
if (!line.equals("<p><a name=\"bb\"> </a></p>") && !line.equals("<p><a id=\"bb\"> </a></p>"))
throw new IOException(line);
line = skipLines(br);
if (line.equals("<p> </p>"))
line = br.readLine().trim();
// The h1 heading becomes the book's long name.
Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
bible.getBooks().add(bk);
line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p> </p>");
// Book prolog: a "u0" headline, one or more "e" blocks and a closing "u1" paragraph.
FormattedText prolog = new FormattedText();
prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
line = skipLines(br);
boolean firstProlog = true;
while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
if (firstProlog) {
firstProlog = false;
} else {
prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
}
parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
line = skipLines(br);
}
// At least one "e" block is required.
if (firstProlog)
throw new IOException(line);
prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
prolog.finished();
line = skipLines(br);
if (!line.startsWith("<h"))
throw new IOException(line);
// Heading level of the first heading; later headings get a depth relative to it.
char minHeadline = line.charAt(2);
// Headlines seen since the last verse; they are attached to the next verse.
List<Headline> headlines = new ArrayList<>();
boolean inParagraph = false;
Chapter currentChapter = null;
Verse currentVerse = null;
// Footnote visitors still waiting for their text, and the "chapter,verse"
// references those texts are expected to start with (same order).
List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
List<String> footnoteVerses = new ArrayList<>();
// Book body: processed piece by piece until the horizontal rule.
while (!line.equals("<hr>")) {
if (line.startsWith("<p> </p>")) {
// NOTE(review): 13 is longer than the literal as shown here; it looks like the
// marker was originally written with a 6-character HTML entity - confirm.
line = line.substring(13).trim();
if (line.length() == 0)
line = skipLines(br);
continue;
}
// Remainder of the current input line that is processed in the next iteration.
String restLine = null;
List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
// Normalize: drop id attributes and treat "poet"/"einl" paragraphs as plain ones.
while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
// 16 is the length of either opening tag.
line = "<p>" + line.substring(16);
}
// Split after the first closing paragraph tag if more content follows it.
if (line.matches(".*</p>.+")) {
int pos = line.indexOf("</p>");
restLine = line.substring(pos + 4).trim();
line = line.substring(0, pos + 4);
}
if (!inParagraph && line.startsWith("<p>")) {
inParagraph = true;
line = line.substring(3).trim();
if (line.length() == 0) {
line = skipLines(br);
continue;
}
}
// Split before a verse marker or "poet" paragraph that starts mid-line, so each
// piece handled below begins with at most one such marker.
if (line.indexOf("<span class=\"vers\">", 1) != -1) {
int pos = line.indexOf("<span class=\"vers\">", 1);
restLine = line.substring(pos) + (restLine == null ? "" : restLine);
line = line.substring(0, pos).trim();
}
if (line.indexOf("<p class=\"poet\">", 1) != -1) {
int pos = line.indexOf("<p class=\"poet\">", 1);
restLine = line.substring(pos) + (restLine == null ? "" : restLine);
line = line.substring(0, pos).trim();
}
// NOTE(review): removes 6 characters per match although the suffix literal shown
// is shorter; presumably a 6-character HTML entity in the original - confirm.
while (line.endsWith(" ")) line = line.substring(0, line.length() - 6);
if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
// Heading outside a paragraph: remember it for the next verse.
Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
if (headline.contains("*"))
throw new IOException(headline);
hl.getAppendVisitor().visitText(replaceEntities(headline));
headlines.add(hl);
} else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
// Verse start: the span content is the verse number.
int pos = line.indexOf("</span>");
if (pos == -1)
throw new IOException(line);
// 19 == "<span class=\"vers\">".length()
String vs = line.substring(19, pos).trim();
if (vs.endsWith(" ")) {
vs = cutAffix(vs, "", " ");
}
// Accepted forms: "verse" or "chapter,verse".
if (vs.matches("[0-9]+(,[0-9]+)?")) {
currentVerse = new Verse(vs);
} else {
throw new IOException(vs);
}
// 7 == "</span>".length()
line = line.substring(pos + 7);
if (line.endsWith("</p>")) {
inParagraph = false;
line = line.substring(0, line.length() - 4);
}
line = line.trim();
// NOTE(review): skips 6 characters for the prefix shown; same entity question
// as above - confirm.
if (line.startsWith(" ")) {
line = line.substring(6);
}
// Pending headlines go in front of the verse text.
for (Headline h : headlines) {
h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
}
headlines.clear();
parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
if (!inParagraph)
currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
// NOTE(review): currentChapter is null until a chapter marker was seen; a verse
// before the first chapter marker would fail here - confirm inputs.
currentChapter.getVerses().add(currentVerse);
} else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
// Chapter marker: chapters must appear consecutively starting at 1.
int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
currentChapter = new Chapter();
currentVerse = null;
bk.getChapters().add(currentChapter);
if (chap != bk.getChapters().size())
throw new IOException(chap + "/" + bk.getChapters().size());
// The book prolog is attached to the first chapter only.
if (prolog != null) {
currentChapter.setProlog(prolog);
prolog = null;
}
} else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
// Footnote text: must start with the reference of the oldest pending footnote.
String content = cutAffix(line, "<div class=\"fn\">", "</div>");
if (footnoteVerses.size() == 0)
throw new IOException(line);
String prefix = footnoteVerses.remove(0) + ":";
if (!content.startsWith(prefix)) {
throw new IOException(prefix + " / " + content);
}
parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
} else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith(" ") || line.startsWith("<span class=\"u2\">"))) {
// Continuation text of the current verse.
if (line.endsWith("</p>")) {
inParagraph = false;
line = line.substring(0, line.length() - 4);
}
line = line.trim();
parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
if (!inParagraph)
currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
} else {
// Unknown construct: print the following line as debugging aid, then fail.
System.err.println("Next line: " + br.readLine());
throw new IOException(line);
}
// Record the reference expected in front of each new footnote's text; verse
// numbers that already contain a chapter are used unchanged.
if (!newFootnotes.isEmpty()) {
footnotes.addAll(newFootnotes);
for (int i = 0; i < newFootnotes.size(); i++) {
if (currentVerse.getNumber().contains(",")) {
footnoteVerses.add(currentVerse.getNumber());
} else {
footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
}
}
}
if (restLine != null)
line = restLine;
else
line = skipLines(br);
}
// Nothing may be left over: no unattached headlines, no footnotes without text.
if (!headlines.isEmpty())
throw new IOException("" + headlines.size());
if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
for (Chapter ch : bk.getChapters()) {
for (Verse vv : ch.getVerses()) {
vv.trimWhitespace();
vv.finished();
}
}
}
}
// Appendix (Anhang)
Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
bible.getBooks().add(anhang);
Visitor<RuntimeException> vv = getPrologVisitor(anhang);
vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
// Overview of all books, taken from bibel.html starting at the "at" anchor.
try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
String line = br.readLine().trim();
while (!line.startsWith("<a name=\"at\">")) {
line = br.readLine().trim();
}
while (!line.equals("</body>")) {
// Normalize: drop named anchors, whitespace between tags and leading line breaks.
line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
line = line.replaceAll("> +<", "><");
line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
if (line.startsWith("<h2>")) {
parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
} else if (line.startsWith("<a href=\"#top\"><h2>")) {
parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
} else if (line.startsWith("<h3>")) {
parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
} else if (line.startsWith("<a href=\"#top\"><h3>")) {
parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
} else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
// Table row for one book: parts[0] is the linked file name, parts[1] the link text.
String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
if (line.contains("<td><br /><br /><a href")) {
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
line = line.replace("<td><br /><br /><a href", "<td><a href");
}
String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
// Look up the book metadata by file name to build a cross reference to 1:1.
BookMetadata m = null;
for (BookMetadata bm : METADATA) {
if (bm.filename.equals(parts[0])) {
m = bm;
break;
}
}
// NOTE(review): m stays null if no METADATA entry has this file name, which
// would fail with a NullPointerException on the next line - confirm.
bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
bold.visitText(" " + replaceEntities(title));
vv.visitLineBreak(LineBreakKind.NEWLINE);
// The description cell may span several input lines.
line = br.readLine().trim();
while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
line = br.readLine().trim();
if (!line.equals("</tr>"))
throw new IOException(line);
} else {
throw new IOException(line);
}
line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p> </p><p> </p><p> </p><p> </p>", "<p> </p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
}
}
// Ezekiel's temple (Hesekiels Tempel): a link in offline mode, an inline image online.
vv.visitHeadline(1).visitText("Hesekiels Tempel");
Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");
// Jesus chronicle (Jesus-Chronik): one HTML file per JESUS_CHRONIK entry.
if (JESUS_CHRONIK.length > 0)
vv.visitHeadline(1).visitText("Die Jesus-Chronik");
for (String name : JESUS_CHRONIK) {
if (!new File(inputDirectory, name + ".html").exists()) {
System.out.println("*** Skipping " + name + " - file not found ***");
continue;
}
try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
// Pending footnote visitors and the prefixes their texts must start with; both
// lists are filled by parseJesusChronikText and consumed by the "fn" blocks.
List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
List<String> footnotePrefixes = new ArrayList<>();
while (!line.startsWith("</body>")) {
line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
if (line.startsWith("<h2>")) {
parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
} else if (line.startsWith("<div class=\"fn\">")) {
// Footnote block: may span several lines; footnotes are separated by "<br />".
while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
for (String fn : fns) {
fn = fn.trim();
String pfx = footnotePrefixes.remove(0);
Visitor<RuntimeException> fnv = footnoteList.remove(0);
if (!fn.startsWith(pfx))
throw new IOException(pfx + " / " + fn);
parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
}
} else if (line.startsWith("<p><div class=\"rot\">")) {
// Date line, rendered in italics without its DATE comment markers.
String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
// Bold title (6 == "<p><b>".length()) followed by an optional italic reference line.
int pos = line.indexOf("</b><br />");
parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
if (!xref.isEmpty())
parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else if (line.startsWith("<p>")) {
parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else if (line.startsWith("©")) {
// Copyright notice, possibly spanning several lines.
while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else if (line.startsWith("<div class=\"e\">")) {
while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else {
throw new IOException(line);
}
line = skipLines(br);
}
// Every footnote reference must have been matched by a footnote text.
if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
}
}
anhang.getChapters().get(0).getProlog().trimWhitespace();
anhang.getChapters().get(0).getProlog().finished();
return bible;
}
use of biblemulticonverter.data.Book in project BibleMultiConverter by schierlm.
the class OSIS method doExport.
/**
 * Exports the given bible as an OSIS XML document written to {@code exportArgs[0]}.
 *
 * <p>Every book becomes a {@code div} of type {@code book} with a main title, every
 * chapter a {@code chapter} element with a chapter title and optional prolog, and every
 * virtual verse a {@code verse} element preceded by its headlines. Verses whose number
 * differs from the virtual verse number are prefixed with their number in bold.
 *
 * @param bible the bible to export
 * @param exportArgs {@code [0]}: output file; {@code [1]} (optional): comma-separated
 *        element names to convert to milestoned form, or {@code -} for none (defaults to
 *        {@code verse})
 * @throws IllegalArgumentException if an element name cannot be milestoned
 * @throws Exception if the document cannot be built or written
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();

    // Root element with namespace and schema declarations.
    Element root = document.createElement("osis");
    document.appendChild(root);
    root.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
    root.setAttribute("xmlns", "http://www.bibletechnologies.net/2003/OSIS/namespace");
    root.setAttribute("xsi:schemaLocation", "http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.2.1.1.xsd");

    Element textRoot = document.createElement("osisText");
    root.appendChild(textRoot);
    textRoot.setAttribute("canonical", "true");
    textRoot.setAttribute("osisIDWork", "Exported");
    textRoot.appendChild(buildHeader(document, bible.getName()));

    for (Book bk : bible.getBooks()) {
        final String bookOsisID = bk.getId().getOsisID();
        final boolean newTestament = bk.getId().isNT();

        Element bookDiv = document.createElement("div");
        textRoot.appendChild(bookDiv);
        bookDiv.setAttribute("type", "book");
        bookDiv.setAttribute("canonical", "true");
        bookDiv.setAttribute("osisID", bookOsisID);

        Element mainTitle = document.createElement("title");
        bookDiv.appendChild(mainTitle);
        mainTitle.setAttribute("type", "main");
        mainTitle.appendChild(document.createTextNode(bk.getLongName()));

        List<Chapter> chapters = bk.getChapters();
        for (int index = 0; index < chapters.size(); index++) {
            Chapter chp = chapters.get(index);
            // Chapter numbers are 1-based positions within the book.
            int chapterNumber = index + 1;
            String chapterOsisID = bookOsisID + "." + chapterNumber;

            Element chapterElem = document.createElement("chapter");
            bookDiv.appendChild(chapterElem);
            chapterElem.setAttribute("osisID", chapterOsisID);
            OSISVisitor chapterVisitor = new OSISVisitor(chapterElem, newTestament);

            Element chapterTitle = document.createElement("title");
            chapterElem.appendChild(chapterTitle);
            chapterTitle.setAttribute("type", "chapter");
            chapterTitle.appendChild(document.createTextNode(bk.getAbbr() + " " + chapterNumber));

            if (chp.getProlog() != null) {
                chp.getProlog().accept(chapterVisitor);
            }

            for (VirtualVerse vv : chp.createVirtualVerses()) {
                String verseOsisID = chapterOsisID + "." + vv.getNumber();
                // Headlines are emitted on chapter level, in front of the verse element.
                for (Headline hl : vv.getHeadlines()) {
                    hl.accept(chapterVisitor.visitHeadline(hl.getDepth()));
                }
                Element verseElem = document.createElement("verse");
                chapterElem.appendChild(verseElem);
                verseElem.setAttribute("osisID", verseOsisID);
                String virtualNumber = String.valueOf(vv.getNumber());
                for (Verse v : vv.getVerses()) {
                    boolean numberDiffers = !v.getNumber().equals(virtualNumber);
                    if (numberDiffers) {
                        // Keep the original verse number visible as bold text.
                        Element numberElem = document.createElement("hi");
                        verseElem.appendChild(numberElem);
                        numberElem.setAttribute("type", "bold");
                        numberElem.appendChild(document.createTextNode("(" + v.getNumber() + ")"));
                    }
                    v.accept(new OSISVisitor(verseElem, newTestament));
                }
            }
        }
    }

    // Optional conversion of container elements to milestoned form.
    String milestonedElementNames = "verse";
    if (exportArgs.length > 1) {
        milestonedElementNames = exportArgs[1];
    }
    if (!milestonedElementNames.equals("-")) {
        Set<String> requested = new HashSet<>(Arrays.asList(milestonedElementNames.split(",")));
        Set<String> rejected = new HashSet<>(requested);
        rejected.removeAll(GENERATED_MILESTONEABLE_ELEMENTS);
        if (!rejected.isEmpty()) {
            for (String elemName : rejected) {
                String reason = GENERATED_UNMILESTONEABLE_ELEMENTS.contains(elemName)
                        ? " may not be milestoned"
                        : " is never generated by the OSIS export";
                System.out.println("ERROR: " + elemName + reason);
            }
            throw new IllegalArgumentException("Cannot create milestoned elements: " + milestonedElementNames);
        }
        convertChildrenToMilestoned(document.getDocumentElement(), requested);
    }

    File target = new File(exportArgs[0]);
    TransformerFactory.newInstance().newTransformer().transform(new DOMSource(document), new StreamResult(target));
}
use of biblemulticonverter.data.Book in project BibleMultiConverter by schierlm.
the class OSIS method doImport.
/**
 * Imports a bible from an OSIS XML file.
 *
 * <p>The file is validated against the bundled OSIS 2.1.1 schema first. If the header
 * contains a description or rights statement, a metadata book is added. Every
 * {@code div} of type {@code book} is then turned into a book; book divs given in
 * milestoned form ({@code sID}/{@code eID}) are converted to container form beforehand by
 * moving all nodes between start and end marker into the start element.
 *
 * @param inputFile the OSIS file to import
 * @return the imported bible
 * @throws IllegalStateException if a milestoned book div has no end marker or start and
 *         end marker cannot be made siblings
 * @throws Exception if the file fails validation or cannot be parsed
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    ValidateXML.validateFileBeforeParsing(SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(ObjectFactory.class.getResource("/osisCore.2.1.1.xsd")), inputFile);
    printedWarnings.clear();
    DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    XPath xpath = javax.xml.xpath.XPathFactory.newInstance().newXPath();
    Document osisDoc = docBuilder.parse(inputFile);

    String bibleName = xpath.evaluate("/osis/osisText/header/work/title/text()", osisDoc);
    Bible result = new Bible(bibleName.isEmpty() ? "OSIS Bible" : bibleName);

    // Optional metadata book, only created if description or rights are present.
    String description = xpath.evaluate("/osis/osisText/header/work/description/text()", osisDoc);
    String rights = xpath.evaluate("/osis/osisText/header/work/rights/text()", osisDoc);
    boolean hasMetadata = !(description.isEmpty() && rights.isEmpty());
    if (hasMetadata) {
        String date = xpath.evaluate("/osis/osisText/header/work/date/text()", osisDoc);
        String titleDesc = xpath.evaluate("/osis/osisText/titlePage/description/text()", osisDoc);
        MetadataBook mb = new MetadataBook();
        if (!description.isEmpty()) {
            String normalized = description.replaceAll("[\r\n\t ]+", " ").trim();
            mb.setValue(MetadataBookKey.description, normalized);
        }
        if (!rights.isEmpty()) {
            String normalized = rights.replaceAll("[\r\n\t ]+", " ").trim();
            mb.setValue(MetadataBookKey.rights, normalized);
        }
        if (!date.isEmpty()) {
            mb.setValue(MetadataBookKey.date, date);
        }
        if (!titleDesc.isEmpty()) {
            String normalized = titleDesc.replaceAll("[\r\n\t ]+", " ").trim();
            mb.setValue("description@titlePage", normalized);
        }
        mb.finished();
        result.getBooks().add(mb.getBook());
    }

    NodeList osisBooks = (NodeList) xpath.evaluate("/osis/osisText//div[@type='book']", osisDoc, XPathConstants.NODESET);
    final int bookCount = osisBooks.getLength();
    for (int bookIndex = 0; bookIndex < bookCount; bookIndex++) {
        Element osisBook = (Element) osisBooks.item(bookIndex);
        String startID = osisBook.getAttribute("sID");
        if (!startID.isEmpty()) {
            // Milestoned book: find the matching end marker.
            Element osisBookEnd = (Element) xpath.evaluate("//div[@eID='" + startID + "']", osisDoc, XPathConstants.NODE);
            if (osisBookEnd == null) {
                throw new IllegalStateException("No milestoned div found with eID " + startID);
            }
            if (!osisBookEnd.getParentNode().isSameNode(osisBook.getParentNode())) {
                // Start and end are not siblings: milestone everything below their
                // nearest common ancestor so that they become siblings.
                List<Node> endAncestors = new ArrayList<>();
                for (Node n = osisBookEnd; n != null; n = n.getParentNode()) {
                    endAncestors.add(n);
                }
                Node commonParent = null;
                for (Node n = osisBook; n != null && commonParent == null; n = n.getParentNode()) {
                    for (Node candidate : endAncestors) {
                        if (n.isSameNode(candidate)) {
                            commonParent = n;
                            break;
                        }
                    }
                }
                if (commonParent == null) {
                    throw new IllegalStateException("Unable to find common parent of milestoned div start and end tag");
                }
                convertToMilestoned((Element) commonParent);
                if (!osisBookEnd.getParentNode().isSameNode(osisBook.getParentNode())) {
                    throw new IllegalStateException("Unable to normalize XML so that milestoned div start and end tags are siblings");
                }
            }
            // Move everything between start and end marker into the start element,
            // then drop the end marker.
            Node following;
            while ((following = osisBook.getNextSibling()) != null && !following.isSameNode(osisBookEnd)) {
                osisBook.appendChild(following);
            }
            osisBookEnd.getParentNode().removeChild(osisBookEnd);
        }

        String bookOsisID = osisBook.getAttribute("osisID");
        BookID bookID = BookID.fromOsisId(bookOsisID);
        // Default title is the English book name; a non-empty main title as first
        // non-text child overrides it.
        String title = bookID.getEnglishName();
        Node firstNonText = osisBook.getFirstChild();
        for (; firstNonText instanceof Text; firstNonText = firstNonText.getNextSibling()) {
            // skip leading text nodes
        }
        if (firstNonText instanceof Element && firstNonText.getNodeName().equals("title")) {
            Element titleElement = (Element) firstNonText;
            boolean isMainTitle = titleElement.getAttribute("type").equals("main");
            if (isMainTitle && titleElement.getChildNodes().getLength() > 0) {
                title = titleElement.getTextContent();
            }
        }
        Book bibleBook = new Book(bookOsisID, bookID, title, title);
        result.getBooks().add(bibleBook);
        parseBook(bookOsisID, osisBook, bibleBook);
    }
    return result;
}
Aggregations