use of biblemulticonverter.data.Chapter in project BibleMultiConverter by schierlm.
the class NeUeParser method doImport.
/**
 * Imports the NeÜ bible from a directory of HTML files.
 *
 * The method works in several passes over different files of the directory:
 * the main page (NeUe.htm, or index.htm if that file does not exist) supplies
 * the version information, the table of contents and the preface; every file
 * named in METADATA supplies one bible book; bibel.html supplies the first
 * part of the appendix; and the files named in JESUS_CHRONIK supply the last
 * part of the appendix. Book and Jesus-Chronik files that do not exist are
 * skipped with a message on standard output.
 *
 * The parsing is strict: a line that matches none of the expected shapes
 * causes an IOException whose message is the offending line (or the pair of
 * mismatching values).
 *
 * @param inputDirectory directory that contains the HTML files
 * @return the bible built from the parsed files
 * @throws Exception if a file cannot be read or contains unexpected content
 */
@Override
public Bible doImport(File inputDirectory) throws Exception {
Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
MetadataBook metadata = new MetadataBook();
metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
metadata.setValue(MetadataBookKey.language, "GER");
bible.getBooks().add(metadata.getBook());
// Main page: prefer NeUe.htm, fall back to index.htm when it is missing.
String mainFile = "NeUe.htm";
if (!new File(inputDirectory, mainFile).exists())
mainFile = "index.htm";
try (BufferedReader br = createReader(inputDirectory, mainFile)) {
String line = br.readLine().trim();
// Header part: read up to the first <p class="u3"> line and pick up the
// text following "Textstand: " (up to the next '<') as version string.
while (!line.startsWith("<p class=\"u3\">")) {
if (line.contains("Textstand: ")) {
// 11 is the length of "Textstand: ".
line = line.substring(line.indexOf("Textstand: ") + 11);
line = line.substring(0, line.indexOf('<'));
metadata.setValue(MetadataBookKey.version, line);
// The date is the current date at import time, not a value from the file.
metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
// The revision is the version string with every non-digit removed.
metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
metadata.finished();
}
line = br.readLine().trim();
}
// Table of contents: every link has to be the next expected entry of
// METADATA (book links) or JESUS_CHRONIK (links starting with "0"), in order.
Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a> (?:</p>)?");
int bookIndex = 0, jcIndex = 0;
while (!line.startsWith("<a name=\"vorwort\">")) {
if (line.equals("<br>")) {
line = br.readLine().trim();
if (line.startsWith("»» "))
line = line.substring("»» ".length());
}
Matcher m = tocPattern.matcher(line);
if (m.matches()) {
String url = m.group(1);
String shortName = replaceEntities(m.group(2));
if (url.endsWith(".html#bb")) {
// Cut the 8 characters of ".html#bb" to get the bare file name.
String filename = url.substring(0, url.length() - 8);
BookMetadata bm = METADATA[bookIndex];
if (!bm.filename.equals(filename))
throw new IOException(filename + "/" + bm.filename);
// The link text becomes the short name of the book.
bm.shortname = shortName;
bookIndex++;
} else if (url.startsWith("0")) {
if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
jcIndex++;
} else {
throw new IOException(url);
}
} else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p> </p>") && !line.equals("<p><a name=\"bb\"> </a></p>")) {
throw new IOException(line);
}
line = br.readLine().trim();
}
if (bookIndex != METADATA.length)
throw new IOException(bookIndex + " != " + METADATA.length);
// No Jesus-Chronik links at all: JESUS_CHRONIK (declared outside this
// method) is replaced by an empty array, so the check below passes and the
// Jesus-Chronik loop at the end of this method does nothing.
if (jcIndex == 0)
JESUS_CHRONIK = new String[0];
if (jcIndex != JESUS_CHRONIK.length)
throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
// Preface
Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
bible.getBooks().add(vorwort);
Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
// needParagraph is true once a text block was written, so that the next
// text block is preceded by a paragraph break; headlines reset it.
boolean needParagraph = false;
if (line.endsWith("</a><br>"))
line = br.readLine().trim();
while (!line.startsWith("<div align=\"right\">")) {
line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
if (line.startsWith("<h2>")) {
// The <h2> heading must equal the long name given to the preface book.
if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
} else if (line.startsWith("<h4>")) {
parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
needParagraph = false;
} else if (line.startsWith("<h4 id=")) {
parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
needParagraph = false;
} else if (line.startsWith("<div class=\"fn\">")) {
if (needParagraph)
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
needParagraph = true;
parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
} else if (line.startsWith("<p>")) {
if (needParagraph)
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
needParagraph = true;
// A paragraph ending in <br /> continues on the following line.
if (line.endsWith("<br />"))
line += br.readLine().trim();
parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
} else if (line.equals("<ul>")) {
// The content of a list is read and discarded.
// NOTE(review): readLine() is neither trimmed nor null-checked here.
while (!line.equals("</ul>")) {
line = br.readLine();
}
} else {
throw new IOException(line);
}
line = skipLines(br, "<p> </p>");
}
vorwort.getChapters().get(0).getProlog().finished();
}
// Bible books: one HTML file per METADATA entry.
for (BookMetadata bm : METADATA) {
if (!new File(inputDirectory, bm.filename + ".html").exists()) {
System.out.println("*** Skipping " + bm.filename + " - file not found ***");
continue;
}
try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
String line = br.readLine().trim();
// skipLines is defined elsewhere; presumably it reads on past lines that
// start with one of the given prefixes and returns the next other line.
line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "»»");
if (!line.equals("<p><a name=\"bb\"> </a></p>") && !line.equals("<p><a id=\"bb\"> </a></p>"))
throw new IOException(line);
line = skipLines(br);
if (line.equals("<p> </p>"))
line = br.readLine().trim();
// The <h1> line supplies the long name of the book.
Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
bible.getBooks().add(bk);
line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p> </p>");
// Book prolog: a headline (class u0), one or more introduction blocks
// (class e) and a closing bold-italic line (class u1).
FormattedText prolog = new FormattedText();
prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
line = skipLines(br);
boolean firstProlog = true;
while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
if (firstProlog) {
firstProlog = false;
} else {
prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
}
parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
line = skipLines(br);
}
// At least one introduction block is required.
if (firstProlog)
throw new IOException(line);
prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
prolog.finished();
line = skipLines(br);
if (!line.startsWith("<h"))
throw new IOException(line);
// The digit of the first heading tag is the base level; headline depths
// below are computed relative to it.
char minHeadline = line.charAt(2);
// Headlines seen since the last verse; they are attached to the next verse.
List<Headline> headlines = new ArrayList<>();
boolean inParagraph = false;
Chapter currentChapter = null;
Verse currentVerse = null;
// Footnotes whose text has not been read yet, with the matching
// "chapter,verse" references, both in order of appearance.
List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
List<String> footnoteVerses = new ArrayList<>();
// Body of the book: processed piece by piece until the <hr> line.
while (!line.equals("<hr>")) {
if (line.startsWith("<p> </p>")) {
// NOTE(review): 13 characters are removed here; presumably this is the
// length of the tested prefix as spelled in the original source file - confirm.
line = line.substring(13).trim();
if (line.length() == 0)
line = skipLines(br);
continue;
}
// restLine holds text split off the current line; it is processed in
// the next iteration instead of reading a new line.
String restLine = null;
List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
// Remove id attributes from the leading tag.
while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
// Paragraphs of class poet/einl are handled like plain paragraphs
// (both prefixes are 16 characters long).
if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
line = "<p>" + line.substring(16);
}
// Anything after the first </p> is deferred to the next iteration.
if (line.matches(".*</p>.+")) {
int pos = line.indexOf("</p>");
restLine = line.substring(pos + 4).trim();
line = line.substring(0, pos + 4);
}
if (!inParagraph && line.startsWith("<p>")) {
inParagraph = true;
line = line.substring(3).trim();
if (line.length() == 0) {
line = skipLines(br);
continue;
}
}
// A further verse marker (not at position 0) starts a new piece.
if (line.indexOf("<span class=\"vers\">", 1) != -1) {
int pos = line.indexOf("<span class=\"vers\">", 1);
restLine = line.substring(pos) + (restLine == null ? "" : restLine);
line = line.substring(0, pos).trim();
}
// The same for a poet paragraph that starts inside the line.
if (line.indexOf("<p class=\"poet\">", 1) != -1) {
int pos = line.indexOf("<p class=\"poet\">", 1);
restLine = line.substring(pos) + (restLine == null ? "" : restLine);
line = line.substring(0, pos).trim();
}
// NOTE(review): 6 characters are removed per matching suffix; presumably
// the suffix literal is 6 characters long in the original source file - confirm.
while (line.endsWith(" ")) line = line.substring(0, line.length() - 6);
if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
// Headline: remembered until the next verse is created.
Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
if (headline.contains("*"))
throw new IOException(headline);
hl.getAppendVisitor().visitText(replaceEntities(headline));
headlines.add(hl);
} else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
// Verse start: the marker content is the verse number.
int pos = line.indexOf("</span>");
if (pos == -1)
throw new IOException(line);
// 19 is the length of the <span class="vers"> prefix.
String vs = line.substring(19, pos).trim();
if (vs.endsWith(" ")) {
vs = cutAffix(vs, "", " ");
}
// Accepted verse numbers: digits, optionally followed by a comma and more digits.
if (vs.matches("[0-9]+(,[0-9]+)?")) {
currentVerse = new Verse(vs);
} else {
throw new IOException(vs);
}
// Continue after the 7 characters of </span>.
line = line.substring(pos + 7);
if (line.endsWith("</p>")) {
inParagraph = false;
line = line.substring(0, line.length() - 4);
}
line = line.trim();
if (line.startsWith(" ")) {
line = line.substring(6);
}
// Pending headlines go in front of the verse text.
for (Headline h : headlines) {
h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
}
headlines.clear();
parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
if (!inParagraph)
currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
currentChapter.getVerses().add(currentVerse);
} else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
// Chapter start: chapter numbers must be consecutive, starting at 1.
int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
currentChapter = new Chapter();
currentVerse = null;
bk.getChapters().add(currentChapter);
if (chap != bk.getChapters().size())
throw new IOException(chap + "/" + bk.getChapters().size());
// The book prolog is attached to the first chapter that is created.
if (prolog != null) {
currentChapter.setProlog(prolog);
prolog = null;
}
} else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
// Footnote text: belongs to the oldest pending footnote and must start
// with that footnote's verse reference followed by a colon.
String content = cutAffix(line, "<div class=\"fn\">", "</div>");
if (footnoteVerses.size() == 0)
throw new IOException(line);
String prefix = footnoteVerses.remove(0) + ":";
if (!content.startsWith(prefix)) {
throw new IOException(prefix + " / " + content);
}
parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
} else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith(" ") || line.startsWith("<span class=\"u2\">"))) {
// Continuation text: appended to the current verse.
if (line.endsWith("</p>")) {
inParagraph = false;
line = line.substring(0, line.length() - 4);
}
line = line.trim();
parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
if (!inParagraph)
currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
} else {
// Unknown piece: print the following line as a debugging aid, then fail.
System.err.println("Next line: " + br.readLine());
throw new IOException(line);
}
// Remember the verse reference of every footnote created by this piece;
// verse numbers that already contain a comma are used unchanged, others
// are prefixed with the current chapter number.
if (!newFootnotes.isEmpty()) {
footnotes.addAll(newFootnotes);
for (int i = 0; i < newFootnotes.size(); i++) {
if (currentVerse.getNumber().contains(",")) {
footnoteVerses.add(currentVerse.getNumber());
} else {
footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
}
}
}
if (restLine != null)
line = restLine;
else
line = skipLines(br);
}
// At the end of the book no headline and no footnote may be left over.
if (!headlines.isEmpty())
throw new IOException("" + headlines.size());
if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
for (Chapter ch : bk.getChapters()) {
for (Verse vv : ch.getVerses()) {
vv.trimWhitespace();
vv.finished();
}
}
}
}
// Appendix
Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
bible.getBooks().add(anhang);
Visitor<RuntimeException> vv = getPrologVisitor(anhang);
vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
String line = br.readLine().trim();
// Everything before the "at" anchor is ignored.
while (!line.startsWith("<a name=\"at\">")) {
line = br.readLine().trim();
}
while (!line.equals("</body>")) {
// Normalise the line: drop named anchors and spaces between tags.
line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
line = line.replaceAll("> +<", "><");
line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
if (line.startsWith("<h2>")) {
parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
} else if (line.startsWith("<a href=\"#top\"><h2>")) {
parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
} else if (line.startsWith("<h3>")) {
parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
} else if (line.startsWith("<a href=\"#top\"><h3>")) {
parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
} else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
// Table row describing one book: parts[0] is the file name of the book,
// parts[1] the link text of the first cell.
String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
if (line.contains("<td><br /><br /><a href")) {
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
line = line.replace("<td><br /><br /><a href", "<td><a href");
}
String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
// Look up the book by file name.
// NOTE(review): m stays null when no METADATA entry matches, which makes
// the cross reference line below fail with a NullPointerException.
BookMetadata m = null;
for (BookMetadata bm : METADATA) {
if (bm.filename.equals(parts[0])) {
m = bm;
break;
}
}
// The link text becomes a cross reference to chapter 1, verse 1 of the book.
bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
bold.visitText(" " + replaceEntities(title));
vv.visitLineBreak(LineBreakKind.NEWLINE);
// The description may span several lines up to the closing </td>.
line = br.readLine().trim();
while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
line = br.readLine().trim();
if (!line.equals("</tr>"))
throw new IOException(line);
} else {
throw new IOException(line);
}
line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p> </p><p> </p><p> </p><p> </p>", "<p> </p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
}
}
// Ezekiel's temple: a link to the drawing for offline use, the embedded
// image for online use.
vv.visitHeadline(1).visitText("Hesekiels Tempel");
Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");
// Jesus chronicle: one HTML file per JESUS_CHRONIK entry, appended to the appendix.
if (JESUS_CHRONIK.length > 0)
vv.visitHeadline(1).visitText("Die Jesus-Chronik");
for (String name : JESUS_CHRONIK) {
if (!new File(inputDirectory, name + ".html").exists()) {
System.out.println("*** Skipping " + name + " - file not found ***");
continue;
}
try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
// Footnotes whose text has not been read yet and the prefixes their text
// lines must start with; both lists are passed to parseJesusChronikText,
// which presumably fills them.
List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
List<String> footnotePrefixes = new ArrayList<>();
while (!line.startsWith("</body>")) {
line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
if (line.startsWith("<h2>")) {
parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
} else if (line.startsWith("<div class=\"fn\">")) {
// Footnote block: may span several lines; the footnotes inside are
// separated by <br /> and consumed in order.
while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
for (String fn : fns) {
fn = fn.trim();
String pfx = footnotePrefixes.remove(0);
Visitor<RuntimeException> fnv = footnoteList.remove(0);
if (!fn.startsWith(pfx))
throw new IOException(pfx + " / " + fn);
parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
}
} else if (line.startsWith("<p><div class=\"rot\">")) {
// Date line, written in italics without its DATE comment markers.
String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
// Bold title followed by an optional rest of the paragraph: the title
// becomes a level 3 headline, the rest is written in italics.
int pos = line.indexOf("</b><br />");
parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
if (!xref.isEmpty())
parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else if (line.startsWith("<p>")) {
parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else if (line.startsWith("©")) {
// Copyright notice, possibly spanning several lines up to </div>.
while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else if (line.startsWith("<div class=\"e\">")) {
while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else {
throw new IOException(line);
}
line = skipLines(br);
}
// Every footnote must have received its text.
if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
}
}
anhang.getChapters().get(0).getProlog().trimWhitespace();
anhang.getChapters().get(0).getProlog().finished();
return bible;
}
use of biblemulticonverter.data.Chapter in project BibleMultiConverter by schierlm.
the class NeUeParser method getPrologVisitor.
/**
 * Appends a new chapter to the given book, installs a fresh prolog on the
 * book's first chapter and hands back the visitor that appends to that prolog.
 *
 * @param book book that receives the chapter and the prolog
 * @return append visitor of the newly created prolog
 */
private Visitor<RuntimeException> getPrologVisitor(Book book) {
    Chapter addedChapter = new Chapter();
    book.getChapters().add(addedChapter);

    FormattedText prologText = new FormattedText();
    // The prolog always goes to the chapter at index 0 of the book.
    Chapter firstChapter = book.getChapters().get(0);
    firstChapter.setProlog(prologText);

    Visitor<RuntimeException> appender = prologText.getAppendVisitor();
    return appender;
}
use of biblemulticonverter.data.Chapter in project BibleMultiConverter by schierlm.
the class OSIS method doExport.
/**
 * Writes the given bible as an OSIS XML document.
 *
 * Every book becomes a {@code div} of type "book" with a main title, every
 * chapter a {@code chapter} element with a chapter title and its prolog (if
 * any), and every virtual verse a {@code verse} element preceded by its
 * headlines. Unless disabled, the requested element kinds are converted to
 * their milestoned form before the document is written.
 *
 * @param bible bible to export
 * @param exportArgs {@code exportArgs[0]} is the output file; the optional
 *            {@code exportArgs[1]} is a comma separated list of element names
 *            to milestone (default "verse"), or "-" to milestone nothing
 * @throws IllegalArgumentException if an element name cannot be milestoned
 * @throws Exception if the document cannot be built or written
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();

    // Root element with its namespace declarations.
    Element root = doc.createElement("osis");
    doc.appendChild(root);
    root.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
    root.setAttribute("xmlns", "http://www.bibletechnologies.net/2003/OSIS/namespace");
    root.setAttribute("xsi:schemaLocation", "http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.2.1.1.xsd");

    Element textElement = doc.createElement("osisText");
    root.appendChild(textElement);
    textElement.setAttribute("canonical", "true");
    textElement.setAttribute("osisIDWork", "Exported");
    textElement.appendChild(buildHeader(doc, bible.getName()));

    for (Book currentBook : bible.getBooks()) {
        String bookOsisId = currentBook.getId().getOsisID();

        Element bookElement = doc.createElement("div");
        textElement.appendChild(bookElement);
        bookElement.setAttribute("type", "book");
        bookElement.setAttribute("canonical", "true");
        bookElement.setAttribute("osisID", bookOsisId);

        Element mainTitle = doc.createElement("title");
        bookElement.appendChild(mainTitle);
        mainTitle.setAttribute("type", "main");
        mainTitle.appendChild(doc.createTextNode(currentBook.getLongName()));

        int chapterNo = 0;
        for (Chapter currentChapter : currentBook.getChapters()) {
            chapterNo++;
            String chapterOsisId = bookOsisId + "." + chapterNo;

            Element chapterElement = doc.createElement("chapter");
            bookElement.appendChild(chapterElement);
            chapterElement.setAttribute("osisID", chapterOsisId);
            // Prolog and headlines are written directly below the chapter element.
            OSISVisitor chapterVisitor = new OSISVisitor(chapterElement, currentBook.getId().isNT());

            Element chapterTitle = doc.createElement("title");
            chapterElement.appendChild(chapterTitle);
            chapterTitle.setAttribute("type", "chapter");
            chapterTitle.appendChild(doc.createTextNode(currentBook.getAbbr() + " " + chapterNo));

            if (chp(currentChapter) != null) {
                currentChapter.getProlog().accept(chapterVisitor);
            }

            for (VirtualVerse virtualVerse : currentChapter.createVirtualVerses()) {
                String verseOsisId = chapterOsisId + "." + virtualVerse.getNumber();
                for (Headline headline : virtualVerse.getHeadlines()) {
                    headline.accept(chapterVisitor.visitHeadline(headline.getDepth()));
                }

                Element verseElement = doc.createElement("verse");
                chapterElement.appendChild(verseElement);
                verseElement.setAttribute("osisID", verseOsisId);

                String virtualNumber = "" + virtualVerse.getNumber();
                for (Verse realVerse : virtualVerse.getVerses()) {
                    // A verse whose own number differs from the virtual verse
                    // number gets that number as a bold marker in front of its text.
                    if (!realVerse.getNumber().equals(virtualNumber)) {
                        Element marker = doc.createElement("hi");
                        verseElement.appendChild(marker);
                        marker.setAttribute("type", "bold");
                        marker.appendChild(doc.createTextNode("(" + realVerse.getNumber() + ")"));
                    }
                    realVerse.accept(new OSISVisitor(verseElement, currentBook.getId().isNT()));
                }
            }
        }
    }

    String milestonedElementNames = "verse";
    if (exportArgs.length > 1) {
        milestonedElementNames = exportArgs[1];
    }
    boolean milestoningDisabled = milestonedElementNames.equals("-");
    if (!milestoningDisabled) {
        Set<String> milestonedElements = new HashSet<>(Arrays.asList(milestonedElementNames.split(",")));
        Set<String> unsupportedMilestonedElements = new HashSet<>(milestonedElements);
        unsupportedMilestonedElements.removeAll(GENERATED_MILESTONEABLE_ELEMENTS);
        if (!unsupportedMilestonedElements.isEmpty()) {
            // Report every offending name before giving up.
            for (String badName : unsupportedMilestonedElements) {
                String reason = GENERATED_UNMILESTONEABLE_ELEMENTS.contains(badName)
                        ? " may not be milestoned"
                        : " is never generated by the OSIS export";
                System.out.println("ERROR: " + badName + reason);
            }
            throw new IllegalArgumentException("Cannot create milestoned elements: " + milestonedElementNames);
        }
        convertChildrenToMilestoned(doc.getDocumentElement(), milestonedElements);
    }

    TransformerFactory.newInstance().newTransformer().transform(
            new DOMSource(doc),
            new StreamResult(new File(exportArgs[0])));
}

/** Returns the prolog of the given chapter; may be {@code null}. */
private static FormattedText chp(Chapter chapter) {
    return chapter.getProlog();
}
use of biblemulticonverter.data.Chapter in project BibleMultiConverter by schierlm.
the class OSIS method parseBook.
/**
 * Walks over the direct children of an OSIS book element and fills the given
 * book from its chapter elements.
 *
 * Text with non-whitespace content, unexpected tags and unexpected titles
 * only produce warnings; comments are ignored. A chapter whose OSIS id does
 * not start with {@code bookName + "."} aborts the parse. Milestoned elements
 * still open after the last chapter are reported in a single warning.
 *
 * @param bookName OSIS name of the book, used as warning context and id prefix
 * @param osisBook the book element to read
 * @param bibleBook the book that receives the parsed chapters
 * @throws IllegalStateException if a chapter id does not belong to this book
 */
private void parseBook(String bookName, Element osisBook, Book bibleBook) {
    warningContext = bookName;
    convertToMilestoned(osisBook);
    List<Element> unclosedElements = new ArrayList<Element>();
    final String chapterPrefix = bookName + ".";

    for (Node child = osisBook.getFirstChild(); child != null; child = child.getNextSibling()) {
        if (child instanceof Text) {
            boolean blank = ((Text) child).getTextContent().trim().isEmpty();
            if (!blank) {
                printWarning("WARNING: Non-whitespace text at book level");
            }
            continue;
        }
        if (child instanceof Comment) {
            continue;
        }

        Element elem = (Element) child;
        String tag = elem.getNodeName();
        if (tag.equals("title")) {
            String titleType = elem.getAttribute("type");
            if (!titleType.equals("main")) {
                printWarning("WARNING: invalid book level title type: " + titleType);
            } else if (!getTextChildren(elem).equals(bibleBook.getLongName())) {
                printWarning("WARNING: More than one book title found");
            }
        } else if (tag.equals("chapter")) {
            String rawId = elem.getAttribute("osisID");
            String chapterName = rawId;
            int dash = rawId.indexOf("-");
            if (dash != -1) {
                // Only the part in front of the first dash is used.
                chapterName = rawId.substring(0, dash);
                printWarning("WARNING: Invalid chapter OSIS reference: " + rawId + ", using " + chapterName);
            }
            if (!chapterName.startsWith(chapterPrefix)) {
                throw new IllegalStateException("Invalid chapter " + chapterName + " of book " + bookName);
            }
            int cnumber = Integer.parseInt(chapterName.substring(chapterPrefix.length()));
            // Create chapters up to and including the requested number.
            while (bibleBook.getChapters().size() < cnumber) {
                bibleBook.getChapters().add(new Chapter());
            }
            warningContext = chapterName;
            parseChapter(chapterName, elem, bibleBook.getChapters().get(cnumber - 1), unclosedElements);
            warningContext = bookName;
        } else {
            printWarning("WARNING: invalid book level tag: " + tag);
        }
    }

    if (!unclosedElements.isEmpty()) {
        StringBuilder message = new StringBuilder("WARNING: Unclosed milestoned elements:");
        for (Element open : unclosedElements) {
            message.append(" ")
                    .append(open.getNodeName())
                    .append("[")
                    .append(open.getAttribute("sID"))
                    .append("]");
        }
        printWarning(message.toString());
    }
}
use of biblemulticonverter.data.Chapter in project BibleMultiConverter by schierlm.
the class RoundtripHTML method doImport.
/**
 * Reads a bible back from the directory layout of the roundtrip HTML format.
 *
 * First {@code metadata.js} is read: the bible name is taken from the first
 * line (characters 13 up to the last two), the second line is skipped, and
 * the remaining lines are {@code key:value} pairs. Quoted values are
 * unescaped strings, {@code true}/{@code false} are booleans, everything else
 * is parsed as an integer. A line starting with <code>}</code> closes one book, which is
 * created from the keys "abbr", "osis", "short" and "long" with as many empty
 * chapters as "chapters" says.
 *
 * Then one HTML file per chapter is read; its prolog, verses and footnotes
 * sections are parsed with {@code parseLine}. Footnote text lines are matched
 * to the footnotes collected from prolog and verses by their running number.
 *
 * A file that ends in the middle of a section is reported as an
 * {@link IOException} naming the file instead of failing with a
 * {@link NullPointerException}.
 *
 * @param inputDir directory that contains metadata.js and the chapter files
 * @return the bible that was read
 * @throws IOException if a file ends prematurely or a line has an unexpected shape
 * @throws Exception if a file cannot be opened or a value cannot be parsed
 */
@Override
public Bible doImport(File inputDir) throws Exception {
    Bible bible;
    // metadata
    try (BufferedReader br = createReader(inputDir, "metadata.js")) {
        String line = readRequiredLine(br, "metadata.js");
        // The second line carries no information that is needed here.
        br.readLine();
        bible = new Bible(line.substring(13, line.length() - 2).replace("\\\"", "\"").replace("\\\\", "\\"));
        // Values are kept from one book to the next; every key read overwrites the previous value.
        Map<String, Object> fieldMap = new HashMap<String, Object>();
        while ((line = br.readLine()) != null) {
            if (line.startsWith("}")) {
                Book bk = new Book((String) fieldMap.get("abbr"), BookID.fromOsisId((String) fieldMap.get("osis")), (String) fieldMap.get("short"), (String) fieldMap.get("long"));
                for (int i = 0; i < (Integer) fieldMap.get("chapters"); i++) {
                    bk.getChapters().add(new Chapter());
                }
                bible.getBooks().add(bk);
                continue;
            }
            int pos = line.indexOf(":");
            String key = line.substring(0, pos);
            String value = line.substring(pos + 1);
            if (value.endsWith(","))
                value = value.substring(0, value.length() - 1);
            if (value.startsWith("\"") && value.endsWith("\"")) {
                fieldMap.put(key, value.substring(1, value.length() - 1).replace("\\\"", "\"").replace("\\\\", "\\"));
            } else if (value.equals("true") || value.equals("false")) {
                fieldMap.put(key, Boolean.parseBoolean(value));
            } else {
                fieldMap.put(key, Integer.parseInt(value));
            }
        }
    }
    // chapters
    for (Book bk : bible.getBooks()) {
        int cnumber = 0;
        for (Chapter ch : bk.getChapters()) {
            cnumber++;
            String chapterFile = getTypeDir(bk.getId()) + "/" + bk.getAbbr() + "_" + cnumber + ".html";
            try (BufferedReader br = createReader(inputDir, chapterFile)) {
                String line;
                // Footnotes announced by prolog and verses, in order of appearance.
                List<FormattedText.Visitor<RuntimeException>> footnotes = new ArrayList<>();
                while ((line = br.readLine()) != null) {
                    if (line.equals("<div class=\"biblehtmlcontent prolog\">")) {
                        // The prolog is a single line followed by the closing tag.
                        line = readRequiredLine(br, chapterFile);
                        FormattedText prolog = new FormattedText();
                        int end = parseLine(prolog.getAppendVisitor(), line, 0, footnotes);
                        ch.setProlog(prolog);
                        if (end != line.length())
                            throw new IOException(line.substring(end));
                        line = readRequiredLine(br, chapterFile);
                        if (!line.equals("</div>"))
                            throw new IOException(line);
                    } else if (line.equals("<div class=\"biblehtmlcontent verses\" id=\"verses\">")) {
                        while ((line = br.readLine()) != null) {
                            if (line.equals("</div>"))
                                break;
                            if (!line.startsWith("<div class=\"v\" id=\"v") || !line.endsWith("</div>"))
                                throw new IOException(line);
                            // Strip the 20 character prefix and the 6 characters of </div>;
                            // what remains starts with the verse number up to "\">".
                            line = line.substring(20, line.length() - 6);
                            int pos = line.indexOf("\">");
                            Verse v = new Verse(line.substring(0, pos));
                            int end = parseLine(v.getAppendVisitor(), line, pos + 2, footnotes);
                            if (end != line.length())
                                throw new IOException(line.substring(end));
                            ch.getVerses().add(v);
                        }
                        // The loop ends either at the closing tag or at end of file;
                        // only the latter is an error.
                        if (line == null)
                            throw new IOException("Unexpected end of file in " + chapterFile + ": verses section is not closed");
                    } else if (line.equals("<div class=\"biblehtmlcontent footnotes\">")) {
                        // One line per collected footnote, numbered from 1.
                        for (int i = 0; i < footnotes.size(); i++) {
                            line = readRequiredLine(br, chapterFile);
                            String prefix = "<div class=\"fn\"><sup class=\"fnt\"><a name=\"fn" + (i + 1) + "\" href=\"#fnm" + (i + 1) + "\">" + (i + 1) + "</a></sup> ";
                            if (!line.startsWith(prefix) || !line.endsWith("</div>"))
                                throw new IOException(line);
                            line = line.substring(prefix.length(), line.length() - 6);
                            int end = parseLine(footnotes.get(i), line, 0, null);
                            if (end != line.length())
                                throw new IOException(line.substring(end));
                        }
                        line = readRequiredLine(br, chapterFile);
                        if (!line.equals("</div>"))
                            throw new IOException(line);
                    }
                }
                if (ch.getProlog() != null)
                    ch.getProlog().finished();
                for (Verse v : ch.getVerses()) v.finished();
            }
        }
    }
    return bible;
}

/**
 * Reads one line that has to exist.
 *
 * @param br reader to read from
 * @param fileName name of the file being read, used in the error message
 * @return the line read, never {@code null}
 * @throws IOException if the end of the file has been reached or reading fails
 */
private static String readRequiredLine(BufferedReader br, String fileName) throws IOException {
    String line = br.readLine();
    if (line == null)
        throw new IOException("Unexpected end of file in " + fileName);
    return line;
}
Aggregations