Use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.
Class NeUeParser, method doImport.
/**
 * Builds a {@link Bible} from a directory of HTML files.
 *
 * <p>The method works in four phases: (1) the main file ({@code NeUe.htm}, or
 * {@code index.htm} if the former does not exist) is read for the text version,
 * the table of contents and the preface; (2) one HTML file per entry of
 * {@code METADATA} is parsed into a book with chapters, verses, headlines and
 * footnotes; (3) {@code bibel.html} is parsed into an appendix book; (4) the
 * files named in {@code JESUS_CHRONIK} are appended to the same appendix.
 *
 * <p>The parser is strictly line oriented: any line that does not match one of
 * the expected shapes aborts the import with an {@link IOException} whose
 * message is the offending line (or the mismatching values).
 *
 * @param inputDirectory directory that contains the HTML files
 * @return the populated bible
 * @throws Exception if a file cannot be read or a line has an unexpected shape
 */
@Override
public Bible doImport(File inputDirectory) throws Exception {
Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
// Fixed metadata; version, date and revision are filled in below from the main file.
MetadataBook metadata = new MetadataBook();
metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
metadata.setValue(MetadataBookKey.language, "GER");
bible.getBooks().add(metadata.getBook());
// Prefer NeUe.htm as main file and fall back to index.htm when it is absent.
String mainFile = "NeUe.htm";
if (!new File(inputDirectory, mainFile).exists())
mainFile = "index.htm";
try (BufferedReader br = createReader(inputDirectory, mainFile)) {
String line = br.readLine().trim();
// Phase 1a: scan up to the table of contents, picking up the "Textstand: " version string.
while (!line.startsWith("<p class=\"u3\">")) {
if (line.contains("Textstand: ")) {
// 11 is the length of "Textstand: "; the version runs up to the next '<'.
line = line.substring(line.indexOf("Textstand: ") + 11);
line = line.substring(0, line.indexOf('<'));
metadata.setValue(MetadataBookKey.version, line);
// The date recorded is the date of the import run, not a date taken from the file.
metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
// The revision is the version string with everything except digits removed.
metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
metadata.finished();
}
line = br.readLine().trim();
}
// Phase 1b: table of contents. Every link must match the next expected entry of
// METADATA (book files) or JESUS_CHRONIK (files whose URL starts with "0"), in order.
Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a> (?:</p>)?");
int bookIndex = 0, jcIndex = 0;
while (!line.startsWith("<a name=\"vorwort\">")) {
if (line.equals("<br>")) {
line = br.readLine().trim();
if (line.startsWith("»» "))
line = line.substring("»» ".length());
}
Matcher m = tocPattern.matcher(line);
if (m.matches()) {
String url = m.group(1);
String shortName = replaceEntities(m.group(2));
if (url.endsWith(".html#bb")) {
// Strip the 8 characters of ".html#bb" to obtain the bare file name.
String filename = url.substring(0, url.length() - 8);
BookMetadata bm = METADATA[bookIndex];
if (!bm.filename.equals(filename))
throw new IOException(filename + "/" + bm.filename);
// The link text becomes the short name of the book.
bm.shortname = shortName;
bookIndex++;
} else if (url.startsWith("0")) {
if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
jcIndex++;
} else {
throw new IOException(url);
}
} else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p> </p>") && !line.equals("<p><a name=\"bb\"> </a></p>")) {
// Anything that is neither a link nor one of the tolerated filler lines is an error.
throw new IOException(line);
}
line = br.readLine().trim();
}
if (bookIndex != METADATA.length)
throw new IOException(bookIndex + " != " + METADATA.length);
// When the table of contents lists no Jesus-Chronik file at all, the field is replaced by
// an empty array so that the later loop over JESUS_CHRONIK does nothing.
if (jcIndex == 0)
JESUS_CHRONIK = new String[0];
if (jcIndex != JESUS_CHRONIK.length)
throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
// Vorwort
// Phase 1c: the preface is stored as the prolog of an INTRODUCTION book.
Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
bible.getBooks().add(vorwort);
Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
// Set after a text block was written, so that the next text block is preceded by a paragraph break.
boolean needParagraph = false;
if (line.endsWith("</a><br>"))
line = br.readLine().trim();
while (!line.startsWith("<div align=\"right\">")) {
line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
if (line.startsWith("<h2>")) {
// The <h2> must repeat the long name of the preface book; it is only validated, not emitted.
if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
} else if (line.startsWith("<h4>")) {
parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
needParagraph = false;
} else if (line.startsWith("<h4 id=")) {
// Headline variant carrying an id attribute and a back link to the preface anchor.
parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
needParagraph = false;
} else if (line.startsWith("<div class=\"fn\">")) {
if (needParagraph)
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
needParagraph = true;
// "fn" blocks of the preface are rendered in italics.
parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
} else if (line.startsWith("<p>")) {
if (needParagraph)
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
needParagraph = true;
// A paragraph ending in "<br />" continues on the next physical line.
if (line.endsWith("<br />"))
line += br.readLine().trim();
parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
} else if (line.equals("<ul>")) {
// Lists are skipped entirely.
while (!line.equals("</ul>")) {
line = br.readLine();
}
} else {
throw new IOException(line);
}
line = skipLines(br, "<p> </p>");
}
vorwort.getChapters().get(0).getProlog().finished();
}
// Phase 2: one HTML file per bible book.
for (BookMetadata bm : METADATA) {
if (!new File(inputDirectory, bm.filename + ".html").exists()) {
System.out.println("*** Skipping " + bm.filename + " - file not found ***");
continue;
}
try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
String line = br.readLine().trim();
// Skip the HTML preamble and navigation up to the "bb" anchor.
line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "»»");
if (!line.equals("<p><a name=\"bb\"> </a></p>") && !line.equals("<p><a id=\"bb\"> </a></p>"))
throw new IOException(line);
line = skipLines(br);
if (line.equals("<p> </p>"))
line = br.readLine().trim();
// The <h1> line supplies the long name of the book.
Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
bible.getBooks().add(bk);
line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p> </p>");
// Book introduction: a "u0" headline, one or more "e" blocks and a closing "u1" line.
// It is attached as prolog to the first chapter encountered further down.
FormattedText prolog = new FormattedText();
prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
line = skipLines(br);
boolean firstProlog = true;
while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
if (firstProlog) {
firstProlog = false;
} else {
prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
}
parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
line = skipLines(br);
}
// At least one introduction block is required.
if (firstProlog)
throw new IOException(line);
prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
// The "u1" line is emitted in bold italics.
parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
prolog.finished();
line = skipLines(br);
if (!line.startsWith("<h"))
throw new IOException(line);
// The level digit of the first headline is the baseline: it maps to headline depth 1.
char minHeadline = line.charAt(2);
// Headlines seen since the last verse; they are attached to the next verse.
List<Headline> headlines = new ArrayList<>();
boolean inParagraph = false;
Chapter currentChapter = null;
Verse currentVerse = null;
// Footnote visitors still waiting for their text, and the "chapter,verse" prefix
// expected at the start of each corresponding footnote line (parallel lists).
List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
List<String> footnoteVerses = new ArrayList<>();
// Main loop over the book body; the body ends at the first <hr> line.
while (!line.equals("<hr>")) {
if (line.startsWith("<p> </p>")) {
// NOTE(review): the literal tested above is 8 characters long, yet 13 characters are
// skipped here - presumably the literal was "<p>&nbsp;</p>" before entity decoding;
// confirm against the upstream source.
line = line.substring(13).trim();
if (line.length() == 0)
line = skipLines(br);
continue;
}
// Remainder of the physical line that has to be processed in a later iteration.
String restLine = null;
List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
// Drop id attributes from the tag at the start of the line.
while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
// Treat "poet" and "einl" paragraphs like plain paragraphs (both opening tags are 16 characters).
if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
line = "<p>" + line.substring(16);
}
// Split after the first </p> when something follows it on the same line.
if (line.matches(".*</p>.+")) {
int pos = line.indexOf("</p>");
restLine = line.substring(pos + 4).trim();
line = line.substring(0, pos + 4);
}
if (!inParagraph && line.startsWith("<p>")) {
inParagraph = true;
line = line.substring(3).trim();
if (line.length() == 0) {
line = skipLines(br);
continue;
}
}
// Split before a second verse marker so that each iteration handles at most one verse.
if (line.indexOf("<span class=\"vers\">", 1) != -1) {
int pos = line.indexOf("<span class=\"vers\">", 1);
restLine = line.substring(pos) + (restLine == null ? "" : restLine);
line = line.substring(0, pos).trim();
}
// Likewise split before a "poet" paragraph that starts in the middle of the line.
if (line.indexOf("<p class=\"poet\">", 1) != -1) {
int pos = line.indexOf("<p class=\"poet\">", 1);
restLine = line.substring(pos) + (restLine == null ? "" : restLine);
line = line.substring(0, pos).trim();
}
// NOTE(review): a one-character suffix is tested but 6 characters are removed -
// presumably the literal was "&nbsp;" before entity decoding; confirm against upstream.
while (line.endsWith(" ")) line = line.substring(0, line.length() - 6);
if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
// Headline outside a paragraph: depth is relative to the first headline of the book.
Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
// A '*' in a headline is rejected.
if (headline.contains("*"))
throw new IOException(headline);
hl.getAppendVisitor().visitText(replaceEntities(headline));
headlines.add(hl);
} else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
// Start of a verse: the verse number sits between the 19-character opening tag and </span>.
int pos = line.indexOf("</span>");
if (pos == -1)
throw new IOException(line);
String vs = line.substring(19, pos).trim();
if (vs.endsWith(" ")) {
vs = cutAffix(vs, "", " ");
}
// Accepted forms are "N" and "N,M".
if (vs.matches("[0-9]+(,[0-9]+)?")) {
currentVerse = new Verse(vs);
} else {
throw new IOException(vs);
}
line = line.substring(pos + 7);
if (line.endsWith("</p>")) {
inParagraph = false;
line = line.substring(0, line.length() - 4);
}
line = line.trim();
// NOTE(review): a one-character prefix is tested but 6 characters are skipped -
// presumably "&nbsp;" in the upstream source; confirm.
if (line.startsWith(" ")) {
line = line.substring(6);
}
// Pending headlines are emitted at the start of this verse.
for (Headline h : headlines) {
h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
}
headlines.clear();
parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
// A paragraph that closed on this line is recorded as a paragraph break inside the verse.
if (!inParagraph)
currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
currentChapter.getVerses().add(currentVerse);
} else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
// Chapter marker: chapters must appear in order without gaps.
int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
currentChapter = new Chapter();
currentVerse = null;
bk.getChapters().add(currentChapter);
if (chap != bk.getChapters().size())
throw new IOException(chap + "/" + bk.getChapters().size());
// The book introduction is attached to the first chapter only.
if (prolog != null) {
currentChapter.setProlog(prolog);
prolog = null;
}
} else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
// Footnote text: must start with the "chapter,verse:" prefix of the oldest pending footnote.
String content = cutAffix(line, "<div class=\"fn\">", "</div>");
if (footnoteVerses.size() == 0)
throw new IOException(line);
String prefix = footnoteVerses.remove(0) + ":";
if (!content.startsWith(prefix)) {
throw new IOException(prefix + " / " + content);
}
parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
} else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith(" ") || line.startsWith("<span class=\"u2\">"))) {
// Continuation text of the current verse.
if (line.endsWith("</p>")) {
inParagraph = false;
line = line.substring(0, line.length() - 4);
}
line = line.trim();
parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
if (!inParagraph)
currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
} else {
// Unrecognised line: print the following line as a debugging aid, then abort.
System.err.println("Next line: " + br.readLine());
throw new IOException(line);
}
// Register footnotes opened on this line together with the prefix their text must carry.
if (!newFootnotes.isEmpty()) {
footnotes.addAll(newFootnotes);
for (int i = 0; i < newFootnotes.size(); i++) {
if (currentVerse.getNumber().contains(",")) {
footnoteVerses.add(currentVerse.getNumber());
} else {
footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
}
}
}
// Continue with the split-off remainder if there is one, otherwise read on.
if (restLine != null)
line = restLine;
else
line = skipLines(br);
}
// At the end of the book no headline and no footnote may be left dangling.
if (!headlines.isEmpty())
throw new IOException("" + headlines.size());
if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
for (Chapter ch : bk.getChapters()) {
for (Verse vv : ch.getVerses()) {
vv.trimWhitespace();
vv.finished();
}
}
}
}
// Anhang
// Phase 3: appendix book; its prolog receives the overview table from bibel.html.
Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
bible.getBooks().add(anhang);
Visitor<RuntimeException> vv = getPrologVisitor(anhang);
vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
String line = br.readLine().trim();
// Skip everything before the "at" anchor.
while (!line.startsWith("<a name=\"at\">")) {
line = br.readLine().trim();
}
while (!line.equals("</body>")) {
// Normalise the line: drop named anchors, whitespace between tags and leading line breaks in cells.
line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
line = line.replaceAll("> +<", "><");
line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
if (line.startsWith("<h2>")) {
parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
} else if (line.startsWith("<a href=\"#top\"><h2>")) {
parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
} else if (line.startsWith("<h3>")) {
parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
} else if (line.startsWith("<a href=\"#top\"><h3>")) {
parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
} else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
// Table row for one book: parts[0] is the file name, parts[1] the link text.
String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
if (line.contains("<td><br /><br /><a href")) {
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
line = line.replace("<td><br /><br /><a href", "<td><a href");
}
String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
// Look up the book metadata by file name.
BookMetadata m = null;
for (BookMetadata bm : METADATA) {
if (bm.filename.equals(parts[0])) {
m = bm;
break;
}
}
// NOTE(review): m stays null when no METADATA entry has this file name, in which case the
// next line throws a NullPointerException - confirm that the table only links known books.
bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
bold.visitText(" " + replaceEntities(title));
vv.visitLineBreak(LineBreakKind.NEWLINE);
// The description may span several physical lines up to the closing </td>.
line = br.readLine().trim();
while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
line = br.readLine().trim();
if (!line.equals("</tr>"))
throw new IOException(line);
} else {
throw new IOException(line);
}
line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p> </p><p> </p><p> </p><p> </p>", "<p> </p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
}
}
// Hesekiels Tempel
// The picture is emitted as a link in OFFLINE raw HTML and as an inline image in ONLINE raw HTML.
vv.visitHeadline(1).visitText("Hesekiels Tempel");
Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");
// Jesus-Chronik
// Phase 4: the Jesus-Chronik files are appended to the appendix prolog as well.
if (JESUS_CHRONIK.length > 0)
vv.visitHeadline(1).visitText("Die Jesus-Chronik");
for (String name : JESUS_CHRONIK) {
if (!new File(inputDirectory, name + ".html").exists()) {
System.out.println("*** Skipping " + name + " - file not found ***");
continue;
}
try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
// Pending footnote visitors and the prefixes their texts must start with (parallel lists,
// filled by parseJesusChronikText and drained by the "fn" blocks).
List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
List<String> footnotePrefixes = new ArrayList<>();
while (!line.startsWith("</body>")) {
line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
if (line.startsWith("<h2>")) {
parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
} else if (line.startsWith("<div class=\"fn\">")) {
// Footnote block: join physical lines up to </div>, then one footnote per "<br />" segment.
while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
for (String fn : fns) {
fn = fn.trim();
String pfx = footnotePrefixes.remove(0);
Visitor<RuntimeException> fnv = footnoteList.remove(0);
if (!fn.startsWith(pfx))
throw new IOException(pfx + " / " + fn);
parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
}
} else if (line.startsWith("<p><div class=\"rot\">")) {
// Date line (wrapped in DATE comment markers), rendered in italics.
String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
// Bold title followed by an optional second part, which is rendered in italics.
int pos = line.indexOf("</b><br />");
parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
if (!xref.isEmpty())
parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else if (line.startsWith("<p>")) {
parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else if (line.startsWith("©")) {
// Copyright notice, possibly spanning several physical lines.
while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else if (line.startsWith("<div class=\"e\">")) {
while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
vv.visitLineBreak(LineBreakKind.PARAGRAPH);
} else {
throw new IOException(line);
}
line = skipLines(br);
}
// Every footnote opened in this file must have received its text.
if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
}
}
anhang.getChapters().get(0).getProlog().trimWhitespace();
anhang.getChapters().get(0).getProlog().finished();
return bible;
}
Use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.
Class OSIS, method doExport.
/**
 * Serialises the given bible as an OSIS XML document.
 *
 * <p>A DOM tree is built with one {@code div type="book"} per book, one
 * {@code chapter} element per chapter and one {@code verse} element per virtual
 * verse. Afterwards the element kinds named in the second export argument are
 * converted to milestoned form, and the tree is written to the file named by
 * the first export argument.
 *
 * @param bible      the bible to export
 * @param exportArgs {@code exportArgs[0]} is the output file; the optional
 *                   {@code exportArgs[1]} is a comma separated list of element
 *                   names to milestone (default {@code "verse"}, {@code "-"}
 *                   disables milestoning)
 * @throws IllegalArgumentException if an element name cannot be milestoned
 * @throws Exception                if building or writing the document fails
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();

    // Root element with the namespace declarations.
    Element root = document.createElement("osis");
    document.appendChild(root);
    root.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
    root.setAttribute("xmlns", "http://www.bibletechnologies.net/2003/OSIS/namespace");
    root.setAttribute("xsi:schemaLocation", "http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.2.1.1.xsd");

    Element textRoot = document.createElement("osisText");
    root.appendChild(textRoot);
    textRoot.setAttribute("canonical", "true");
    textRoot.setAttribute("osisIDWork", "Exported");
    textRoot.appendChild(buildHeader(document, bible.getName()));

    for (Book currentBook : bible.getBooks()) {
        Element bookDiv = document.createElement("div");
        textRoot.appendChild(bookDiv);
        bookDiv.setAttribute("type", "book");
        bookDiv.setAttribute("canonical", "true");
        bookDiv.setAttribute("osisID", currentBook.getId().getOsisID());

        Element mainTitle = document.createElement("title");
        bookDiv.appendChild(mainTitle);
        mainTitle.setAttribute("type", "main");
        mainTitle.appendChild(document.createTextNode(currentBook.getLongName()));

        // Chapters are numbered by their position in the book, starting at 1.
        int chapterNo = 0;
        for (Chapter currentChapter : currentBook.getChapters()) {
            chapterNo += 1;
            String chapterOsisID = currentBook.getId().getOsisID() + "." + chapterNo;

            Element chapterElem = document.createElement("chapter");
            bookDiv.appendChild(chapterElem);
            chapterElem.setAttribute("osisID", chapterOsisID);
            OSISVisitor chapterVisitor = new OSISVisitor(chapterElem, currentBook.getId().isNT());

            Element chapterTitle = document.createElement("title");
            chapterElem.appendChild(chapterTitle);
            chapterTitle.setAttribute("type", "chapter");
            chapterTitle.appendChild(document.createTextNode(currentBook.getAbbr() + " " + chapterNo));

            if (chp(currentChapter) != null) {
                currentChapter.getProlog().accept(chapterVisitor);
            }

            for (VirtualVerse virtual : currentChapter.createVirtualVerses()) {
                String verseOsisID = currentBook.getId().getOsisID() + "." + chapterNo + "." + virtual.getNumber();

                // Headlines go to chapter level, in front of the verse element.
                for (Headline headline : virtual.getHeadlines()) {
                    headline.accept(chapterVisitor.visitHeadline(headline.getDepth()));
                }

                Element verseElem = document.createElement("verse");
                chapterElem.appendChild(verseElem);
                verseElem.setAttribute("osisID", verseOsisID);

                for (Verse real : virtual.getVerses()) {
                    // A real verse whose number differs from the virtual verse number
                    // gets that number prepended in bold.
                    boolean sameNumber = real.getNumber().equals("" + virtual.getNumber());
                    if (!sameNumber) {
                        Element marker = document.createElement("hi");
                        verseElem.appendChild(marker);
                        marker.setAttribute("type", "bold");
                        marker.appendChild(document.createTextNode("(" + real.getNumber() + ")"));
                    }
                    real.accept(new OSISVisitor(verseElem, currentBook.getId().isNT()));
                }
            }
        }
    }

    String milestoneSpec = exportArgs.length > 1 ? exportArgs[1] : "verse";
    if (!milestoneSpec.equals("-")) {
        Set<String> requested = new HashSet<>(Arrays.asList(milestoneSpec.split(",")));
        Set<String> rejected = new HashSet<>(requested);
        rejected.removeAll(GENERATED_MILESTONEABLE_ELEMENTS);
        if (!rejected.isEmpty()) {
            for (String name : rejected) {
                String reason = GENERATED_UNMILESTONEABLE_ELEMENTS.contains(name)
                        ? " may not be milestoned"
                        : " is never generated by the OSIS export";
                System.out.println("ERROR: " + name + reason);
            }
            throw new IllegalArgumentException("Cannot create milestoned elements: " + milestoneSpec);
        }
        convertChildrenToMilestoned(document.getDocumentElement(), requested);
    }

    File target = new File(exportArgs[0]);
    TransformerFactory.newInstance().newTransformer().transform(new DOMSource(document), new StreamResult(target));
}

/** Returns the prolog of the given chapter, or {@code null} if it has none. */
private static FormattedText chp(Chapter chapter) {
    return chapter.getProlog();
}
Use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.
Class OSIS, method parseChapter.
/**
 * Converts the children of one OSIS {@code chapter} element into the prolog
 * and verses of the given chapter.
 *
 * <p>{@code title} elements are collected as headlines and attached to the next
 * verse (or to the prolog). Any other content found before the first verse is
 * wrapped into a synthetic {@code prolog} element and parsed as chapter prolog;
 * such content after a verse only produces a warning.
 *
 * @param chapterName      OSIS ID of the chapter; verse IDs must start with
 *                         this name followed by a dot
 * @param osisChapter      the DOM element of the chapter
 * @param chapter          the chapter that receives prolog and verses
 * @param unclosedElements passed through to {@code convertFromMilestoned}
 * @throws IllegalArgumentException if a verse still carries a milestone
 *                                  attribute ({@code sID} or {@code eID})
 * @throws IllegalStateException    if a verse has no osisID or an osisID that
 *                                  does not belong to this chapter
 */
private void parseChapter(String chapterName, Element osisChapter, Chapter chapter, List<Element> unclosedElements) {
    // -1 until the first verse or the prolog has been seen.
    int lastVerse = -1;
    // Headlines waiting for the next verse or prolog.
    List<Headline> headlines = new ArrayList<Headline>();
    for (Node node = osisChapter.getFirstChild(); node != null; node = node.getNextSibling()) {
        boolean startProlog = false;
        if (node instanceof Text) {
            if (node.getTextContent().trim().length() == 0)
                continue;
            if (lastVerse == -1) {
                startProlog = true;
            } else {
                printWarning("WARNING: Non-whitespace at chapter level: " + node.getTextContent());
            }
        } else if (node instanceof Element) {
            Element elem = (Element) node;
            if (elem.getNodeName().equals("title")) {
                // Chapter titles become depth 1 headlines, all other titles depth 2.
                Headline hl = new Headline(2);
                if (elem.getAttribute("type").equals("chapter")) {
                    hl = new Headline(1);
                }
                if (elem.getChildNodes().getLength() == 1 && elem.getFirstChild() instanceof Text) {
                    String text = elem.getFirstChild().getTextContent();
                    if (!text.equals(text.trim())) {
                        printWarning("WARNING: Whitespace at beginning/end of headline: '" + text + "'");
                        elem.getFirstChild().setNodeValue(text.trim());
                    }
                }
                convertFromMilestoned(elem, unclosedElements);
                parseFormattedText(null, elem, hl);
                if (hl.getElementTypes(1).length() == 0) {
                    printWarning("WARNING: Empty headline in " + chapterName);
                } else {
                    headlines.add(hl);
                }
            } else if (elem.getNodeName().equals("verse")) {
                String osisID = elem.getAttribute("osisID");
                // A verse that still carries a start (sID) or end (eID) milestone attribute
                // has not been converted to container form.
                if (!elem.getAttribute("sID").isEmpty() || !elem.getAttribute("eID").isEmpty())
                    throw new IllegalArgumentException("verse should have been de-milestoned already.");
                if (osisID.isEmpty())
                    throw new IllegalStateException("Verse without osisID");
                if (!osisID.startsWith(chapterName + "."))
                    throw new IllegalStateException("Invalid verse " + osisID + " in chapter " + chapterName);
                String vnumber = osisID.substring(chapterName.length() + 1);
                if (osisID.contains(" ")) {
                    // Several space separated IDs: the verse covers more than one verse number.
                    vnumber = vnumber.substring(0, vnumber.indexOf(' '));
                    lastVerse = Integer.parseInt(vnumber);
                    // Next number expected for a contiguous range; -1 once a gap was seen.
                    int nextInRange = lastVerse + 1;
                    boolean first = true;
                    for (String part : osisID.split(" ")) {
                        if (first) {
                            first = false;
                            continue;
                        }
                        if (!part.startsWith(chapterName + "."))
                            throw new IllegalStateException("Invalid verse " + osisID + " in chapter " + chapterName);
                        String partNumber = part.substring(chapterName.length() + 1);
                        vnumber = vnumber + "." + partNumber;
                        if (partNumber.equals("" + nextInRange)) {
                            nextInRange++;
                        } else {
                            nextInRange = -1;
                        }
                    }
                    // A contiguous list is written as "first-last" instead of the dotted list.
                    if (nextInRange != -1) {
                        vnumber = lastVerse + "-" + (nextInRange - 1);
                    }
                } else {
                    lastVerse = Integer.parseInt(vnumber);
                }
                Verse verse = new Verse(vnumber);
                warningContext = osisID;
                for (Headline hl : headlines) {
                    hl.accept(verse.getAppendVisitor().visitHeadline(hl.getDepth()));
                }
                headlines.clear();
                chapter.getVerses().add(verse);
                convertFromMilestoned(elem, unclosedElements);
                parseFormattedText(osisID, elem, verse);
                verse.trimWhitespace();
                verse.finished();
                // Verses without any content are dropped again.
                if (verse.getElementTypes(1).length() == 0) {
                    printWarning("WARNING: Empty verse " + osisID);
                    chapter.getVerses().remove(verse);
                }
                warningContext += " (after closing)";
            } else if (lastVerse == -1) {
                startProlog = true;
            } else {
                printWarning("WARNING: " + elem.getNodeName() + " at invalid location");
            }
        }
        if (startProlog) {
            // Move this node and all following siblings up to the first verse into a
            // synthetic holder element and parse the holder as prolog.
            Element holder = osisChapter.getOwnerDocument().createElement("prolog");
            osisChapter.insertBefore(holder, node);
            while (holder.getNextSibling() != null && !holder.getNextSibling().getNodeName().equals("verse")) {
                holder.appendChild(holder.getNextSibling());
            }
            // Marks the prolog as seen, so that later non-verse content only triggers a warning.
            lastVerse = 0;
            FormattedText prolog = new FormattedText();
            chapter.setProlog(prolog);
            for (Headline hl : headlines) {
                hl.accept(prolog.getAppendVisitor().visitHeadline(hl.getDepth()));
            }
            headlines.clear();
            convertFromMilestoned(holder, unclosedElements);
            parseFormattedText(null, holder, prolog);
            prolog.trimWhitespace();
            prolog.finished();
            // Resume iteration after the holder; the original node is now one of its children.
            node = holder;
        }
    }
    if (headlines.size() > 0)
        printWarning("WARNING: Unused headlines: " + headlines.size());
}
Use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.
Class YCHPalmBible, method doExport.
/**
 * Writes the given bible to a tagged text file encoded as windows-1252.
 *
 * <p>Books whose Zefania ID has no entry in {@code PALM_BOOK_NUMBERS} are
 * skipped with a warning, and prologs are not exported. Missing verse numbers
 * are filled with empty {@code <VERSE></VERSE>} entries so that the position of
 * a verse inside its chapter matches its number.
 *
 * @param bible      the bible to export
 * @param exportArgs {@code exportArgs[0]} is the name of the output file
 * @throws RuntimeException if the virtual verses of a chapter are not in
 *                          ascending order
 * @throws Exception        if writing the file fails
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    String targetName = exportArgs[0];

    // The INFO attribute is the metadata description if present, else the bible name.
    String info = bible.getName();
    MetadataBook metadataBook = bible.getMetadataBook();
    if (metadataBook != null) {
        String fromMetadata = bible.getMetadataBook().getValue(MetadataBookKey.description);
        if (fromMetadata != null) {
            info = fromMetadata;
        }
    }

    try (final BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(targetName), "windows-1252"))) {
        out.write("<PARSERINFO ENCODE=\"Cp1252\" WORDTYPE=\"SPCSEP\">");
        out.newLine();
        out.write("<BIBLE NAME=\"" + bible.getName() + "\" INFO=\"" + info + "\">");
        out.newLine();

        // Plain text visitor: formatting is flattened, line breaks become a space
        // and verse separators are dropped.
        Visitor<IOException> plainText = new FormattedText.VisitorAdapter<IOException>(null) {
            @Override
            public void visitText(String text) throws IOException {
                out.write(text);
            }

            @Override
            public void visitLineBreak(LineBreakKind kind) throws IOException {
                out.write(" ");
            }

            @Override
            public void visitVerseSeparator() throws IOException {
                // strip
            }

            @Override
            public Visitor<IOException> visitFormattingInstruction(FormattingInstructionKind kind) throws IOException {
                return this;
            }

            @Override
            public Visitor<IOException> visitCSSFormatting(String css) throws IOException {
                return this;
            }
        };

        for (Book book : bible.getBooks()) {
            int zefID = book.getId().getZefID();
            boolean supported = zefID >= 1 && zefID < PALM_BOOK_NUMBERS.length && PALM_BOOK_NUMBERS[zefID] != 0;
            if (!supported) {
                System.out.println("WARNING: Skipping unsupported book " + book.getAbbr() + " (" + book.getId().getOsisID() + ")");
                continue;
            }
            out.write("<BOOK NAME=\"" + book.getShortName() + "\" NUMBER=\"" + PALM_BOOK_NUMBERS[book.getId().getZefID()] + "\" SHORTCUT=\"" + book.getAbbr() + "\">");
            out.newLine();

            // Written once, inside the first verse of the book.
            String pendingBookTitle = book.getLongName();
            int chapterNo = 0;
            for (Chapter chapter : book.getChapters()) {
                chapterNo += 1;
                if (chapter.getProlog() != null) {
                    System.out.println("WARNING: Skipping prolog (prologs not supported)!");
                }
                // Written once, inside the first verse of the chapter.
                String pendingChapterTitle = CHAPTER_NAME + " " + chapterNo;
                int expectedVerse = 1;
                out.write("<CHAPTER>");
                out.newLine();

                for (VirtualVerse virtual : chapter.createVirtualVerses()) {
                    // Pad missing verse numbers with empty verses.
                    for (; virtual.getNumber() > expectedVerse; expectedVerse++) {
                        out.write("<VERSE></VERSE>");
                        out.newLine();
                    }
                    if (virtual.getNumber() != expectedVerse) {
                        throw new RuntimeException("Verse is " + virtual.getNumber() + ", should be " + expectedVerse);
                    }

                    // VERSTEXT is only needed when some other section precedes the verse text.
                    boolean hasLeadingSection = false;
                    out.write("<VERSE>");
                    if (pendingBookTitle != null) {
                        out.write("<BOOKTEXT>" + pendingBookTitle);
                        pendingBookTitle = null;
                        hasLeadingSection = true;
                    }
                    if (pendingChapterTitle != null) {
                        out.write("<CHAPTEXT>" + pendingChapterTitle);
                        pendingChapterTitle = null;
                        hasLeadingSection = true;
                    }
                    for (Headline headline : virtual.getHeadlines()) {
                        out.write("<DESCTEXT>");
                        headline.accept(plainText);
                        hasLeadingSection = true;
                    }
                    if (hasLeadingSection) {
                        out.write("<VERSTEXT>");
                    }

                    for (Verse real : virtual.getVerses()) {
                        // Real verse numbers that differ from the virtual number are shown in braces.
                        boolean sameNumber = real.getNumber().equals("" + virtual.getNumber());
                        if (!sameNumber) {
                            out.write("{" + real.getNumber() + "} ");
                        }
                        real.accept(plainText);
                    }
                    out.write("</VERSE>");
                    expectedVerse++;
                    out.newLine();
                }
                out.write("</CHAPTER>");
                out.newLine();
            }
            out.write("</BOOK>");
            out.newLine();
        }
        out.write("</BIBLE>");
        out.newLine();
    }
}
Use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.
Class ZefaniaXML, method createXMLBible.
/**
 * Converts the given bible into a Zefania XML object tree.
 *
 * <p>Metadata values for status, version and revision are mapped to attributes
 * of the document; other keys without an {@code '@'} are added to the
 * INFORMATION section when their value matches the pattern registered in
 * {@code INFORMATION_FIELDS}. The metadata book itself and books without a
 * positive Zefania ID are not exported, and chapters without verses are
 * left out.
 *
 * @param bible the bible to convert
 * @return the populated XML document object
 * @throws Exception if a metadata value cannot be converted
 */
protected XMLBIBLE createXMLBible(Bible bible) throws Exception {
    ObjectFactory factory = new ObjectFactory();
    XMLBIBLE xmlBible = factory.createXMLBIBLE();
    xmlBible.setBiblename(bible.getName());
    xmlBible.setType(EnumModtyp.X_BIBLE);
    xmlBible.setINFORMATION(factory.createINFORMATION());

    MetadataBook metadataBook = bible.getMetadataBook();
    if (metadataBook != null) {
        String statusKey = MetadataBookKey.status.toString();
        String versionKey = MetadataBookKey.version.toString();
        String revisionKey = MetadataBookKey.revision.toString();
        for (String key : metadataBook.getKeys()) {
            String value = metadataBook.getValue(key);
            if (key.equals(statusKey)) {
                xmlBible.setStatus(EnumStatus.fromValue(value));
            } else if (key.equals(versionKey)) {
                xmlBible.setVersion(value);
            } else if (key.equals(revisionKey)) {
                xmlBible.setRevision(new BigInteger(value));
            } else if (!key.contains("@")) {
                Pattern allowed = INFORMATION_FIELDS.get(MetadataBookKey.valueOf(key));
                if (allowed != null && allowed.matcher(value).matches()) {
                    xmlBible.getINFORMATION().getTitleOrCreatorOrDescription().add(new JAXBElement<String>(new QName(key), String.class, value));
                }
            }
        }
    }
    xmlBible.getINFORMATION().getTitleOrCreatorOrDescription().add(new JAXBElement<String>(new QName("format"), String.class, "Zefania XML Bible Markup Language"));

    // Caption type indexed by headline depth; depths 0 and 7 to 9 have no caption type.
    EnumCaptionType[] captionTypeByDepth = new EnumCaptionType[] { null, EnumCaptionType.X_H_1, EnumCaptionType.X_H_2, EnumCaptionType.X_H_3, EnumCaptionType.X_H_4, EnumCaptionType.X_H_5, EnumCaptionType.X_H_6, null, null, null };

    for (Book book : bible.getBooks()) {
        if (book.getId().equals(BookID.METADATA)) {
            continue;
        }
        if (book.getId().getZefID() <= 0) {
            System.out.println("WARNING: Unable to export book " + book.getAbbr());
            continue;
        }

        BIBLEBOOK xmlBook = factory.createBIBLEBOOK();
        xmlBook.setBnumber(BigInteger.valueOf(book.getId().getZefID()));
        xmlBook.setBsname(book.getShortName());
        xmlBook.setBname(book.getLongName());

        // Chapter numbers follow the position in the book, also across skipped chapters.
        int chapterNo = 0;
        for (Chapter chapter : book.getChapters()) {
            chapterNo += 1;
            if (chapter.getVerses().size() == 0) {
                continue;
            }

            CHAPTER xmlChapter = factory.createCHAPTER();
            xmlChapter.setCnumber(BigInteger.valueOf(chapterNo));
            xmlBook.getCHAPTER().add(xmlChapter);

            if (chapter.getProlog() != null) {
                // The prolog always refers to verse 1.
                PROLOG xmlProlog = factory.createPROLOG();
                xmlProlog.setVref(BigInteger.ONE);
                chapter.getProlog().accept(new CreateContentVisitor(factory, xmlProlog.getContent(), null));
                xmlChapter.getPROLOGOrCAPTIONOrVERS().add(xmlProlog);
            }

            for (VirtualVerse virtual : chapter.createVirtualVerses()) {
                // Headlines become captions that reference the verse they precede.
                for (Headline headline : virtual.getHeadlines()) {
                    CAPTION xmlCaption = factory.createCAPTION();
                    xmlCaption.setVref(BigInteger.valueOf(virtual.getNumber()));
                    headline.accept(new CreateContentVisitor(factory, xmlCaption.getContent(), null));
                    xmlCaption.setType(captionTypeByDepth[headline.getDepth()]);
                    xmlChapter.getPROLOGOrCAPTIONOrVERS().add(xmlCaption);
                }

                VERS xmlVerse = factory.createVERS();
                xmlVerse.setVnumber(BigInteger.valueOf(virtual.getNumber()));
                for (Verse real : virtual.getVerses()) {
                    // A real verse number that differs from the virtual one is prepended in bold.
                    boolean sameNumber = real.getNumber().equals("" + virtual.getNumber());
                    if (!sameNumber) {
                        STYLE numberStyle = factory.createSTYLE();
                        numberStyle.setCss("font-weight: bold");
                        numberStyle.getContent().add("(" + real.getNumber() + ")");
                        xmlVerse.getContent().add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, numberStyle));
                        xmlVerse.getContent().add(" ");
                    }
                    real.accept(new CreateContentVisitor(factory, xmlVerse.getContent(), xmlVerse));
                }
                xmlChapter.getPROLOGOrCAPTIONOrVERS().add(xmlVerse);
            }
        }
        xmlBible.getBIBLEBOOK().add(xmlBook);
    }
    return xmlBible;
}
Aggregations