use of biblemulticonverter.data.FormattedText.Visitor in project BibleMultiConverter by schierlm.
the class Diffable method parseDiffable.
/**
 * Parses one line of tag-based markup and replays it onto {@code visitor}.
 *
 * <p>Text between tags is forwarded to {@code visitText}. Tags that enclose content
 * (single lower-case letter formatting tags, {@code h1}..{@code h9}, {@code fn},
 * {@code css}, {@code grammar}, {@code dict}, {@code var}, {@code extra}, {@code xref})
 * push the current visitor onto a stack and continue with the child visitor returned by
 * the matching {@code visit...} call. Any tag starting with {@code /} pops that stack; the
 * name of the closing tag is not compared with the opening tag. {@code vs}, {@code br},
 * {@code <<>} (a literal {@code <}) and {@code raw:...} do not nest.
 *
 * @param visitor visitor that receives the parsed content
 * @param line    markup line to parse
 * @throws IOException      if a tag is unclosed, malformed or unsupported, or if a closing
 *                          tag appears without a matching open tag
 * @throws RuntimeException if tags are still open when the end of the line is reached
 */
private void parseDiffable(Visitor<RuntimeException> visitor, String line) throws IOException {
    int lastPos = 0, pos = line.indexOf('<');
    List<Visitor<RuntimeException>> visitorStack = new ArrayList<Visitor<RuntimeException>>();
    while (pos != -1) {
        // Plain text in front of the tag.
        if (pos > lastPos) {
            visitor.visitText(line.substring(lastPos, pos));
        }
        int endPos = line.indexOf('>', pos);
        if (endPos == -1)
            throw new IOException("Unclosed tag: " + line.substring(pos));
        String tag = line.substring(pos + 1, endPos);
        // Strip the trailing slash of self-closing tags; a lone "/" is left untouched.
        if (tag.length() > 1 && tag.endsWith("/"))
            tag = tag.substring(0, tag.length() - 1);
        Map<String, String> tagArgs = new HashMap<String, String>();
        lastPos = endPos + 1;
        if (tag.contains(" ")) {
            // Collect name="value" pairs that follow the tag name.
            int tpos = tag.indexOf(' ');
            while (tpos < tag.length()) {
                if (tag.charAt(tpos) == ' ')
                    tpos++;
                int aspos = tag.indexOf("=\"", tpos);
                int aepos = tag.indexOf("\"", aspos + 2);
                if (aspos == -1 || aepos == -1)
                    throw new IOException("Malformed tag: <" + tag + ">");
                tagArgs.put(tag.substring(tpos, aspos), tag.substring(aspos + 2, aepos));
                tpos = aepos + 1;
            }
            tag = tag.substring(0, tag.indexOf(' '));
        }
        if (tag.startsWith("/")) {
            // A closing tag without any open tag used to fail with IndexOutOfBoundsException.
            if (visitorStack.isEmpty())
                throw new IOException("Unexpected closing tag: <" + tag + ">");
            visitor = visitorStack.remove(visitorStack.size() - 1);
        } else if (tag.length() == 1 && tag.charAt(0) >= 'a' && tag.charAt(0) <= 'z') {
            visitorStack.add(visitor);
            visitor = visitor.visitFormattingInstruction(FormattingInstructionKind.fromChar(tag.charAt(0)));
        } else if (tag.length() == 2 && tag.startsWith("h") && tag.charAt(1) >= '1' && tag.charAt(1) <= '9') {
            visitorStack.add(visitor);
            visitor = visitor.visitHeadline(tag.charAt(1) - '0');
        } else if (tag.startsWith("raw:")) {
            validateTagArgs(tag, tagArgs, "mode");
            // Raw HTML runs verbatim up to the matching closing marker and is not parsed for tags.
            int markerPos = line.indexOf("</" + tag + ">", lastPos);
            if (markerPos == -1)
                throw new IOException("Unclosed tag: <" + tag + ">");
            visitor.visitRawHTML(RawHTMLMode.valueOf(tagArgs.get("mode")), line.substring(lastPos, markerPos));
            // Skip "</" + tag + ">".
            lastPos = markerPos + tag.length() + 3;
        } else {
            switch (tag) {
            case "<":
                visitor.visitText("<");
                break;
            case "fn":
                visitorStack.add(visitor);
                visitor = visitor.visitFootnote();
                break;
            case "css":
                validateTagArgs(tag, tagArgs, "style");
                visitorStack.add(visitor);
                visitor = visitor.visitCSSFormatting(tagArgs.get("style"));
                break;
            case "vs":
                visitor.visitVerseSeparator();
                break;
            case "br":
                validateTagArgs(tag, tagArgs, "kind");
                visitor.visitLineBreak(LineBreakKind.valueOf(tagArgs.get("kind")));
                break;
            case "grammar":
                validateTagArgs(tag, tagArgs, "strong", "rmac", "idx");
                visitorStack.add(visitor);
                visitor = visitor.visitGrammarInformation(intArray(tagArgs.get("strong")), tagArgs.get("rmac").length() == 0 ? null : tagArgs.get("rmac").split(","), intArray(tagArgs.get("idx")));
                break;
            case "dict":
                validateTagArgs(tag, tagArgs, "dictionary", "entry");
                visitorStack.add(visitor);
                visitor = visitor.visitDictionaryEntry(tagArgs.get("dictionary"), tagArgs.get("entry"));
                break;
            case "var":
                validateTagArgs(tag, tagArgs, "vars");
                visitorStack.add(visitor);
                visitor = visitor.visitVariationText(tagArgs.get("vars").split(","));
                break;
            case "extra":
                validateTagArgs(tag, tagArgs, "prio", "category", "key", "value");
                visitorStack.add(visitor);
                visitor = visitor.visitExtraAttribute(ExtraAttributePriority.valueOf(tagArgs.get("prio")), tagArgs.get("category"), tagArgs.get("key"), tagArgs.get("value"));
                break;
            case "xref":
                validateTagArgs(tag, tagArgs, "abbr", "id", "chapters", "verses");
                // "chapters" and "verses" each hold first:last.
                String[] chapters = tagArgs.get("chapters").split(":");
                String[] verses = tagArgs.get("verses").split(":");
                if (chapters.length != 2 || verses.length != 2)
                    throw new IOException("Malformed \"xref\" tag arguments: " + tagArgs);
                visitorStack.add(visitor);
                visitor = visitor.visitCrossReference(tagArgs.get("abbr"), BookID.fromOsisId(tagArgs.get("id")), Integer.parseInt(chapters[0]), verses[0], Integer.parseInt(chapters[1]), verses[1]);
                break;
            default:
                throw new IOException("Unsupported tag: " + tag);
            }
        }
        pos = line.indexOf('<', lastPos);
    }
    // Trailing text after the last tag.
    if (lastPos < line.length())
        visitor.visitText(line.substring(lastPos));
    if (visitorStack.size() > 0)
        throw new RuntimeException("Unclosed tags: " + line);
}
use of biblemulticonverter.data.FormattedText.Visitor in project BibleMultiConverter by schierlm.
the class NeUeParser method doImport.
/**
 * Imports the HTML files found in {@code inputDirectory} into a {@link Bible}.
 *
 * <p>Processing order:
 * <ol>
 * <li>The main page ({@code NeUe.htm}, falling back to {@code index.htm}) supplies the
 * version metadata, the table of contents (checked against {@code METADATA} and
 * {@code JESUS_CHRONIK}) and the preface, which becomes an introduction book.</li>
 * <li>Each book file listed in {@code METADATA} is parsed into chapters, verses, headlines
 * and footnotes; missing files are skipped with a console message.</li>
 * <li>{@code bibel.html} and the {@code JESUS_CHRONIK} files are collected into an
 * appendix book.</li>
 * </ol>
 *
 * <p>The parser is strict: any line it does not recognise is reported by throwing an
 * {@link IOException} whose message is the offending text.
 *
 * <p>NOTE(review): several literals below contain a bare space where the former hard-coded
 * offsets (13 and 6) suggest a longer token such as an HTML entity. Offsets are now derived
 * from the literals so they cannot disagree; confirm the literals against the real input.
 *
 * @param inputDirectory directory containing the HTML files
 * @return the imported bible
 * @throws Exception if a file cannot be read or has an unexpected structure
 */
@Override
public Bible doImport(File inputDirectory) throws Exception {
    Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
    MetadataBook metadata = new MetadataBook();
    metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
    metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
    metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
    metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
    metadata.setValue(MetadataBookKey.language, "GER");
    bible.getBooks().add(metadata.getBook());
    String mainFile = "NeUe.htm";
    if (!new File(inputDirectory, mainFile).exists())
        mainFile = "index.htm";
    try (BufferedReader br = createReader(inputDirectory, mainFile)) {
        // Header: look for the "Textstand: " line to fill version, date and revision.
        String line = br.readLine().trim();
        while (!line.startsWith("<p class=\"u3\">")) {
            if (line.contains("Textstand: ")) {
                line = line.substring(line.indexOf("Textstand: ") + 11);
                line = line.substring(0, line.indexOf('<'));
                metadata.setValue(MetadataBookKey.version, line);
                metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
                metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
                metadata.finished();
            }
            line = br.readLine().trim();
        }
        // Table of contents: every link must match the expected book / chronicle order.
        Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a> (?:</p>)?");
        int bookIndex = 0, jcIndex = 0;
        while (!line.startsWith("<a name=\"vorwort\">")) {
            if (line.equals("<br>")) {
                line = br.readLine().trim();
                if (line.startsWith("»» "))
                    line = line.substring("»» ".length());
            }
            Matcher m = tocPattern.matcher(line);
            if (m.matches()) {
                String url = m.group(1);
                String shortName = replaceEntities(m.group(2));
                if (url.endsWith(".html#bb")) {
                    String filename = url.substring(0, url.length() - 8);
                    BookMetadata bm = METADATA[bookIndex];
                    if (!bm.filename.equals(filename))
                        throw new IOException(filename + "/" + bm.filename);
                    bm.shortname = shortName;
                    bookIndex++;
                } else if (url.startsWith("0")) {
                    if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
                        throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
                    jcIndex++;
                } else {
                    throw new IOException(url);
                }
            } else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p> </p>") && !line.equals("<p><a name=\"bb\"> </a></p>")) {
                throw new IOException(line);
            }
            line = br.readLine().trim();
        }
        if (bookIndex != METADATA.length)
            throw new IOException(bookIndex + " != " + METADATA.length);
        // No chronicle links at all is accepted; the chronicle list is then emptied.
        if (jcIndex == 0)
            JESUS_CHRONIK = new String[0];
        if (jcIndex != JESUS_CHRONIK.length)
            throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
        // Preface (Vorwort)
        Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
        bible.getBooks().add(vorwort);
        Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
        boolean needParagraph = false;
        if (line.endsWith("</a><br>"))
            line = br.readLine().trim();
        while (!line.startsWith("<div align=\"right\">")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            if (line.startsWith("<h2>")) {
                // The page title must equal the long name given to the book above.
                if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
                    throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
            } else if (line.startsWith("<h4>")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<h4 id=")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<div class=\"fn\">")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
            } else if (line.startsWith("<p>")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                if (line.endsWith("<br />"))
                    line += br.readLine().trim();
                parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
            } else if (line.equals("<ul>")) {
                // Lists in the preface are skipped entirely.
                while (!line.equals("</ul>")) {
                    line = br.readLine();
                }
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<p> </p>");
        }
        vorwort.getChapters().get(0).getProlog().finished();
    }
    for (BookMetadata bm : METADATA) {
        if (!new File(inputDirectory, bm.filename + ".html").exists()) {
            System.out.println("*** Skipping " + bm.filename + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
            // The first line is consumed here and then replaced by the result of skipLines.
            String line = br.readLine().trim();
            line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "»»");
            if (!line.equals("<p><a name=\"bb\"> </a></p>") && !line.equals("<p><a id=\"bb\"> </a></p>"))
                throw new IOException(line);
            line = skipLines(br);
            if (line.equals("<p> </p>"))
                line = br.readLine().trim();
            Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
            bible.getBooks().add(bk);
            line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p> </p>");
            // Book prolog: headline, one or more "e" blocks, then a bold italic closing paragraph.
            FormattedText prolog = new FormattedText();
            prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
            line = skipLines(br);
            boolean firstProlog = true;
            while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
                if (firstProlog) {
                    firstProlog = false;
                } else {
                    prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                }
                parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
                line = skipLines(br);
            }
            if (firstProlog)
                throw new IOException(line);
            prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
            parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
            prolog.finished();
            line = skipLines(br);
            if (!line.startsWith("<h"))
                throw new IOException(line);
            // Headline depths are counted relative to the first headline level in the file.
            char minHeadline = line.charAt(2);
            List<Headline> headlines = new ArrayList<>();
            boolean inParagraph = false;
            Chapter currentChapter = null;
            Verse currentVerse = null;
            // Footnote visitors opened in the text, and the verse each one was opened in;
            // both lists are consumed in order by the footnote blocks that follow.
            List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
            List<String> footnoteVerses = new ArrayList<>();
            while (!line.equals("<hr>")) {
                if (line.startsWith("<p> </p>")) {
                    // Drop exactly the matched prefix (previously a hard-coded 13).
                    line = line.substring("<p> </p>".length()).trim();
                    if (line.length() == 0)
                        line = skipLines(br);
                    continue;
                }
                // restLine holds whatever has to be processed in the next iteration.
                String restLine = null;
                List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
                while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
                if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
                    line = "<p>" + line.substring(16);
                }
                if (line.matches(".*</p>.+")) {
                    int pos = line.indexOf("</p>");
                    restLine = line.substring(pos + 4).trim();
                    line = line.substring(0, pos + 4);
                }
                if (!inParagraph && line.startsWith("<p>")) {
                    inParagraph = true;
                    line = line.substring(3).trim();
                    if (line.length() == 0) {
                        line = skipLines(br);
                        continue;
                    }
                }
                // Split so that each verse marker / poetry paragraph starts its own piece.
                if (line.indexOf("<span class=\"vers\">", 1) != -1) {
                    int pos = line.indexOf("<span class=\"vers\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                if (line.indexOf("<p class=\"poet\">", 1) != -1) {
                    int pos = line.indexOf("<p class=\"poet\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                // Remove exactly the matched suffix (previously a hard-coded 6).
                while (line.endsWith(" ")) line = line.substring(0, line.length() - " ".length());
                if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
                    // Headlines are buffered and emitted at the start of the next verse.
                    Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
                    String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
                    if (headline.contains("*"))
                        throw new IOException(headline);
                    hl.getAppendVisitor().visitText(replaceEntities(headline));
                    headlines.add(hl);
                } else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
                    int pos = line.indexOf("</span>");
                    if (pos == -1)
                        throw new IOException(line);
                    String vs = line.substring(19, pos).trim();
                    if (vs.endsWith(" ")) {
                        vs = cutAffix(vs, "", " ");
                    }
                    if (vs.matches("[0-9]+(,[0-9]+)?")) {
                        currentVerse = new Verse(vs);
                    } else {
                        throw new IOException(vs);
                    }
                    line = line.substring(pos + 7);
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    if (line.startsWith(" ")) {
                        // Drop exactly the matched prefix (previously a hard-coded 6).
                        line = line.substring(" ".length());
                    }
                    for (Headline h : headlines) {
                        h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
                    }
                    headlines.clear();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                    // A verse before the first chapter marker is a structural error, not an NPE.
                    if (currentChapter == null)
                        throw new IOException(line);
                    currentChapter.getVerses().add(currentVerse);
                } else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
                    int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
                    currentChapter = new Chapter();
                    currentVerse = null;
                    bk.getChapters().add(currentChapter);
                    // Chapter numbers must be consecutive, starting at 1.
                    if (chap != bk.getChapters().size())
                        throw new IOException(chap + "/" + bk.getChapters().size());
                    // The book prolog is attached to the first chapter only.
                    if (prolog != null) {
                        currentChapter.setProlog(prolog);
                        prolog = null;
                    }
                } else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
                    String content = cutAffix(line, "<div class=\"fn\">", "</div>");
                    if (footnoteVerses.size() == 0)
                        throw new IOException(line);
                    // The footnote text must start with the reference of the verse it was opened in.
                    String prefix = footnoteVerses.remove(0) + ":";
                    if (!content.startsWith(prefix)) {
                        throw new IOException(prefix + " / " + content);
                    }
                    parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
                } else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith(" ") || line.startsWith("<span class=\"u2\">"))) {
                    // Continuation text belongs to the verse that is currently open.
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    // Continuation text without an open verse is a structural error, not an NPE.
                    if (currentVerse == null)
                        throw new IOException(line);
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    System.err.println("Next line: " + br.readLine());
                    throw new IOException(line);
                }
                if (!newFootnotes.isEmpty()) {
                    footnotes.addAll(newFootnotes);
                    // Record "chapter,verse" for each new footnote unless the verse number already has a comma.
                    for (int i = 0; i < newFootnotes.size(); i++) {
                        if (currentVerse.getNumber().contains(",")) {
                            footnoteVerses.add(currentVerse.getNumber());
                        } else {
                            footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
                        }
                    }
                }
                if (restLine != null)
                    line = restLine;
                else
                    line = skipLines(br);
            }
            // Everything buffered must have been consumed by the end of the book.
            if (!headlines.isEmpty())
                throw new IOException("" + headlines.size());
            if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
                throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
            for (Chapter ch : bk.getChapters()) {
                for (Verse vv : ch.getVerses()) {
                    vv.trimWhitespace();
                    vv.finished();
                }
            }
        }
    }
    // Appendix (Anhang)
    Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
    bible.getBooks().add(anhang);
    Visitor<RuntimeException> vv = getPrologVisitor(anhang);
    vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
    try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
        String line = br.readLine().trim();
        while (!line.startsWith("<a name=\"at\">")) {
            line = br.readLine().trim();
        }
        while (!line.equals("</body>")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            line = line.replaceAll("> +<", "><");
            line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
            if (line.startsWith("<h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
            } else if (line.startsWith("<h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
            } else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
                // Table row: parts[0] is the book file name, parts[1] the link text.
                String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
                line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
                if (line.contains("<td><br /><br /><a href")) {
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                    line = line.replace("<td><br /><br /><a href", "<td><a href");
                }
                String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
                Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
                BookMetadata m = null;
                for (BookMetadata bm : METADATA) {
                    if (bm.filename.equals(parts[0])) {
                        m = bm;
                        break;
                    }
                }
                // A link to a file that is not in METADATA used to fail with a NullPointerException.
                if (m == null)
                    throw new IOException(parts[0]);
                bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
                bold.visitText(" " + replaceEntities(title));
                vv.visitLineBreak(LineBreakKind.NEWLINE);
                line = br.readLine().trim();
                // The description cell may span several physical lines.
                while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
                vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
                vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                line = br.readLine().trim();
                if (!line.equals("</tr>"))
                    throw new IOException(line);
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p> </p><p> </p><p> </p><p> </p>", "<p> </p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
        }
    }
    // Ezekiel's temple (Hesekiels Tempel): link for offline output, inline image for online output.
    vv.visitHeadline(1).visitText("Hesekiels Tempel");
    Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
    vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
    vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");
    // Jesus chronicle (Jesus-Chronik)
    if (JESUS_CHRONIK.length > 0)
        vv.visitHeadline(1).visitText("Die Jesus-Chronik");
    for (String name : JESUS_CHRONIK) {
        if (!new File(inputDirectory, name + ".html").exists()) {
            System.out.println("*** Skipping " + name + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
            String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
            // Footnote visitors and the prefixes their texts must start with, consumed in order.
            List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
            List<String> footnotePrefixes = new ArrayList<>();
            while (!line.startsWith("</body>")) {
                line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
                if (line.startsWith("<h2>")) {
                    parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
                } else if (line.startsWith("<div class=\"fn\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
                    for (String fn : fns) {
                        fn = fn.trim();
                        String pfx = footnotePrefixes.remove(0);
                        Visitor<RuntimeException> fnv = footnoteList.remove(0);
                        if (!fn.startsWith(pfx))
                            throw new IOException(pfx + " / " + fn);
                        parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
                    }
                } else if (line.startsWith("<p><div class=\"rot\">")) {
                    String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
                    // Bold part becomes a level 3 headline, the remainder (if any) is set in italics.
                    int pos = line.indexOf("</b><br />");
                    parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
                    String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
                    if (!xref.isEmpty())
                        parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p>")) {
                    parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("©")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<div class=\"e\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    throw new IOException(line);
                }
                line = skipLines(br);
            }
            if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
                throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
        }
    }
    anhang.getChapters().get(0).getProlog().trimWhitespace();
    anhang.getChapters().get(0).getProlog().finished();
    return bible;
}
use of biblemulticonverter.data.FormattedText.Visitor in project BibleMultiConverter by schierlm.
the class RoundtripXML method parseContent.
/**
 * Replays a list of unmarshalled content items onto {@code visitor}.
 *
 * <p>Strings are emitted as text. Every {@link JAXBElement} is mapped to the matching
 * {@code visit...} call; for elements that can have children, the method recurses with the
 * visitor returned by that call. Line breaks, raw HTML and verse separators have no
 * children and are emitted directly.
 *
 * @param visitor     visitor that receives the content
 * @param contentList items to replay, in order
 * @throws IOException if an item or an element value has an unsupported type
 */
private void parseContent(Visitor<RuntimeException> visitor, List<Serializable> contentList) throws IOException {
    for (Serializable item : contentList) {
        if (item instanceof String) {
            visitor.visitText((String) item);
            continue;
        }
        if (!(item instanceof JAXBElement<?>)) {
            throw new IOException("Invalid content: " + item.getClass());
        }
        Object payload = ((JAXBElement<?>) item).getValue();
        Visitor<RuntimeException> child;
        if (payload instanceof FormattedTextType.Headline) {
            FormattedTextType.Headline headline = (FormattedTextType.Headline) payload;
            child = visitor.visitHeadline(headline.getDepth());
        } else if (payload instanceof FormattedTextType.Footnote) {
            child = visitor.visitFootnote();
        } else if (payload instanceof FormattedTextType.CrossReference) {
            FormattedTextType.CrossReference ref = (FormattedTextType.CrossReference) payload;
            BookID book = BookID.fromOsisId(ref.getBook());
            child = visitor.visitCrossReference(ref.getBookAbbr(), book, ref.getFirstChapter(), ref.getFirstVerse(), ref.getLastChapter(), ref.getLastVerse());
        } else if (payload instanceof FormattedTextType.LineBreak) {
            // No children: emit and move on to the next item.
            FormattedTextType.LineBreak lineBreak = (FormattedTextType.LineBreak) payload;
            visitor.visitLineBreak(LineBreakKind.valueOf(lineBreak.getKind().name()));
            continue;
        } else if (payload instanceof FormattedTextType.DictionaryEntry) {
            FormattedTextType.DictionaryEntry entry = (FormattedTextType.DictionaryEntry) payload;
            child = visitor.visitDictionaryEntry(entry.getDictionary(), entry.getEntry());
        } else if (payload instanceof FormattedTextType.GrammarInformation) {
            FormattedTextType.GrammarInformation info = (FormattedTextType.GrammarInformation) payload;
            // Empty lists are passed on as null rather than as empty arrays.
            int[] strongNumbers = null;
            int strongCount = info.getStrongs().size();
            if (strongCount > 0) {
                strongNumbers = new int[strongCount];
                for (int k = 0; k < strongCount; k++) {
                    strongNumbers[k] = info.getStrongs().get(k);
                }
            }
            String[] morphCodes = null;
            if (!info.getRmac().isEmpty()) {
                morphCodes = (String[]) info.getRmac().toArray(new String[info.getRmac().size()]);
            }
            int[] sourceIndices = null;
            int indexCount = info.getSourceIndices().size();
            if (indexCount > 0) {
                sourceIndices = new int[indexCount];
                for (int k = 0; k < indexCount; k++) {
                    sourceIndices[k] = info.getSourceIndices().get(k);
                }
            }
            child = visitor.visitGrammarInformation(strongNumbers, morphCodes, sourceIndices);
        } else if (payload instanceof FormattedTextType.FormattingInstruction) {
            FormattedTextType.FormattingInstruction instruction = (FormattedTextType.FormattingInstruction) payload;
            child = visitor.visitFormattingInstruction(FormattingInstructionKind.valueOf(instruction.getKind().name()));
        } else if (payload instanceof FormattedTextType.CssFormatting) {
            FormattedTextType.CssFormatting css = (FormattedTextType.CssFormatting) payload;
            child = visitor.visitCSSFormatting(css.getCss());
        } else if (payload instanceof FormattedTextType.ExtraAttribute) {
            FormattedTextType.ExtraAttribute extra = (FormattedTextType.ExtraAttribute) payload;
            ExtraAttributePriority priority = ExtraAttributePriority.valueOf(extra.getPrio().name());
            child = visitor.visitExtraAttribute(priority, extra.getCategory(), extra.getKey(), extra.getValue());
        } else if (payload instanceof FormattedTextType.Variation) {
            List<String> variations = ((FormattedTextType.Variation) payload).getVariations();
            child = visitor.visitVariationText((String[]) variations.toArray(new String[variations.size()]));
        } else if (payload instanceof FormattedTextType.RawHTML) {
            // No children: emit and move on to the next item.
            FormattedTextType.RawHTML raw = (FormattedTextType.RawHTML) payload;
            visitor.visitRawHTML(RawHTMLMode.valueOf(raw.getMode().name()), raw.getValue());
            continue;
        } else if (payload instanceof FormattedTextType.VerseSeparator) {
            // No children: emit and move on to the next item.
            visitor.visitVerseSeparator();
            continue;
        } else {
            throw new IOException("Invalid JAXBElement value: " + payload.getClass());
        }
        // Every remaining element kind carries nested content of its own.
        parseContent(child, ((FormattedTextType) payload).getContent());
    }
}
use of biblemulticonverter.data.FormattedText.Visitor in project BibleMultiConverter by schierlm.
the class StrippedDiffable method mergeIntroductionPrologs.
/**
 * Removes all books whose Zefania ID is negative and merges their prologs into the prolog
 * of the first chapter of the next remaining book that has chapters.
 *
 * <p>A removed book contributes its prolog only if it has exactly one chapter and that
 * chapter has a prolog; otherwise a warning is printed and its content is dropped. If the
 * receiving chapter already has a prolog, it is placed after the buffered ones. Merged
 * prologs are separated by paragraph breaks.
 *
 * <p>Prologs that are still buffered after the last book are appended to the first chapter
 * of the first book with a positive Zefania ID that has chapters.
 *
 * @param bible bible whose book list is modified in place
 */
protected void mergeIntroductionPrologs(Bible bible) {
    List<FormattedText> prologBuffer = new ArrayList<FormattedText>();
    for (int i = 0; i < bible.getBooks().size(); i++) {
        Book book = bible.getBooks().get(i);
        if (book.getId().getZefID() < 0) {
            if (book.getChapters().size() == 1) {
                Chapter ch = book.getChapters().get(0);
                // Verses are only reported; the prolog of the chapter is still buffered below.
                if (ch.getVerses().size() > 0)
                    System.out.println("WARNING: Book " + book.getAbbr() + " has verses; not merged.");
                if (ch.getProlog() != null)
                    prologBuffer.add(ch.getProlog());
                else
                    System.out.println("WARNING: Book " + book.getAbbr() + " does not have a prolog; not merged.");
            } else {
                System.out.println("WARNING: Book " + book.getAbbr() + " has " + book.getChapters().size() + " chapters; not merged.");
            }
            // The book is removed either way; step back so the next book is not skipped.
            bible.getBooks().remove(i);
            i--;
        } else if (prologBuffer.size() > 0 && book.getChapters().size() > 0) {
            Chapter ch = book.getChapters().get(0);
            if (ch.getProlog() != null)
                prologBuffer.add(ch.getProlog());
            FormattedText newProlog = new FormattedText();
            Visitor<RuntimeException> v = newProlog.getAppendVisitor();
            ch.setProlog(newProlog);
            boolean first = true;
            for (FormattedText oldProlog : prologBuffer) {
                if (!first)
                    v.visitLineBreak(LineBreakKind.PARAGRAPH);
                first = false;
                oldProlog.accept(v);
            }
            prologBuffer.clear();
        }
    }
    if (prologBuffer.size() > 0) {
        System.out.println("WARNING: " + prologBuffer.size() + " introduction prologs after last bible book were merged to first bible book!");
        for (int i = 0; i < bible.getBooks().size(); i++) {
            Book book = bible.getBooks().get(i);
            if (book.getId().getZefID() > 0 && book.getChapters().size() > 0) {
                Chapter ch = book.getChapters().get(0);
                FormattedText target = ch.getProlog();
                // The prolog is nullable (see the checks above). Create one if missing instead
                // of failing with a NullPointerException, and skip the leading paragraph break.
                boolean first = false;
                if (target == null) {
                    target = new FormattedText();
                    ch.setProlog(target);
                    first = true;
                }
                Visitor<RuntimeException> v = target.getAppendVisitor();
                for (FormattedText oldProlog : prologBuffer) {
                    if (!first)
                        v.visitLineBreak(LineBreakKind.PARAGRAPH);
                    first = false;
                    oldProlog.accept(v);
                }
                break;
            }
        }
    }
}
use of biblemulticonverter.data.FormattedText.Visitor in project BibleMultiConverter by schierlm.
the class ZefDic method parseBible.
/**
 * Converts a dictionary document into a {@link Bible} with one book per dictionary item.
 *
 * <p>Version, revision and the information elements of the document go into a metadata
 * book, which is added only if it ends up with at least one key. For every item, a book
 * with a single chapter is created and the item content is written into that chapter's
 * prolog. The item id is used as book abbreviation when it matches
 * {@code Utils.BOOK_ABBR_REGEX}; otherwise a generated id ({@code L1}, {@code L2}, ...) is
 * used.
 *
 * @param doc unmarshalled dictionary document
 * @return the converted bible
 * @throws Exception declared by the signature; unsupported content is reported as
 *                   {@link RuntimeException}
 */
protected Bible parseBible(Dictionary doc) throws Exception {
    Bible bible = new Bible(doc.getType().toString() + "@" + doc.getRefbible());

    // Document-level metadata.
    MetadataBook meta = new MetadataBook();
    if (doc.getDicversion() != null) {
        meta.setValue(MetadataBookKey.version, doc.getDicversion());
    }
    if (doc.getRevision() != null) {
        meta.setValue(MetadataBookKey.revision, doc.getRevision());
    }
    for (JAXBElement<?> info : doc.getINFORMATION().getTitleOrCreatorOrDescription()) {
        if (info.getValue() != null) {
            String text = normalize(info.getValue().toString(), true).trim();
            if (text.length() == 0) {
                text = "-empty-";
            }
            meta.setValue(info.getName().getLocalPart(), text);
        }
    }
    meta.finished();
    if (meta.getKeys().size() > 0) {
        bible.getBooks().add(meta.getBook());
    }

    int generatedIds = 0;
    for (TItem entry : doc.getItem()) {
        String id = entry.getId();
        String abbr = id.matches(Utils.BOOK_ABBR_REGEX) ? id : "L" + (++generatedIds);
        Book book = new Book(abbr, BookID.DICTIONARY_ENTRY, id, id);
        bible.getBooks().add(book);
        FormattedText prolog = new FormattedText();
        book.getChapters().add(new Chapter());
        book.getChapters().get(0).setProlog(prolog);
        Visitor<RuntimeException> top = prolog.getAppendVisitor();

        // Strong id given as an item attribute.
        String strongId = entry.getStrongId();
        if (strongId != null) {
            Visitor<RuntimeException> strongField = top.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefdic", "itemfield", "strongid");
            strongField.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Strong-ID: ");
            strongField.visitText(strongId);
            top.visitLineBreak(LineBreakKind.PARAGRAPH);
        }

        for (Object node : entry.getContent()) {
            if (node instanceof String) {
                // Only whitespace is tolerated between the child elements of an item.
                if (((String) node).trim().length() > 0) {
                    throw new RuntimeException((String) node);
                }
                continue;
            }
            if (!(node instanceof JAXBElement)) {
                throw new RuntimeException("" + node.getClass());
            }
            JAXBElement<?> nodeElement = (JAXBElement<?>) node;
            Object nodeValue = nodeElement.getValue();
            if (!nodeElement.getName().getNamespaceURI().equals("")) {
                throw new RuntimeException(nodeElement.getName().getNamespaceURI());
            }
            String nodeName = nodeElement.getName().getLocalPart();

            if (nodeValue instanceof TParagraph && nodeName.equals("description")) {
                TParagraph para = (TParagraph) nodeValue;
                if (para.getId() != null) {
                    throw new RuntimeException(para.getId());
                }
                Visitor<RuntimeException> desc = top.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefdic", "field", "description");
                for (Object part : para.getContent()) {
                    if (part instanceof String) {
                        desc.visitText(normalize((String) part, false));
                        continue;
                    }
                    if (!(part instanceof JAXBElement)) {
                        throw new RuntimeException(part.getClass().getName());
                    }
                    JAXBElement<?> partElement = (JAXBElement<?>) part;
                    Object partValue = partElement.getValue();
                    if (!partElement.getName().getNamespaceURI().equals("")) {
                        throw new RuntimeException(partElement.getName().getNamespaceURI());
                    }
                    String partName = partElement.getName().getLocalPart();
                    boolean isText = partValue instanceof String;
                    // NOTE(review): some branches below write to the prolog-level visitor
                    // (top) rather than to the description visitor (desc); kept as found.
                    if (partName.equals("br") && isText) {
                        if (((String) partValue).trim().length() > 0) {
                            throw new RuntimeException((String) partValue);
                        }
                        desc.visitLineBreak(LineBreakKind.NEWLINE);
                    } else if (partName.equals("title") && isText) {
                        desc.visitHeadline(2).visitText(((String) partValue).trim().replaceAll(" +", " "));
                    } else if (partName.equals("sub") && isText) {
                        desc.visitFormattingInstruction(FormattingInstructionKind.SUBSCRIPT).visitText(normalize((String) partValue, false));
                    } else if (partName.equals("reflink") && partValue instanceof RefLinkType) {
                        RefLinkType link = (RefLinkType) partValue;
                        if (link.getTarget() != null || link.getContent() == null || link.getContent().length() > 0) {
                            System.out.println("WARNING: Unsupported reflink attributes " + link.getTarget() + "|" + link.getContent());
                        }
                        if (link.getMscope() == null) {
                            link.setMscope(link.getContent());
                        }
                        top.visitDictionaryEntry("reflink", link.getMscope().replace(';', '-'));
                    } else if (partName.equals("see") && partValue instanceof SeeType) {
                        SeeType see = (SeeType) partValue;
                        if (see.getTarget() != null && !see.getTarget().equals("x-self")) {
                            throw new RuntimeException(see.getTarget());
                        }
                        top.visitDictionaryEntry("dict", see.getContent());
                    } else if (partName.equals("bib_link") && partValue instanceof BibLinkType) {
                        // Bible links become a footnote holding a single-verse cross reference.
                        BibLinkType bibLink = (BibLinkType) partValue;
                        Visitor<RuntimeException> footnote = top.visitFootnote();
                        footnote.visitText(FormattedText.XREF_MARKER);
                        BookID target = BookID.fromZefId(Integer.parseInt(bibLink.getBn()));
                        int chapterNumber = Integer.parseInt(bibLink.getCn1());
                        String verse = bibLink.getVn1();
                        footnote.visitCrossReference(target.getOsisID(), target, chapterNumber, verse, chapterNumber, verse).visitText(target.getOsisID() + " " + chapterNumber + ":" + verse);
                    } else if (partName.equals("greek") && isText) {
                        top.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefdic", "tag", "greek").visitText(normalize((String) partValue, false));
                    } else if (partName.equals("em") && isText) {
                        desc.visitFormattingInstruction(FormattingInstructionKind.ITALIC).visitText(normalize((String) partValue, false));
                    } else if (partName.equals("strong") && isText) {
                        desc.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText(normalize((String) partValue, false));
                    } else if (partName.equals("q") && isText) {
                        top.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefdic", "tag", "q").visitText(normalize((String) partValue, false));
                    } else {
                        throw new RuntimeException(partName + "/" + partValue.getClass().getName());
                    }
                }
                top.visitLineBreak(LineBreakKind.PARAGRAPH);
            } else if (nodeValue instanceof String || nodeValue instanceof MyAnyType) {
                // Simple fields: "title" becomes a headline, the others a labelled extra
                // attribute followed by a paragraph break.
                String fieldKey;
                String label;
                switch (nodeName) {
                case "title":
                    fieldKey = null;
                    label = null;
                    break;
                case "strong_id":
                    fieldKey = "strongid";
                    label = "Strong-ID: ";
                    break;
                case "transliteration":
                    fieldKey = "transliteration";
                    label = "Transliteration: ";
                    break;
                case "pronunciation":
                    fieldKey = "pronunciation";
                    label = "Pronunciation: ";
                    break;
                default:
                    throw new RuntimeException(nodeName);
                }
                Visitor<RuntimeException> field;
                if (fieldKey == null) {
                    field = top.visitHeadline(1);
                } else {
                    field = top.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefdic", "field", fieldKey);
                    field.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText(label);
                }
                if (nodeValue instanceof MyAnyType) {
                    parseElement(field, (MyAnyType) nodeValue);
                } else {
                    field.visitText(normalize((String) nodeValue, false));
                }
                if (fieldKey != null) {
                    top.visitLineBreak(LineBreakKind.PARAGRAPH);
                }
            } else {
                throw new RuntimeException(nodeName + "/" + nodeValue.getClass().getName());
            }
        }
        prolog.trimWhitespace();
        prolog.finished();
    }
    return bible;
}
Aggregations