use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.
the class ESwordHTML method doExport.
/**
 * Exports the given bible as two UTF-8 encoded HTML files:
 * {@code exportArgs[0] + ".bblx.HTM"} receives one paragraph per virtual
 * verse, and {@code exportArgs[0] + ".cmtx.HTM"} receives commentary
 * paragraphs for those verses whose rendered commentary contains the
 * {@code <!--keep-->} marker.
 *
 * <p>Books without an entry in {@code BOOK_INFO_BY_ID} and chapters beyond the
 * length of the book's versification array are skipped with a warning on
 * standard output.
 *
 * @param bible the bible to export; {@code StrippedDiffable.mergeIntroductionPrologs}
 *        is applied to it before anything is written
 * @param exportArgs {@code [0]} is the output file name prefix (required);
 *        {@code [1]}, when present, is a marker string inserted before the
 *        closing paragraph tags written by this method (empty when only one
 *        argument is given)
 * @throws Exception if writing either file fails or a visitor reports an error
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    new StrippedDiffable().mergeIntroductionPrologs(bible);
    String filename = exportArgs[0];
    String marker = exportArgs.length == 1 ? "" : exportArgs[1];
    String title = bible.getName();
    try (BufferedWriter bblx = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(filename + ".bblx.HTM")), StandardCharsets.UTF_8));
            BufferedWriter cmtx = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(filename + ".cmtx.HTM")), StandardCharsets.UTF_8))) {
        // Header of the verse file: style sheet plus "#define" metadata paragraphs.
        bblx.write("<html><head>\n" + "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" />\n" + "<style>\n" + "p{margin-top:0pt;margin-bottom:0pt;}\n" + "b.headline{font-size:14pt;}\n" + "sup.str{color:#008000;}\n" + ".xref {color:#008000;font-weight:bold;text-decoration:underline;}\n" + "</style>\n" + "</head><body>\n"
                + "<p>#define description=" + title + marker + "</p>\n" + "<p>#define abbreviation=ChangeMe" + marker + "</p>\n" + "<p>#define comments=Exported by BibleMultiConverter" + marker + "</p>\n" + "<p>#define version=1" + marker + "</p>\n" + "<p>#define strong=0" + marker + "</p>\n" + "<p>#define right2left=0" + marker + "</p>\n" + "<p>#define ot=1" + marker + "</p>\n" + "<p>#define nt=1" + marker + "</p>\n" + "<p>#define font=DEFAULT" + marker + "</p>\n" + "<p>#define apocrypha=1" + marker + "</p>\n"
                + "<p><span style=\"background-color:#C80000;\">\u00F7</span>" + marker + "</p>\n");
        // Header of the commentary file.
        cmtx.write("<html><head>\n" + "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" />\n" + "<style>\n" + "p{margin-top:0pt;margin-bottom:0pt;}\n" + "p.spc{margin-top:10pt;margin-bottom:0pt;}\n" + "p.prologend{border-width:1px;border-top-style:none;border-right-style:none;border-bottom-style:solid;border-left-style:none;border-color:black}\n" + "b.headline{font-size:14pt;}\n" + "sup.str{color:#008000;}\n" + "</style></head><body>\n"
                + "<p>#define description=" + title + " (Kommentar)" + marker + "</p>\n" + "<p>#define abbreviation=ChangeMe" + marker + "</p>\n" + "<p>#define comments=Exported by BibleMultiConverter" + marker + "</p>\n" + "<p>#define version=1" + marker + "</p>\r\n");
        for (Book book : bible.getBooks()) {
            ESwordBookInfo info = BOOK_INFO_BY_ID.get(book.getId());
            if (info == null) {
                System.out.println("WARNING: Skipping book " + book.getAbbr());
                continue;
            }
            String bname = info.name;
            int cnumber = 0;
            for (Chapter chapter : book.getChapters()) {
                cnumber++;
                if (cnumber > info.versification.length) {
                    System.out.println("WARNING: Skipping chapter " + book.getAbbr() + " " + cnumber);
                    continue;
                }
                // Only verse numbers 1..maxVerse of the target versification are allowed.
                int maxVerse = info.versification[cnumber - 1];
                BitSet allowedNumbers = new BitSet(maxVerse + 1);
                allowedNumbers.set(1, maxVerse + 1);
                FormattedText prolog = chapter.getProlog();
                for (VirtualVerse vv : chapter.createVirtualVerses(allowedNumbers)) {
                    int vnumber = vv.getNumber();
                    String vref = bname + " " + cnumber + ":" + vnumber;
                    StringBuilder parsedVerse = new StringBuilder();
                    StringBuilder parsedCommentary = new StringBuilder();
                    for (Headline hl : vv.getHeadlines()) {
                        parsedVerse.append("<b class=\"headline\">");
                        hl.accept(new ESwordVisitor(parsedVerse, marker, book.getId().isNT(), "", "", null, null));
                        parsedVerse.append("</b><br />");
                    }
                    for (Verse v : vv.getVerses()) {
                        // Show the original verse number when it differs from the virtual verse number.
                        if (!v.getNumber().equals("" + vnumber)) {
                            parsedVerse.append("<b>(" + v.getNumber() + ")</b>");
                        }
                        StringBuilder comments = new StringBuilder();
                        if (prolog != null) {
                            prolog.accept(new ESwordVisitor(comments, marker, book.getId().isNT(), "", "", "<i>", "</i>"));
                            comments.append(marker + "</p>\n<!--keep--><p class=\"prologend\"> " + marker + "</p>\n<p class=\"spc\">");
                            // Emit the chapter prolog only once, even when this virtual
                            // verse groups several verses; otherwise it would be repeated
                            // in the commentary for each of them.
                            prolog = null;
                        }
                        v.accept(new ESwordVisitor(parsedVerse, marker, book.getId().isNT(), "", "", null, null));
                        v.accept(new ESwordVisitor(comments, marker, book.getId().isNT(), "<b>", "</b>", "", ""));
                        // Commentary is only kept when something flagged it as worth keeping.
                        if (comments.toString().contains("<!--keep-->"))
                            parsedCommentary.append(comments.toString());
                    }
                    if (parsedVerse.length() == 0)
                        parsedVerse.append("-");
                    bblx.write("<p>" + vref + " " + parsedVerse.toString() + marker + "</p>\n");
                    if (parsedCommentary.length() > 0)
                        cmtx.write("<p><span style=\"background-color:#FF0000;\">\u00F7</span>" + vref + marker + "</p>\n<p>" + parsedCommentary.toString() + marker + "</p>\n");
                    // The prolog belongs to the first virtual verse of the chapter only.
                    prolog = null;
                }
            }
        }
        bblx.write("</body></html>");
        cmtx.write("</body></html>");
    }
}
use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.
the class SWORD method doImport.
/**
 * Builds a {@code Bible} from the given book by collecting its verse keys,
 * rendering each one to a DOM fragment through the book's SAX event provider
 * and handing the fragment to {@code OSISHelper.handleVerse}.
 *
 * <p>Verse keys that the key list iterator does not return but that
 * {@code book.contains} reports are added as well (with a warning). Verse
 * number 0 is stored as the chapter prolog. Empty verses/prologs, trailing
 * empty chapters and books left without chapters are removed again.
 *
 * @param book the source book
 * @return the converted bible
 * @throws Exception if XML infrastructure setup or SAX event generation fails
 */
protected Bible doImport(Book book) throws Exception {
    final OSISHelper osisHelper = new OSISHelper();
    final Bible bible = new Bible(book.getName());
    final TransformerHandler handler = ((SAXTransformerFactory) SAXTransformerFactory.newInstance()).newTransformerHandler();
    final Map<BookID, biblemulticonverter.data.Book> booksById = new EnumMap<>(BookID.class);
    final Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    final List<Verse> orderedVerses = new ArrayList<>();

    // Phase 1: walk the key list, filling in verses the iterator jumped over.
    Verse expectedNext = null;
    final Iterator<?> keys = book.getGlobalKeyList().iterator();
    while (keys.hasNext()) {
        final Verse current = (Verse) keys.next();
        // Everything between the expected successor and the verse actually returned.
        for (; expectedNext != null && !expectedNext.equals(current); expectedNext = expectedNext.getVersification().add(expectedNext, 1)) {
            if (book.contains(expectedNext)) {
                System.out.println("WARNING: Verse (after) skipped by iterator: " + expectedNext);
                orderedVerses.add(expectedNext);
            }
        }
        // Walk backwards until a verse that is already known (or the start) is reached.
        final List<Verse> missedBefore = new ArrayList<>();
        for (Verse earlier = current.getVersification().subtract(current, 1);
                earlier != null && !orderedVerses.contains(earlier) && !missedBefore.contains(earlier);
                earlier = earlier.getVersification().subtract(earlier, 1)) {
            missedBefore.add(0, earlier);
        }
        for (Verse missed : missedBefore) {
            if (book.contains(missed)) {
                System.out.println("WARNING: Verse (before) skipped by iterator: " + missed);
                orderedVerses.add(missed);
            }
        }
        orderedVerses.add(current);
        expectedNext = current.getVersification().add(current, 1);
    }
    // Anything after the last verse returned by the iterator.
    while (expectedNext != null) {
        if (book.contains(expectedNext)) {
            System.out.println("WARNING: Verse (at end) skipped by iterator: " + expectedNext);
            orderedVerses.add(expectedNext);
        }
        final Verse following = expectedNext.getVersification().add(expectedNext, 1);
        if (expectedNext.equals(following)) {
            // add() no longer advances: stop here.
            break;
        }
        expectedNext = following;
    }

    // Phase 2: convert every collected verse.
    for (Verse source : orderedVerses) {
        final BookID bookId = biblemulticonverter.sword.BookMapping.MAPPING.get(source.getBook());
        biblemulticonverter.data.Book target = booksById.get(bookId);
        if (!booksById.containsKey(bookId)) {
            final String abbr = bookId.getOsisID().replace("x-Intr", "Intr");
            final String englishName = bookId.getEnglishName();
            target = new biblemulticonverter.data.Book(abbr, bookId, englishName, englishName);
            booksById.put(bookId, target);
            target.getChapters().add(new Chapter());
            bible.getBooks().add(target);
        }
        final int chapterNum = source.getChapter();
        final int verseNum = source.getVerse();
        while (target.getChapters().size() < chapterNum) {
            target.getChapters().add(new Chapter());
        }
        final int chapterIndex;
        if (chapterNum == 0) {
            chapterIndex = 0;
        } else {
            chapterIndex = chapterNum - 1;
        }
        final Chapter chapter = target.getChapters().get(chapterIndex);

        final FormattedText content;
        if (verseNum != 0) {
            if (chapterNum == 0) {
                throw new IllegalStateException("Verse " + verseNum + " in chapter 0 is invalid");
            }
            final biblemulticonverter.data.Verse parsedVerse = new biblemulticonverter.data.Verse(String.valueOf(verseNum));
            chapter.getVerses().add(parsedVerse);
            content = parsedVerse;
        } else {
            // Verse 0 becomes the prolog; an already existing prolog is copied in front.
            content = new FormattedText();
            final FormattedText previousProlog = chapter.getProlog();
            if (previousProlog != null) {
                previousProlog.accept(content.getAppendVisitor());
            }
            chapter.setProlog(content);
        }

        // Render the verse into a detached <verse> element.
        final Element holder = document.createElement("verse");
        handler.setResult(new DOMResult(holder));
        new BookData(book, source).getSAXEventProvider().provideSAXEvents(handler);

        // Expected shape: a single <div> whose first child is a <title>.
        final Node wrapper = holder.getFirstChild();
        final boolean titledDiv = holder.getChildNodes().getLength() == 1
                && wrapper instanceof Element
                && wrapper.getNodeName().equals("div")
                && wrapper.getChildNodes().getLength() >= 1
                && wrapper.getFirstChild().getNodeName().equals("title");
        if (!titledDiv) {
            throw new RuntimeException("Unexpected OSIS structure!");
        }
        // Drop the <div> and its <title>, hoisting the remaining children.
        final Element div = (Element) wrapper;
        holder.removeChild(div);
        div.removeChild(div.getFirstChild());
        for (Node child = div.getFirstChild(); child != null; child = div.getFirstChild()) {
            div.removeChild(child);
            holder.appendChild(child);
        }

        osisHelper.handleVerse(holder, content);
        if (content.getElementTypes(1).length() == 0) {
            System.out.println("WARNING: Empty verse " + target.getAbbr() + " " + chapterNum + ":" + verseNum);
            if (content instanceof biblemulticonverter.data.Verse) {
                chapter.getVerses().remove(content);
            } else {
                chapter.setProlog(null);
            }
        }
    }

    // Phase 3: strip trailing empty chapters and books that ended up empty.
    for (biblemulticonverter.data.Book parsed : booksById.values()) {
        while (!parsed.getChapters().isEmpty()) {
            final Chapter last = parsed.getChapters().get(parsed.getChapters().size() - 1);
            if (last.getProlog() != null || !last.getVerses().isEmpty()) {
                break;
            }
            parsed.getChapters().remove(last);
        }
        if (parsed.getChapters().isEmpty()) {
            bible.getBooks().remove(parsed);
        }
    }
    return bible;
}
use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.
the class NeUeParser method doImport.
/**
 * Imports the NeUe HTML files found in {@code inputDirectory} into a
 * {@code Bible}.
 *
 * <p>The method reads, in this order:
 * <ol>
 * <li>the main file ({@code NeUe.htm}, or {@code index.htm} when the former
 * does not exist): version metadata, table of contents (validated against
 * {@code METADATA} and {@code JESUS_CHRONIK}) and the preface;</li>
 * <li>one {@code <filename>.html} per {@code METADATA} entry (missing files are
 * skipped with a message): book prolog, headlines, chapters, verses and
 * footnotes;</li>
 * <li>{@code bibel.html}: an overview that is stored in the appendix book;</li>
 * <li>one {@code <name>.html} per {@code JESUS_CHRONIK} entry (missing files are
 * skipped with a message), also stored in the appendix book.</li>
 * </ol>
 *
 * <p>The parser is strict: any line that does not match one of the expected
 * shapes aborts the import with an {@link IOException} whose message is the
 * offending text.
 *
 * @param inputDirectory directory containing the HTML files
 * @return the parsed bible
 * @throws Exception if a file cannot be read or has an unexpected structure
 */
@Override
public Bible doImport(File inputDirectory) throws Exception {
    Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
    MetadataBook metadata = new MetadataBook();
    metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
    metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
    metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
    metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
    metadata.setValue(MetadataBookKey.language, "GER");
    bible.getBooks().add(metadata.getBook());
    String mainFile = "NeUe.htm";
    if (!new File(inputDirectory, mainFile).exists())
        mainFile = "index.htm";
    try (BufferedReader br = createReader(inputDirectory, mainFile)) {
        // Header: look for the text revision ("Textstand") until the table of contents starts.
        String line = br.readLine().trim();
        while (!line.startsWith("<p class=\"u3\">")) {
            if (line.contains("Textstand: ")) {
                line = line.substring(line.indexOf("Textstand: ") + 11);
                line = line.substring(0, line.indexOf('<'));
                metadata.setValue(MetadataBookKey.version, line);
                metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
                metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
                metadata.finished();
            }
            line = br.readLine().trim();
        }
        // Table of contents: every link must match the next expected METADATA / JESUS_CHRONIK entry.
        Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a> (?:</p>)?");
        int bookIndex = 0, jcIndex = 0;
        while (!line.startsWith("<a name=\"vorwort\">")) {
            if (line.equals("<br>")) {
                line = br.readLine().trim();
                if (line.startsWith("»» "))
                    line = line.substring("»» ".length());
            }
            Matcher m = tocPattern.matcher(line);
            if (m.matches()) {
                String url = m.group(1);
                String shortName = replaceEntities(m.group(2));
                if (url.endsWith(".html#bb")) {
                    String filename = url.substring(0, url.length() - 8);
                    BookMetadata bm = METADATA[bookIndex];
                    if (!bm.filename.equals(filename))
                        throw new IOException(filename + "/" + bm.filename);
                    bm.shortname = shortName;
                    bookIndex++;
                } else if (url.startsWith("0")) {
                    if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
                        throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
                    jcIndex++;
                } else {
                    throw new IOException(url);
                }
            } else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p> </p>") && !line.equals("<p><a name=\"bb\"> </a></p>")) {
                throw new IOException(line);
            }
            line = br.readLine().trim();
        }
        if (bookIndex != METADATA.length)
            throw new IOException(bookIndex + " != " + METADATA.length);
        // No chronicle links at all: treat the chronicle as absent instead of failing.
        if (jcIndex == 0)
            JESUS_CHRONIK = new String[0];
        if (jcIndex != JESUS_CHRONIK.length)
            throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
        // Vorwort (preface)
        Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
        bible.getBooks().add(vorwort);
        Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
        boolean needParagraph = false;
        if (line.endsWith("</a><br>"))
            line = br.readLine().trim();
        while (!line.startsWith("<div align=\"right\">")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            if (line.startsWith("<h2>")) {
                // The main heading is only verified, not stored again.
                if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
                    throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
            } else if (line.startsWith("<h4>")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<h4 id=")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<div class=\"fn\">")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
            } else if (line.startsWith("<p>")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                if (line.endsWith("<br />"))
                    line += br.readLine().trim();
                parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
            } else if (line.equals("<ul>")) {
                // Lists are skipped entirely.
                while (!line.equals("</ul>")) {
                    line = br.readLine();
                }
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<p> </p>");
        }
        vorwort.getChapters().get(0).getProlog().finished();
    }
    // Bible books, one HTML file each.
    for (BookMetadata bm : METADATA) {
        if (!new File(inputDirectory, bm.filename + ".html").exists()) {
            System.out.println("*** Skipping " + bm.filename + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
            String line = br.readLine().trim();
            line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "»»");
            if (!line.equals("<p><a name=\"bb\"> </a></p>") && !line.equals("<p><a id=\"bb\"> </a></p>"))
                throw new IOException(line);
            line = skipLines(br);
            if (line.equals("<p> </p>"))
                line = br.readLine().trim();
            Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
            bible.getBooks().add(bk);
            line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p> </p>");
            // Book prolog: headline, one or more introduction blocks, then a bold/italic closing line.
            FormattedText prolog = new FormattedText();
            prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
            line = skipLines(br);
            boolean firstProlog = true;
            while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
                if (firstProlog) {
                    firstProlog = false;
                } else {
                    prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                }
                parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
                line = skipLines(br);
            }
            if (firstProlog)
                throw new IOException(line);
            prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
            parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
            prolog.finished();
            line = skipLines(br);
            if (!line.startsWith("<h"))
                throw new IOException(line);
            // The first headline level found in the book is mapped to depth 1.
            char minHeadline = line.charAt(2);
            List<Headline> headlines = new ArrayList<>();
            boolean inParagraph = false;
            Chapter currentChapter = null;
            Verse currentVerse = null;
            // Footnote targets that still wait for their text, and the verse reference each one must start with.
            List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
            List<String> footnoteVerses = new ArrayList<>();
            while (!line.equals("<hr>")) {
                if (line.startsWith("<p> </p>")) {
                    // Strip exactly the matched prefix. The offset used to be hard-coded and did
                    // not agree with the literal as it stands in this file.
                    // NOTE(review): the old offset (13) suggests the literal once was an HTML
                    // entity form - confirm against the input files.
                    line = line.substring("<p> </p>".length()).trim();
                    if (line.length() == 0)
                        line = skipLines(br);
                    continue;
                }
                // Text that follows the part handled in this iteration; processed in the next one.
                String restLine = null;
                List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
                // Drop id attributes from the leading tag so the startsWith checks below match.
                while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
                if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
                    line = "<p>" + line.substring(16);
                }
                if (line.matches(".*</p>.+")) {
                    int pos = line.indexOf("</p>");
                    restLine = line.substring(pos + 4).trim();
                    line = line.substring(0, pos + 4);
                }
                if (!inParagraph && line.startsWith("<p>")) {
                    inParagraph = true;
                    line = line.substring(3).trim();
                    if (line.length() == 0) {
                        line = skipLines(br);
                        continue;
                    }
                }
                // Split before a verse marker or poetry paragraph that starts in the middle of the line.
                if (line.indexOf("<span class=\"vers\">", 1) != -1) {
                    int pos = line.indexOf("<span class=\"vers\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                if (line.indexOf("<p class=\"poet\">", 1) != -1) {
                    int pos = line.indexOf("<p class=\"poet\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                // Remove exactly the matched suffix (offset derived from the literal, see note above).
                while (line.endsWith(" ")) line = line.substring(0, line.length() - " ".length());
                if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
                    // Headlines are collected and attached to the next verse.
                    Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
                    String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
                    if (headline.contains("*"))
                        throw new IOException(headline);
                    hl.getAppendVisitor().visitText(replaceEntities(headline));
                    headlines.add(hl);
                } else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
                    // Start of a new verse.
                    int pos = line.indexOf("</span>");
                    if (pos == -1)
                        throw new IOException(line);
                    String vs = line.substring(19, pos).trim();
                    if (vs.endsWith(" ")) {
                        vs = cutAffix(vs, "", " ");
                    }
                    if (vs.matches("[0-9]+(,[0-9]+)?")) {
                        currentVerse = new Verse(vs);
                    } else {
                        throw new IOException(vs);
                    }
                    line = line.substring(pos + 7);
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    if (line.startsWith(" ")) {
                        // Offset derived from the literal (see note above).
                        line = line.substring(" ".length());
                    }
                    for (Headline h : headlines) {
                        h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
                    }
                    headlines.clear();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                    currentChapter.getVerses().add(currentVerse);
                } else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
                    // Start of a new chapter; chapter numbers must be consecutive.
                    int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
                    currentChapter = new Chapter();
                    currentVerse = null;
                    bk.getChapters().add(currentChapter);
                    if (chap != bk.getChapters().size())
                        throw new IOException(chap + "/" + bk.getChapters().size());
                    // The book prolog is attached to the first chapter only.
                    if (prolog != null) {
                        currentChapter.setProlog(prolog);
                        prolog = null;
                    }
                } else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
                    // Footnote text: must belong to the oldest pending footnote.
                    String content = cutAffix(line, "<div class=\"fn\">", "</div>");
                    if (footnoteVerses.size() == 0)
                        throw new IOException(line);
                    String prefix = footnoteVerses.remove(0) + ":";
                    if (!content.startsWith(prefix)) {
                        throw new IOException(prefix + " / " + content);
                    }
                    parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
                } else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith(" ") || line.startsWith("<span class=\"u2\">"))) {
                    // Continuation of the current verse.
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    System.err.println("Next line: " + br.readLine());
                    throw new IOException(line);
                }
                if (!newFootnotes.isEmpty()) {
                    footnotes.addAll(newFootnotes);
                    for (int i = 0; i < newFootnotes.size(); i++) {
                        // Verse numbers containing a comma are used as the reference as they are.
                        if (currentVerse.getNumber().contains(",")) {
                            footnoteVerses.add(currentVerse.getNumber());
                        } else {
                            footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
                        }
                    }
                }
                if (restLine != null)
                    line = restLine;
                else
                    line = skipLines(br);
            }
            if (!headlines.isEmpty())
                throw new IOException("" + headlines.size());
            if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
                throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
            for (Chapter ch : bk.getChapters()) {
                for (Verse vv : ch.getVerses()) {
                    vv.trimWhitespace();
                    vv.finished();
                }
            }
        }
    }
    // Anhang (appendix)
    Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
    bible.getBooks().add(anhang);
    Visitor<RuntimeException> vv = getPrologVisitor(anhang);
    vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
    try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
        String line = br.readLine().trim();
        while (!line.startsWith("<a name=\"at\">")) {
            line = br.readLine().trim();
        }
        while (!line.equals("</body>")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            line = line.replaceAll("> +<", "><");
            line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
            if (line.startsWith("<h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
            } else if (line.startsWith("<h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
            } else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
                // Table row: link to a book, its title and a description.
                String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
                line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
                if (line.contains("<td><br /><br /><a href")) {
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                    line = line.replace("<td><br /><br /><a href", "<td><a href");
                }
                String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
                Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
                BookMetadata m = null;
                for (BookMetadata bm : METADATA) {
                    if (bm.filename.equals(parts[0])) {
                        m = bm;
                        break;
                    }
                }
                // Fail with the offending file name instead of a NullPointerException below.
                if (m == null)
                    throw new IOException(parts[0]);
                bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
                bold.visitText(" " + replaceEntities(title));
                vv.visitLineBreak(LineBreakKind.NEWLINE);
                line = br.readLine().trim();
                while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
                vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
                vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                line = br.readLine().trim();
                if (!line.equals("</tr>"))
                    throw new IOException(line);
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p> </p><p> </p><p> </p><p> </p>", "<p> </p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
        }
    }
    // Hesekiels Tempel (link for offline use, inline image for online use)
    vv.visitHeadline(1).visitText("Hesekiels Tempel");
    Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
    vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
    vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");
    // Jesus-Chronik
    if (JESUS_CHRONIK.length > 0)
        vv.visitHeadline(1).visitText("Die Jesus-Chronik");
    for (String name : JESUS_CHRONIK) {
        if (!new File(inputDirectory, name + ".html").exists()) {
            System.out.println("*** Skipping " + name + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
            String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
            // Pending footnotes of this file and the prefix each footnote text must start with.
            List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
            List<String> footnotePrefixes = new ArrayList<>();
            while (!line.startsWith("</body>")) {
                line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
                if (line.startsWith("<h2>")) {
                    parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
                } else if (line.startsWith("<div class=\"fn\">")) {
                    // A footnote block may span several lines and hold several footnotes.
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
                    for (String fn : fns) {
                        fn = fn.trim();
                        String pfx = footnotePrefixes.remove(0);
                        Visitor<RuntimeException> fnv = footnoteList.remove(0);
                        if (!fn.startsWith(pfx))
                            throw new IOException(pfx + " / " + fn);
                        parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
                    }
                } else if (line.startsWith("<p><div class=\"rot\">")) {
                    String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
                    // Bold first line becomes a headline; the remainder (if any) is set in italics.
                    int pos = line.indexOf("</b><br />");
                    parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
                    String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
                    if (!xref.isEmpty())
                        parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p>")) {
                    parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("©")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<div class=\"e\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    throw new IOException(line);
                }
                line = skipLines(br);
            }
            if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
                throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
        }
    }
    anhang.getChapters().get(0).getProlog().trimWhitespace();
    anhang.getChapters().get(0).getProlog().finished();
    return bible;
}
use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.
the class NeUeParser method getPrologVisitor.
/**
 * Appends a new chapter to {@code book}, installs a fresh, empty prolog on the
 * book's first chapter (index 0) and returns the visitor that appends to that
 * prolog.
 *
 * @param book the book to receive the chapter and prolog
 * @return append visitor of the newly created prolog
 */
private Visitor<RuntimeException> getPrologVisitor(Book book) {
    final FormattedText prologText = new FormattedText();
    book.getChapters().add(new Chapter());
    final Chapter firstChapter = book.getChapters().get(0);
    firstChapter.setProlog(prologText);
    return prologText.getAppendVisitor();
}
use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.
the class OSIS method parseChapter.
/**
 * Parses the children of an OSIS chapter element into {@code chapter}.
 *
 * <p>{@code title} elements become headlines that are attached to the next
 * verse (or to the prolog). {@code verse} elements become verses; an
 * {@code osisID} listing several space-separated verses is turned into a
 * range ({@code first-last}) when the numbers are consecutive, otherwise into
 * a dot-separated list. Any other non-whitespace content found before the
 * first verse is gathered, up to the next {@code verse} element, into the
 * chapter prolog. Such content found later only triggers a warning.
 *
 * @param chapterName OSIS name of the chapter; verse {@code osisID}s must start
 *        with this name followed by a dot
 * @param osisChapter the chapter element; it is modified in place (headline
 *        whitespace is trimmed and prolog content is moved into a new
 *        {@code prolog} holder element)
 * @param chapter the chapter that receives the prolog and verses
 * @param unclosedElements passed through to {@code convertFromMilestoned}
 * @throws IllegalArgumentException if a verse still carries a milestone
 *         attribute ({@code sID} or {@code eID})
 * @throws IllegalStateException if a verse has no {@code osisID} or one that
 *         does not belong to this chapter
 */
private void parseChapter(String chapterName, Element osisChapter, Chapter chapter, List<Element> unclosedElements) {
    // -1: nothing seen yet; 0: prolog seen; otherwise the first number of the last verse parsed.
    int lastVerse = -1;
    List<Headline> headlines = new ArrayList<Headline>();
    for (Node node = osisChapter.getFirstChild(); node != null; node = node.getNextSibling()) {
        boolean startProlog = false;
        if (node instanceof Text) {
            if (node.getTextContent().trim().length() == 0)
                continue;
            if (lastVerse == -1) {
                startProlog = true;
            } else {
                printWarning("WARNING: Non-whitespace at chapter level: " + node.getTextContent());
            }
        } else if (node instanceof Element) {
            Element elem = (Element) node;
            if (elem.getNodeName().equals("title")) {
                // Chapter titles get depth 1, all other titles depth 2.
                Headline hl = new Headline(2);
                if (elem.getAttribute("type").equals("chapter")) {
                    hl = new Headline(1);
                }
                if (elem.getChildNodes().getLength() == 1 && elem.getFirstChild() instanceof Text) {
                    String text = elem.getFirstChild().getTextContent();
                    if (!text.equals(text.trim())) {
                        printWarning("WARNING: Whitespace at beginning/end of headline: '" + text + "'");
                        elem.getFirstChild().setNodeValue(text.trim());
                    }
                }
                convertFromMilestoned(elem, unclosedElements);
                parseFormattedText(null, elem, hl);
                if (hl.getElementTypes(1).length() == 0) {
                    printWarning("WARNING: Empty headline in " + chapterName);
                } else {
                    headlines.add(hl);
                }
            } else if (elem.getNodeName().equals("verse")) {
                String osisID = elem.getAttribute("osisID");
                // Both start (sID) and end (eID) milestones must be gone by now.
                // The second operand used to test sID again, so eID was never checked.
                if (!elem.getAttribute("sID").isEmpty() || !elem.getAttribute("eID").isEmpty())
                    throw new IllegalArgumentException("verse should have been de-milestoned already.");
                if (osisID.isEmpty())
                    throw new IllegalStateException("Verse without osisID");
                if (!osisID.startsWith(chapterName + "."))
                    throw new IllegalStateException("Invalid verse " + osisID + " in chapter " + chapterName);
                String vnumber = osisID.substring(chapterName.length() + 1);
                if (osisID.contains(" ")) {
                    // Several verses in one element: build "a.b.c" and collapse it to
                    // "a-c" if the numbers turn out to be consecutive.
                    vnumber = vnumber.substring(0, vnumber.indexOf(' '));
                    lastVerse = Integer.parseInt(vnumber);
                    int nextInRange = lastVerse + 1;
                    boolean first = true;
                    for (String part : osisID.split(" ")) {
                        if (first) {
                            first = false;
                            continue;
                        }
                        if (!part.startsWith(chapterName + "."))
                            throw new IllegalStateException("Invalid verse " + osisID + " in chapter " + chapterName);
                        String partNumber = part.substring(chapterName.length() + 1);
                        vnumber = vnumber + "." + partNumber;
                        if (partNumber.equals("" + nextInRange)) {
                            nextInRange++;
                        } else {
                            // Not consecutive: keep the dot-separated list.
                            nextInRange = -1;
                        }
                    }
                    if (nextInRange != -1) {
                        vnumber = lastVerse + "-" + (nextInRange - 1);
                    }
                } else {
                    lastVerse = Integer.parseInt(vnumber);
                }
                Verse verse = new Verse(vnumber);
                warningContext = osisID;
                for (Headline hl : headlines) {
                    hl.accept(verse.getAppendVisitor().visitHeadline(hl.getDepth()));
                }
                headlines.clear();
                chapter.getVerses().add(verse);
                convertFromMilestoned(elem, unclosedElements);
                parseFormattedText(osisID, elem, verse);
                verse.trimWhitespace();
                verse.finished();
                if (verse.getElementTypes(1).length() == 0) {
                    printWarning("WARNING: Empty verse " + osisID);
                    chapter.getVerses().remove(verse);
                }
                warningContext += " (after closing)";
            } else if (lastVerse == -1) {
                startProlog = true;
            } else {
                printWarning("WARNING: " + elem.getNodeName() + " at invalid location");
            }
        }
        if (startProlog) {
            // Move this node and all following siblings up to the next <verse>
            // into a synthetic <prolog> element and parse that as the chapter prolog.
            Element holder = osisChapter.getOwnerDocument().createElement("prolog");
            osisChapter.insertBefore(holder, node);
            while (holder.getNextSibling() != null && !holder.getNextSibling().getNodeName().equals("verse")) {
                holder.appendChild(holder.getNextSibling());
            }
            lastVerse = 0;
            FormattedText prolog = new FormattedText();
            chapter.setProlog(prolog);
            for (Headline hl : headlines) {
                hl.accept(prolog.getAppendVisitor().visitHeadline(hl.getDepth()));
            }
            headlines.clear();
            convertFromMilestoned(holder, unclosedElements);
            parseFormattedText(null, holder, prolog);
            prolog.trimWhitespace();
            prolog.finished();
            // Continue iterating after the holder; the moved nodes are no longer siblings.
            node = holder;
        }
    }
    if (headlines.size() > 0)
        printWarning("WARNING: Unused headlines: " + headlines.size());
}
Aggregations