Search in sources:

Example 6 with Visitor

use of biblemulticonverter.data.FormattedText.Visitor in project BibleMultiConverter by schierlm.

In the class Diffable, method parseDiffable.

/**
 * Parses one line of tag-based markup and replays its content on the given visitor.
 *
 * <p>Text between tags is forwarded via {@code visitText}. Every tag that opens a nested
 * scope pushes the current visitor onto a stack and continues with the child visitor
 * returned by the corresponding {@code visit...} call; a closing tag (any tag starting
 * with {@code /}) pops the stack again. The name of a closing tag is not compared with
 * the tag that opened the scope.
 *
 * @param visitor visitor that receives the parsed content
 * @param line the markup line to parse
 * @throws IOException if a tag is unclosed, malformed, unsupported, or a closing tag
 *         appears without a matching opening tag
 * @throws RuntimeException if the line ends while nested tags are still open
 */
private void parseDiffable(Visitor<RuntimeException> visitor, String line) throws IOException {
    int lastPos = 0, pos = line.indexOf('<');
    List<Visitor<RuntimeException>> visitorStack = new ArrayList<Visitor<RuntimeException>>();
    while (pos != -1) {
        // Plain text in front of the next tag.
        if (pos > lastPos) {
            visitor.visitText(line.substring(lastPos, pos));
        }
        int endPos = line.indexOf('>', pos);
        if (endPos == -1)
            throw new IOException("Unclosed tag: " + line.substring(pos));
        String tag = line.substring(pos + 1, endPos);
        // Strip the trailing slash of a self-closing tag (but keep a lone "/").
        if (tag.length() > 1 && tag.endsWith("/"))
            tag = tag.substring(0, tag.length() - 1);
        Map<String, String> tagArgs = new HashMap<String, String>();
        lastPos = endPos + 1;
        if (tag.contains(" ")) {
            // Parse name="value" attributes following the tag name.
            int tpos = tag.indexOf(' ');
            while (tpos < tag.length()) {
                if (tag.charAt(tpos) == ' ')
                    tpos++;
                int aspos = tag.indexOf("=\"", tpos);
                int aepos = tag.indexOf("\"", aspos + 2);
                if (aspos == -1 || aepos == -1)
                    throw new IOException("Malformed tag: <" + tag + ">");
                tagArgs.put(tag.substring(tpos, aspos), tag.substring(aspos + 2, aepos));
                tpos = aepos + 1;
            }
            tag = tag.substring(0, tag.indexOf(' '));
        }
        if (tag.startsWith("/")) {
            // Closing tag: return to the visitor of the enclosing scope. Report stray
            // closing tags as a parse error instead of failing on an empty stack.
            if (visitorStack.isEmpty())
                throw new IOException("Closing tag without opening tag: <" + tag + ">");
            visitor = visitorStack.remove(visitorStack.size() - 1);
        } else if (tag.length() == 1 && tag.charAt(0) >= 'a' && tag.charAt(0) <= 'z') {
            // Single lowercase letter: formatting instruction identified by that character.
            visitorStack.add(visitor);
            visitor = visitor.visitFormattingInstruction(FormattingInstructionKind.fromChar(tag.charAt(0)));
        } else if (tag.length() == 2 && tag.startsWith("h") && tag.charAt(1) >= '1' && tag.charAt(1) <= '9') {
            // h1..h9: headline of the given depth.
            visitorStack.add(visitor);
            visitor = visitor.visitHeadline(tag.charAt(1) - '0');
        } else if (tag.startsWith("raw:")) {
            // Raw HTML: everything up to the matching closing tag is passed on unparsed.
            validateTagArgs(tag, tagArgs, "mode");
            int markerPos = line.indexOf("</" + tag + ">", lastPos);
            if (markerPos == -1)
                throw new IOException("Unclosed tag: <" + tag + ">");
            visitor.visitRawHTML(RawHTMLMode.valueOf(tagArgs.get("mode")), line.substring(lastPos, markerPos));
            // Skip the content plus "</", the tag name and ">".
            lastPos = markerPos + tag.length() + 3;
        } else {
            switch(tag) {
                case "<":
                    // "<<>" is the escape sequence for a literal "<".
                    visitor.visitText("<");
                    break;
                case "fn":
                    visitorStack.add(visitor);
                    visitor = visitor.visitFootnote();
                    break;
                case "css":
                    validateTagArgs(tag, tagArgs, "style");
                    visitorStack.add(visitor);
                    visitor = visitor.visitCSSFormatting(tagArgs.get("style"));
                    break;
                case "vs":
                    visitor.visitVerseSeparator();
                    break;
                case "br":
                    validateTagArgs(tag, tagArgs, "kind");
                    visitor.visitLineBreak(LineBreakKind.valueOf(tagArgs.get("kind")));
                    break;
                case "grammar":
                    validateTagArgs(tag, tagArgs, "strong", "rmac", "idx");
                    visitorStack.add(visitor);
                    // An empty rmac attribute is passed on as null.
                    visitor = visitor.visitGrammarInformation(intArray(tagArgs.get("strong")), tagArgs.get("rmac").length() == 0 ? null : tagArgs.get("rmac").split(","), intArray(tagArgs.get("idx")));
                    break;
                case "dict":
                    validateTagArgs(tag, tagArgs, "dictionary", "entry");
                    visitorStack.add(visitor);
                    visitor = visitor.visitDictionaryEntry(tagArgs.get("dictionary"), tagArgs.get("entry"));
                    break;
                case "var":
                    validateTagArgs(tag, tagArgs, "vars");
                    visitorStack.add(visitor);
                    visitor = visitor.visitVariationText(tagArgs.get("vars").split(","));
                    break;
                case "extra":
                    validateTagArgs(tag, tagArgs, "prio", "category", "key", "value");
                    visitorStack.add(visitor);
                    visitor = visitor.visitExtraAttribute(ExtraAttributePriority.valueOf(tagArgs.get("prio")), tagArgs.get("category"), tagArgs.get("key"), tagArgs.get("value"));
                    break;
                case "xref":
                    validateTagArgs(tag, tagArgs, "abbr", "id", "chapters", "verses");
                    // "chapters" and "verses" each hold a first:last pair.
                    String[] chapters = tagArgs.get("chapters").split(":");
                    String[] verses = tagArgs.get("verses").split(":");
                    if (chapters.length != 2 || verses.length != 2)
                        throw new IOException("Malformed \"xref\" tag arguments: " + tagArgs);
                    visitorStack.add(visitor);
                    visitor = visitor.visitCrossReference(tagArgs.get("abbr"), BookID.fromOsisId(tagArgs.get("id")), Integer.parseInt(chapters[0]), verses[0], Integer.parseInt(chapters[1]), verses[1]);
                    break;
                default:
                    throw new IOException("Unsupported tag: " + tag);
            }
        }
        pos = line.indexOf('<', lastPos);
    }
    // Trailing text after the last tag.
    if (lastPos < line.length())
        visitor.visitText(line.substring(lastPos));
    if (visitorStack.size() > 0)
        throw new RuntimeException("Unclosed tags: " + line);
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) IOException(java.io.IOException)

Example 7 with Visitor

use of biblemulticonverter.data.FormattedText.Visitor in project BibleMultiConverter by schierlm.

In the class NeUeParser, method doImport.

/**
 * Imports the NeÜ bible from a directory of HTML files.
 *
 * <p>The import runs in these stages:
 * <ol>
 * <li>Read the main file ({@code NeUe.htm}, or {@code index.htm} if that does not exist):
 * extract the version ("Textstand"), check the table of contents against
 * {@code METADATA} and {@code JESUS_CHRONIK}, and parse the preface.</li>
 * <li>Parse one {@code <filename>.html} per {@code METADATA} entry into a book with
 * prolog, chapters, verses, headlines and footnotes. Missing files are skipped with a
 * console message.</li>
 * <li>Build the appendix book from {@code bibel.html}, a fixed "Hesekiels Tempel"
 * section, and the Jesus chronicle files listed in {@code JESUS_CHRONIK}.</li>
 * </ol>
 *
 * <p>The parser is strict: any line that does not match one of the expected shapes
 * aborts the import with an {@link IOException} carrying the offending line.
 *
 * <p>Side effect: if the table of contents lists no Jesus chronicle entries, the
 * {@code JESUS_CHRONIK} field is replaced by an empty array.
 *
 * @param inputDirectory directory containing the HTML files to import
 * @return the imported bible
 * @throws Exception if a file cannot be read or its content has an unexpected structure
 */
@Override
public Bible doImport(File inputDirectory) throws Exception {
    Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
    MetadataBook metadata = new MetadataBook();
    metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
    metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
    metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
    metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
    metadata.setValue(MetadataBookKey.language, "GER");
    bible.getBooks().add(metadata.getBook());
    String mainFile = "NeUe.htm";
    if (!new File(inputDirectory, mainFile).exists())
        mainFile = "index.htm";
    try (BufferedReader br = createReader(inputDirectory, mainFile)) {
        // Header: scan up to the table of contents, picking up the text version on the way.
        String line = br.readLine().trim();
        while (!line.startsWith("<p class=\"u3\">")) {
            if (line.contains("Textstand: ")) {
                // 11 == "Textstand: ".length(); the version runs up to the next tag.
                line = line.substring(line.indexOf("Textstand: ") + 11);
                line = line.substring(0, line.indexOf('<'));
                metadata.setValue(MetadataBookKey.version, line);
                metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
                metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
                metadata.finished();
            }
            line = br.readLine().trim();
        }
        // Table of contents: every link must match the next expected METADATA or
        // JESUS_CHRONIK entry, in order.
        Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a>&nbsp;&nbsp;(?:</p>)?");
        int bookIndex = 0, jcIndex = 0;
        while (!line.startsWith("<a name=\"vorwort\">")) {
            if (line.equals("<br>")) {
                line = br.readLine().trim();
                if (line.startsWith("&raquo;&raquo;&nbsp;&nbsp;"))
                    line = line.substring("&raquo;&raquo;&nbsp;&nbsp;".length());
            }
            Matcher m = tocPattern.matcher(line);
            if (m.matches()) {
                String url = m.group(1);
                String shortName = replaceEntities(m.group(2));
                if (url.endsWith(".html#bb")) {
                    // 8 == ".html#bb".length()
                    String filename = url.substring(0, url.length() - 8);
                    BookMetadata bm = METADATA[bookIndex];
                    if (!bm.filename.equals(filename))
                        throw new IOException(filename + "/" + bm.filename);
                    bm.shortname = shortName;
                    bookIndex++;
                } else if (url.startsWith("0")) {
                    if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
                        throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
                    jcIndex++;
                } else {
                    throw new IOException(url);
                }
            } else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p>&nbsp;</p>") && !line.equals("<p><a name=\"bb\">&nbsp;</a></p>")) {
                throw new IOException(line);
            }
            line = br.readLine().trim();
        }
        if (bookIndex != METADATA.length)
            throw new IOException(bookIndex + " != " + METADATA.length);
        // No chronicle entries in the table of contents: treat the chronicle as absent.
        if (jcIndex == 0)
            JESUS_CHRONIK = new String[0];
        if (jcIndex != JESUS_CHRONIK.length)
            throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
        // Preface (Vorwort)
        Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
        bible.getBooks().add(vorwort);
        Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
        boolean needParagraph = false;
        if (line.endsWith("</a><br>"))
            line = br.readLine().trim();
        while (!line.startsWith("<div align=\"right\">")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            if (line.startsWith("<h2>")) {
                // The preface title must match the long name given to the book above.
                if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
                    throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
            } else if (line.startsWith("<h4>")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<h4 id=")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<div class=\"fn\">")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
            } else if (line.startsWith("<p>")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                // A paragraph ending in <br /> continues on the next line.
                if (line.endsWith("<br />"))
                    line += br.readLine().trim();
                parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
            } else if (line.equals("<ul>")) {
                // Lists in the preface are skipped entirely.
                while (!line.equals("</ul>")) {
                    line = br.readLine();
                    // readLine() returns null at end of file; report that as a parse
                    // error instead of failing with a NullPointerException.
                    if (line == null)
                        throw new IOException("Unexpected end of file inside <ul>");
                }
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<p>&nbsp;</p>");
        }
        vorwort.getChapters().get(0).getProlog().finished();
    }
    // Bible books, one HTML file each.
    for (BookMetadata bm : METADATA) {
        if (!new File(inputDirectory, bm.filename + ".html").exists()) {
            System.out.println("*** Skipping " + bm.filename + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
            String line = br.readLine().trim();
            line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "&raquo;&raquo;");
            if (!line.equals("<p><a name=\"bb\">&nbsp;</a></p>") && !line.equals("<p><a id=\"bb\">&nbsp;</a></p>"))
                throw new IOException(line);
            line = skipLines(br);
            if (line.equals("<p>&nbsp;</p>"))
                line = br.readLine().trim();
            // The <h1> line supplies the long name of the book.
            Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
            bible.getBooks().add(bk);
            line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p>&nbsp;</p>");
            // Book prolog: a headline, one or more introduction blocks, and a closing
            // bold-italic line.
            FormattedText prolog = new FormattedText();
            prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
            line = skipLines(br);
            boolean firstProlog = true;
            while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
                if (firstProlog) {
                    firstProlog = false;
                } else {
                    prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                }
                parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
                line = skipLines(br);
            }
            // At least one introduction block is required.
            if (firstProlog)
                throw new IOException(line);
            prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
            parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
            prolog.finished();
            line = skipLines(br);
            if (!line.startsWith("<h"))
                throw new IOException(line);
            // The first headline level found becomes depth 1; deeper levels are relative to it.
            char minHeadline = line.charAt(2);
            // Headlines seen since the last verse; they are attached to the next verse.
            List<Headline> headlines = new ArrayList<>();
            boolean inParagraph = false;
            Chapter currentChapter = null;
            Verse currentVerse = null;
            // Footnote visitors opened in the text whose content has not been read yet,
            // with the "chapter,verse" prefix each footnote body is expected to start with.
            List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
            List<String> footnoteVerses = new ArrayList<>();
            while (!line.equals("<hr>")) {
                if (line.startsWith("<p>&nbsp;</p>")) {
                    // 13 == "<p>&nbsp;</p>".length()
                    line = line.substring(13).trim();
                    if (line.length() == 0)
                        line = skipLines(br);
                    continue;
                }
                // Part of the physical line that is processed in a later iteration.
                String restLine = null;
                List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
                // Drop id attributes from the leading tag so the prefix checks below match.
                while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
                if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
                    // Both prefixes are 16 characters long; treat them as plain paragraphs.
                    line = "<p>" + line.substring(16);
                }
                if (line.matches(".*</p>.+")) {
                    // Content after the first </p> is deferred to the next iteration.
                    int pos = line.indexOf("</p>");
                    restLine = line.substring(pos + 4).trim();
                    line = line.substring(0, pos + 4);
                }
                if (!inParagraph && line.startsWith("<p>")) {
                    inParagraph = true;
                    line = line.substring(3).trim();
                    if (line.length() == 0) {
                        line = skipLines(br);
                        continue;
                    }
                }
                // Split in front of a verse marker that is not at the start of the line.
                if (line.indexOf("<span class=\"vers\">", 1) != -1) {
                    int pos = line.indexOf("<span class=\"vers\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                // Likewise split in front of an embedded poetry paragraph.
                if (line.indexOf("<p class=\"poet\">", 1) != -1) {
                    int pos = line.indexOf("<p class=\"poet\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                while (line.endsWith("&nbsp;")) line = line.substring(0, line.length() - 6);
                if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
                    // Headline: remembered until the next verse starts.
                    Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
                    String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
                    if (headline.contains("*"))
                        throw new IOException(headline);
                    hl.getAppendVisitor().visitText(replaceEntities(headline));
                    headlines.add(hl);
                } else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
                    // Start of a new verse.
                    int pos = line.indexOf("</span>");
                    if (pos == -1)
                        throw new IOException(line);
                    // 19 == "<span class=\"vers\">".length()
                    String vs = line.substring(19, pos).trim();
                    if (vs.endsWith("&nbsp;")) {
                        vs = cutAffix(vs, "", "&nbsp;");
                    }
                    if (vs.matches("[0-9]+(,[0-9]+)?")) {
                        currentVerse = new Verse(vs);
                    } else {
                        throw new IOException(vs);
                    }
                    // 7 == "</span>".length()
                    line = line.substring(pos + 7);
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    if (line.startsWith("&nbsp;")) {
                        line = line.substring(6);
                    }
                    // Pending headlines go in front of the verse text.
                    for (Headline h : headlines) {
                        h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
                    }
                    headlines.clear();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                    currentChapter.getVerses().add(currentVerse);
                } else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
                    // Start of a new chapter; chapter numbers must be consecutive from 1.
                    int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
                    currentChapter = new Chapter();
                    currentVerse = null;
                    bk.getChapters().add(currentChapter);
                    if (chap != bk.getChapters().size())
                        throw new IOException(chap + "/" + bk.getChapters().size());
                    // The book prolog is attached to the first chapter only.
                    if (prolog != null) {
                        currentChapter.setProlog(prolog);
                        prolog = null;
                    }
                } else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
                    // Footnote body: must belong to the oldest still-open footnote.
                    String content = cutAffix(line, "<div class=\"fn\">", "</div>");
                    if (footnoteVerses.size() == 0)
                        throw new IOException(line);
                    String prefix = footnoteVerses.remove(0) + ":";
                    if (!content.startsWith(prefix)) {
                        throw new IOException(prefix + " / " + content);
                    }
                    parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
                } else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith("&nbsp;") || line.startsWith("<span class=\"u2\">"))) {
                    // Continuation text of the current verse.
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    // Unexpected line: print the following line as debugging context, then abort.
                    System.err.println("Next line: " + br.readLine());
                    throw new IOException(line);
                }
                if (!newFootnotes.isEmpty()) {
                    footnotes.addAll(newFootnotes);
                    for (int i = 0; i < newFootnotes.size(); i++) {
                        // A verse number that already contains "," is used as is; otherwise
                        // the current chapter number is prepended.
                        if (currentVerse.getNumber().contains(",")) {
                            footnoteVerses.add(currentVerse.getNumber());
                        } else {
                            footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
                        }
                    }
                }
                if (restLine != null)
                    line = restLine;
                else
                    line = skipLines(br);
            }
            // Nothing may be left dangling at the end of the book.
            if (!headlines.isEmpty())
                throw new IOException("" + headlines.size());
            if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
                throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
            for (Chapter ch : bk.getChapters()) {
                for (Verse vv : ch.getVerses()) {
                    vv.trimWhitespace();
                    vv.finished();
                }
            }
        }
    }
    // Appendix (Anhang)
    Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
    bible.getBooks().add(anhang);
    Visitor<RuntimeException> vv = getPrologVisitor(anhang);
    vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
    try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
        String line = br.readLine().trim();
        while (!line.startsWith("<a name=\"at\">")) {
            line = br.readLine().trim();
        }
        while (!line.equals("</body>")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            line = line.replaceAll("> +<", "><");
            line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
            if (line.startsWith("<h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
            } else if (line.startsWith("<h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
            } else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
                // Table row for one book: parts[0] is the file name, parts[1] the link text.
                String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
                line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
                if (line.contains("<td><br /><br /><a href")) {
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                    line = line.replace("<td><br /><br /><a href", "<td><a href");
                }
                String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
                Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
                BookMetadata m = null;
                for (BookMetadata bm : METADATA) {
                    if (bm.filename.equals(parts[0])) {
                        m = bm;
                        break;
                    }
                }
                // A link to a file that is not a known book is a parse error, not a
                // NullPointerException.
                if (m == null)
                    throw new IOException("Unknown book file: " + parts[0]);
                bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
                bold.visitText(" " + replaceEntities(title));
                vv.visitLineBreak(LineBreakKind.NEWLINE);
                // The description may span several physical lines up to </td>.
                line = br.readLine().trim();
                while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
                vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
                vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                line = br.readLine().trim();
                if (!line.equals("</tr>"))
                    throw new IOException(line);
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p>", "<p>&nbsp;</p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
        }
    }
    // Ezekiel's temple (Hesekiels Tempel): a link in OFFLINE mode, an inline image in ONLINE mode.
    vv.visitHeadline(1).visitText("Hesekiels Tempel");
    Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
    vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
    vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");
    // Jesus chronicle (Jesus-Chronik)
    if (JESUS_CHRONIK.length > 0)
        vv.visitHeadline(1).visitText("Die Jesus-Chronik");
    for (String name : JESUS_CHRONIK) {
        if (!new File(inputDirectory, name + ".html").exists()) {
            System.out.println("*** Skipping " + name + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
            String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
            // Footnotes opened in the text and the prefix each footnote body must start with.
            List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
            List<String> footnotePrefixes = new ArrayList<>();
            while (!line.startsWith("</body>")) {
                line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
                if (line.startsWith("<h2>")) {
                    parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
                } else if (line.startsWith("<div class=\"fn\">")) {
                    // Footnote block: several footnote bodies separated by <br />.
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
                    for (String fn : fns) {
                        fn = fn.trim();
                        String pfx = footnotePrefixes.remove(0);
                        Visitor<RuntimeException> fnv = footnoteList.remove(0);
                        if (!fn.startsWith(pfx))
                            throw new IOException(pfx + " / " + fn);
                        parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
                    }
                } else if (line.startsWith("<p><div class=\"rot\">")) {
                    String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
                    // Bold title (emitted as a level 3 headline), optionally followed by
                    // further text that is emitted in italics.
                    int pos = line.indexOf("</b><br />");
                    parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
                    String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
                    if (!xref.isEmpty())
                        parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p>")) {
                    parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("&copy;")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<div class=\"e\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    throw new IOException(line);
                }
                line = skipLines(br);
            }
            // Every footnote opened in the text must have received its body.
            if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
                throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
        }
    }
    anhang.getChapters().get(0).getProlog().trimWhitespace();
    anhang.getChapters().get(0).getProlog().finished();
    return bible;
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) Matcher(java.util.regex.Matcher) Bible(biblemulticonverter.data.Bible) ArrayList(java.util.ArrayList) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) Headline(biblemulticonverter.data.FormattedText.Headline) Pattern(java.util.regex.Pattern) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FormattedText(biblemulticonverter.data.FormattedText) Date(java.util.Date) BufferedReader(java.io.BufferedReader) File(java.io.File) SimpleDateFormat(java.text.SimpleDateFormat) Verse(biblemulticonverter.data.Verse)

Example 8 with Visitor

use of biblemulticonverter.data.FormattedText.Visitor in project BibleMultiConverter by schierlm.

In the class RoundtripXML, method parseContent.

/**
 * Replays a deserialized content list onto the given visitor.
 *
 * Strings are emitted as text. Every {@link JAXBElement} is dispatched on the
 * runtime type of its value: line breaks, raw HTML and verse separators are
 * emitted directly, all other supported element types open a child visitor
 * and their own content list is processed recursively with it.
 *
 * @param visitor     visitor that receives the events
 * @param contentList mixed list of strings and JAXB elements
 * @throws IOException if an entry is neither a string nor a JAXB element, or
 *                     if the element value has an unsupported type
 */
private void parseContent(Visitor<RuntimeException> visitor, List<Serializable> contentList) throws IOException {
    for (Serializable item : contentList) {
        // Plain character data.
        if (item instanceof String) {
            visitor.visitText((String) item);
            continue;
        }
        if (!(item instanceof JAXBElement<?>)) {
            throw new IOException("Invalid content: " + item.getClass());
        }

        Object element = ((JAXBElement<?>) item).getValue();
        Visitor<RuntimeException> childVisitor;
        if (element instanceof FormattedTextType.Headline) {
            FormattedTextType.Headline headline = (FormattedTextType.Headline) element;
            childVisitor = visitor.visitHeadline(headline.getDepth());
        } else if (element instanceof FormattedTextType.Footnote) {
            childVisitor = visitor.visitFootnote();
        } else if (element instanceof FormattedTextType.CrossReference) {
            FormattedTextType.CrossReference ref = (FormattedTextType.CrossReference) element;
            childVisitor = visitor.visitCrossReference(ref.getBookAbbr(), BookID.fromOsisId(ref.getBook()), ref.getFirstChapter(), ref.getFirstVerse(), ref.getLastChapter(), ref.getLastVerse());
        } else if (element instanceof FormattedTextType.LineBreak) {
            // Leaf element: nothing nested to descend into.
            FormattedTextType.LineBreak lineBreak = (FormattedTextType.LineBreak) element;
            visitor.visitLineBreak(LineBreakKind.valueOf(lineBreak.getKind().name()));
            continue;
        } else if (element instanceof FormattedTextType.DictionaryEntry) {
            FormattedTextType.DictionaryEntry entry = (FormattedTextType.DictionaryEntry) element;
            childVisitor = visitor.visitDictionaryEntry(entry.getDictionary(), entry.getEntry());
        } else if (element instanceof FormattedTextType.GrammarInformation) {
            FormattedTextType.GrammarInformation grammar = (FormattedTextType.GrammarInformation) element;
            // Each of the three arrays stays null when its source list is empty.
            int[] strongNumbers = null;
            if (!grammar.getStrongs().isEmpty()) {
                strongNumbers = new int[grammar.getStrongs().size()];
                for (int k = 0; k < strongNumbers.length; k++) {
                    strongNumbers[k] = grammar.getStrongs().get(k);
                }
            }
            String[] morphCodes = grammar.getRmac().isEmpty()
                    ? null
                    : (String[]) grammar.getRmac().toArray(new String[grammar.getRmac().size()]);
            int[] sourceIndices = null;
            if (!grammar.getSourceIndices().isEmpty()) {
                sourceIndices = new int[grammar.getSourceIndices().size()];
                for (int k = 0; k < sourceIndices.length; k++) {
                    sourceIndices[k] = grammar.getSourceIndices().get(k);
                }
            }
            childVisitor = visitor.visitGrammarInformation(strongNumbers, morphCodes, sourceIndices);
        } else if (element instanceof FormattedTextType.FormattingInstruction) {
            FormattedTextType.FormattingInstruction formatting = (FormattedTextType.FormattingInstruction) element;
            childVisitor = visitor.visitFormattingInstruction(FormattingInstructionKind.valueOf(formatting.getKind().name()));
        } else if (element instanceof FormattedTextType.CssFormatting) {
            FormattedTextType.CssFormatting css = (FormattedTextType.CssFormatting) element;
            childVisitor = visitor.visitCSSFormatting(css.getCss());
        } else if (element instanceof FormattedTextType.ExtraAttribute) {
            FormattedTextType.ExtraAttribute extra = (FormattedTextType.ExtraAttribute) element;
            childVisitor = visitor.visitExtraAttribute(ExtraAttributePriority.valueOf(extra.getPrio().name()), extra.getCategory(), extra.getKey(), extra.getValue());
        } else if (element instanceof FormattedTextType.Variation) {
            List<String> variations = ((FormattedTextType.Variation) element).getVariations();
            String[] variationArray = variations.toArray(new String[variations.size()]);
            childVisitor = visitor.visitVariationText(variationArray);
        } else if (element instanceof FormattedTextType.RawHTML) {
            // Leaf element: nothing nested to descend into.
            FormattedTextType.RawHTML rawHtml = (FormattedTextType.RawHTML) element;
            visitor.visitRawHTML(RawHTMLMode.valueOf(rawHtml.getMode().name()), rawHtml.getValue());
            continue;
        } else if (element instanceof FormattedTextType.VerseSeparator) {
            // Leaf element: nothing nested to descend into.
            visitor.visitVerseSeparator();
            continue;
        } else {
            throw new IOException("Invalid JAXBElement value: " + element.getClass());
        }

        // Container element: recurse into its nested content with the child visitor.
        List<Serializable> nestedContent = ((FormattedTextType) element).getContent();
        parseContent(childVisitor, nestedContent);
    }
}
Also used : RawHTML(biblemulticonverter.schema.roundtripxml.FormattedTextType.RawHTML) LineBreak(biblemulticonverter.schema.roundtripxml.FormattedTextType.LineBreak) Serializable(java.io.Serializable) Visitor(biblemulticonverter.data.FormattedText.Visitor) RawHTML(biblemulticonverter.schema.roundtripxml.FormattedTextType.RawHTML) CrossReference(biblemulticonverter.schema.roundtripxml.FormattedTextType.CrossReference) FormattedTextType(biblemulticonverter.schema.roundtripxml.FormattedTextType) CssFormatting(biblemulticonverter.schema.roundtripxml.FormattedTextType.CssFormatting) IOException(java.io.IOException) CrossReference(biblemulticonverter.schema.roundtripxml.FormattedTextType.CrossReference) Variation(biblemulticonverter.schema.roundtripxml.FormattedTextType.Variation)

Example 9 with Visitor

use of biblemulticonverter.data.FormattedText.Visitor in project BibleMultiConverter by schierlm.

the class StrippedDiffable method mergeIntroductionPrologs.

/**
 * Removes every book whose ZefID is negative and moves its prolog into the
 * first chapter of the next remaining book.
 *
 * A removed book contributes its prolog only when it has exactly one chapter
 * and that chapter has a prolog; otherwise a warning is printed and the book
 * is removed without contributing anything. Collected prologs are written,
 * separated by paragraph breaks, into a fresh prolog of the next book that
 * has at least one chapter, followed by that book's own prolog (if any).
 *
 * Prologs still buffered after the last book are appended to the first book
 * with a positive ZefID that has at least one chapter.
 *
 * @param bible the bible whose book list is modified in place
 */
protected void mergeIntroductionPrologs(Bible bible) {
    List<FormattedText> prologBuffer = new ArrayList<FormattedText>();
    // Index loop because books are removed from the list while iterating.
    for (int i = 0; i < bible.getBooks().size(); i++) {
        Book book = bible.getBooks().get(i);
        if (book.getId().getZefID() < 0) {
            if (book.getChapters().size() == 1) {
                Chapter ch = book.getChapters().get(0);
                // Only a warning: the prolog below is still collected, the verses are dropped with the book.
                if (ch.getVerses().size() > 0)
                    System.out.println("WARNING: Book " + book.getAbbr() + " has verses; not merged.");
                if (ch.getProlog() != null)
                    prologBuffer.add(ch.getProlog());
                else
                    System.out.println("WARNING: Book " + book.getAbbr() + " does not have a prolog; not merged.");
            } else {
                System.out.println("WARNING: Book " + book.getAbbr() + " has " + book.getChapters().size() + " chapters; not merged.");
            }
            bible.getBooks().remove(i);
            // Stay on the same index, which now holds the following book.
            i--;
        } else if (prologBuffer.size() > 0 && book.getChapters().size() > 0) {
            Chapter ch = book.getChapters().get(0);
            // The book's own prolog goes last, after the buffered ones.
            if (ch.getProlog() != null)
                prologBuffer.add(ch.getProlog());
            FormattedText newProlog = new FormattedText();
            Visitor<RuntimeException> v = newProlog.getAppendVisitor();
            ch.setProlog(newProlog);
            boolean first = true;
            for (FormattedText oldProlog : prologBuffer) {
                if (!first)
                    v.visitLineBreak(LineBreakKind.PARAGRAPH);
                first = false;
                oldProlog.accept(v);
            }
            prologBuffer.clear();
        }
    }
    if (prologBuffer.size() > 0) {
        System.out.println("WARNING: " + prologBuffer.size() + " introduction prologs after last bible book were merged to first bible book!");
        for (int i = 0; i < bible.getBooks().size(); i++) {
            Book book = bible.getBooks().get(i);
            if (book.getId().getZefID() > 0 && book.getChapters().size() > 0) {
                Chapter ch = book.getChapters().get(0);
                FormattedText prolog = ch.getProlog();
                // The target chapter may have no prolog yet (the loop above
                // treats that as a normal case); create one instead of
                // dereferencing null, and do not start it with a paragraph break.
                boolean first = false;
                if (prolog == null) {
                    prolog = new FormattedText();
                    ch.setProlog(prolog);
                    first = true;
                }
                Visitor<RuntimeException> v = prolog.getAppendVisitor();
                for (FormattedText oldProlog : prologBuffer) {
                    if (!first)
                        v.visitLineBreak(LineBreakKind.PARAGRAPH);
                    first = false;
                    oldProlog.accept(v);
                }
                break;
            }
        }
    }
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) Book(biblemulticonverter.data.Book) ArrayList(java.util.ArrayList) Chapter(biblemulticonverter.data.Chapter) FormattedText(biblemulticonverter.data.FormattedText)

Example 10 with Visitor

use of biblemulticonverter.data.FormattedText.Visitor in project BibleMultiConverter by schierlm.

the class ZefDic method parseBible.

/**
 * Converts a parsed dictionary document into a {@link Bible}.
 *
 * The bible name is the document type and reference bible joined by "@".
 * Dictionary version, revision and all INFORMATION elements are stored in a
 * {@link MetadataBook}, which is added as first book only when it contains
 * at least one key. Every dictionary item becomes a {@link Book} with
 * {@code BookID.DICTIONARY_ENTRY} and a single chapter; the item content is
 * written into that chapter's prolog.
 *
 * @param doc the dictionary document to convert
 * @return the newly built bible
 * @throws Exception declared by the signature; the body itself throws
 *                   {@link RuntimeException} for content it does not support
 */
protected Bible parseBible(Dictionary doc) throws Exception {
    Bible result = new Bible(doc.getType().toString() + "@" + doc.getRefbible());
    MetadataBook metadata = new MetadataBook();
    if (doc.getDicversion() != null) {
        metadata.setValue(MetadataBookKey.version, doc.getDicversion());
    }
    if (doc.getRevision() != null) {
        metadata.setValue(MetadataBookKey.revision, doc.getRevision());
    }
    // Copy every INFORMATION element, keyed by its local element name.
    for (JAXBElement<?> elem : doc.getINFORMATION().getTitleOrCreatorOrDescription()) {
        if (elem.getValue() == null)
            continue;
        String value = normalize(elem.getValue().toString(), true).trim();
        // Blank values are stored as a placeholder instead of an empty string.
        if (value.length() == 0)
            value = "-empty-";
        metadata.setValue(elem.getName().getLocalPart(), value);
    }
    metadata.finished();
    if (metadata.getKeys().size() > 0)
        result.getBooks().add(metadata.getBook());
    // Numbers the generated "L<n>" identifiers below.
    int counter = 0;
    for (TItem item : doc.getItem()) {
        String id = item.getId();
        String internalId = id;
        // Ids that do not match Utils.BOOK_ABBR_REGEX are replaced by a generated
        // "L<n>" value for the first Book argument; the original id is still
        // passed as the other two string arguments.
        if (!id.matches(Utils.BOOK_ABBR_REGEX))
            internalId = "L" + (++counter);
        Book bk = new Book(internalId, BookID.DICTIONARY_ENTRY, id, id);
        result.getBooks().add(bk);
        FormattedText prolog = new FormattedText();
        bk.getChapters().add(new Chapter());
        bk.getChapters().get(0).setProlog(prolog);
        // vv writes at the top level of this item's prolog.
        Visitor<RuntimeException> vv = prolog.getAppendVisitor();
        String strongId = item.getStrongId();
        if (strongId != null) {
            // Strong id from the item field: bold label plus value inside an
            // extra attribute, followed by a paragraph break.
            Visitor<RuntimeException> vvvv = vv.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefdic", "itemfield", "strongid");
            vvvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Strong-ID: ");
            vvvv.visitText(strongId);
            vv.visitLineBreak(LineBreakKind.PARAGRAPH);
        }
        for (Object s : item.getContent()) {
            if (s instanceof String) {
                // Only whitespace is tolerated between the child elements of an item.
                if (((String) s).trim().length() > 0)
                    throw new RuntimeException((String) s);
            } else if (s instanceof JAXBElement) {
                Object v = ((JAXBElement<?>) s).getValue();
                // Only elements without a namespace are supported.
                if (!((JAXBElement<?>) s).getName().getNamespaceURI().equals("")) {
                    throw new RuntimeException(((JAXBElement<?>) s).getName().getNamespaceURI());
                }
                String nn = ((JAXBElement<?>) s).getName().getLocalPart();
                if (v instanceof TParagraph && nn.equals("description")) {
                    TParagraph para = (TParagraph) v;
                    // Paragraph ids are not supported.
                    if (para.getId() != null)
                        throw new RuntimeException(para.getId());
                    // vvv writes inside the "description" extra attribute.
                    // NOTE(review): several branches below write through vv (the
                    // prolog-level visitor) instead of vvv; confirm that this is
                    // intended and that the resulting element order is as expected.
                    Visitor<RuntimeException> vvv = vv.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefdic", "field", "description");
                    for (Object oo : para.getContent()) {
                        if (oo instanceof String) {
                            vvv.visitText(normalize((String) oo, false));
                        } else if (oo instanceof JAXBElement) {
                            Object ovv = ((JAXBElement<?>) oo).getValue();
                            if (!((JAXBElement<?>) oo).getName().getNamespaceURI().equals("")) {
                                throw new RuntimeException(((JAXBElement<?>) oo).getName().getNamespaceURI());
                            }
                            String nnn = ((JAXBElement<?>) oo).getName().getLocalPart();
                            if (nnn.equals("br") && ovv instanceof String) {
                                // A line break element must not carry text.
                                if (((String) ovv).trim().length() > 0)
                                    throw new RuntimeException((String) ovv);
                                vvv.visitLineBreak(LineBreakKind.NEWLINE);
                            } else if (nnn.equals("title") && ovv instanceof String) {
                                // Trimmed, with runs of spaces collapsed to a single space.
                                vvv.visitHeadline(2).visitText(((String) ovv).trim().replaceAll("  +", " "));
                            } else if (nnn.equals("sub") && ovv instanceof String) {
                                vvv.visitFormattingInstruction(FormattingInstructionKind.SUBSCRIPT).visitText(normalize((String) ovv, false));
                            } else if (nnn.equals("reflink") && ovv instanceof RefLinkType) {
                                RefLinkType r = (RefLinkType) ovv;
                                // Warns when a target is set or the content is null or non-empty.
                                if (r.getTarget() != null || r.getContent() == null || r.getContent().length() > 0)
                                    System.out.println("WARNING: Unsupported reflink attributes " + r.getTarget() + "|" + r.getContent());
                                // Fall back to the element content when no mscope is given.
                                // NOTE(review): if the content is null as well, the
                                // getMscope() dereference below presumably throws a
                                // NullPointerException - confirm.
                                if (r.getMscope() == null)
                                    r.setMscope(r.getContent());
                                // The visitor returned for the entry is discarded, so
                                // nothing is written inside the dictionary entry.
                                vv.visitDictionaryEntry("reflink", r.getMscope().replace(';', '-'));
                            } else if (nnn.equals("see") && ovv instanceof SeeType) {
                                SeeType see = (SeeType) ovv;
                                // Only a missing target or "x-self" is supported.
                                if (see.getTarget() != null && !see.getTarget().equals("x-self"))
                                    throw new RuntimeException(see.getTarget());
                                // As for reflink, the returned visitor is discarded.
                                vv.visitDictionaryEntry("dict", see.getContent());
                            } else if (nnn.equals("bib_link") && ovv instanceof BibLinkType) {
                                BibLinkType bl = (BibLinkType) ovv;
                                // Emitted as a footnote holding the cross reference marker
                                // and a single-verse cross reference (first and last
                                // chapter/verse are the same values).
                                Visitor<RuntimeException> fn = vv.visitFootnote();
                                fn.visitText(FormattedText.XREF_MARKER);
                                BookID bid = BookID.fromZefId(Integer.parseInt(bl.getBn()));
                                int chapter = Integer.parseInt(bl.getCn1());
                                fn.visitCrossReference(bid.getOsisID(), bid, chapter, bl.getVn1(), chapter, bl.getVn1()).visitText(bid.getOsisID() + " " + chapter + ":" + bl.getVn1());
                            } else if (nnn.equals("greek") && ovv instanceof String) {
                                vv.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefdic", "tag", "greek").visitText(normalize((String) ovv, false));
                            } else if (nnn.equals("em") && ovv instanceof String) {
                                vvv.visitFormattingInstruction(FormattingInstructionKind.ITALIC).visitText(normalize((String) ovv, false));
                            } else if (nnn.equals("strong") && ovv instanceof String) {
                                vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText(normalize((String) ovv, false));
                            } else if (nnn.equals("q") && ovv instanceof String) {
                                vv.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefdic", "tag", "q").visitText(normalize((String) ovv, false));
                            } else {
                                // Unsupported element name / value type combination.
                                throw new RuntimeException(nnn + "/" + ovv.getClass().getName());
                            }
                        } else {
                            throw new RuntimeException(oo.getClass().getName());
                        }
                    }
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (v instanceof String || v instanceof MyAnyType) {
                    // Simple fields: the title becomes a level 1 headline; the other
                    // supported fields get a bold label inside an extra attribute
                    // and are followed by a paragraph break.
                    Visitor<RuntimeException> vvvv;
                    boolean addParagraph = false;
                    if (nn.equals("title")) {
                        vvvv = vv.visitHeadline(1);
                    } else if (nn.equals("strong_id")) {
                        vvvv = vv.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefdic", "field", "strongid");
                        vvvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Strong-ID: ");
                        addParagraph = true;
                    } else if (nn.equals("transliteration")) {
                        vvvv = vv.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefdic", "field", "transliteration");
                        vvvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Transliteration: ");
                        addParagraph = true;
                    } else if (nn.equals("pronunciation")) {
                        vvvv = vv.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefdic", "field", "pronunciation");
                        vvvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Pronunciation: ");
                        addParagraph = true;
                    } else {
                        throw new RuntimeException(nn);
                    }
                    // Structured values are delegated to parseElement; plain strings
                    // are normalized and written as text.
                    if (v instanceof MyAnyType) {
                        parseElement(vvvv, (MyAnyType) v);
                    } else if (v instanceof String) {
                        vvvv.visitText(normalize((String) v, false));
                    }
                    if (addParagraph) {
                        vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                    }
                } else {
                    throw new RuntimeException(nn + "/" + v.getClass().getName());
                }
            } else {
                throw new RuntimeException("" + s.getClass());
            }
        }
        prolog.trimWhitespace();
        prolog.finished();
    }
    return result;
}
Also used : RefLinkType(biblemulticonverter.schema.zefdic1.RefLinkType) MetadataBook(biblemulticonverter.data.MetadataBook) TParagraph(biblemulticonverter.schema.zefdic1.TParagraph) Visitor(biblemulticonverter.data.FormattedText.Visitor) MyAnyType(biblemulticonverter.schema.zefdic1.MyAnyType) TItem(biblemulticonverter.schema.zefdic1.TItem) Bible(biblemulticonverter.data.Bible) Chapter(biblemulticonverter.data.Chapter) FormattedText(biblemulticonverter.data.FormattedText) JAXBElement(javax.xml.bind.JAXBElement) BookID(biblemulticonverter.data.BookID) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) SeeType(biblemulticonverter.schema.zefdic1.SeeType) BibLinkType(biblemulticonverter.schema.zefdic1.BibLinkType)

Aggregations

Visitor (biblemulticonverter.data.FormattedText.Visitor) 16, Book (biblemulticonverter.data.Book) 8, BookID (biblemulticonverter.data.BookID) 8, FormattedText (biblemulticonverter.data.FormattedText) 8, IOException (java.io.IOException) 8, ArrayList (java.util.ArrayList) 8, Chapter (biblemulticonverter.data.Chapter) 7, FormattingInstructionKind (biblemulticonverter.data.FormattedText.FormattingInstructionKind) 6, MetadataBook (biblemulticonverter.data.MetadataBook) 6, JAXBElement (javax.xml.bind.JAXBElement) 6, Verse (biblemulticonverter.data.Verse) 5, List (java.util.List) 5, ExtraAttributePriority (biblemulticonverter.data.FormattedText.ExtraAttributePriority) 4, Headline (biblemulticonverter.data.FormattedText.Headline) 4, LineBreakKind (biblemulticonverter.data.FormattedText.LineBreakKind) 4, RawHTMLMode (biblemulticonverter.data.FormattedText.RawHTMLMode) 4, VirtualVerse (biblemulticonverter.data.VirtualVerse) 4, File (java.io.File) 4, Matcher (java.util.regex.Matcher) 4, Bible (biblemulticonverter.data.Bible) 3