Search in sources :

Example 26 with Chapter

Use of biblemulticonverter.data.Chapter in project BibleMultiConverter by schierlm.

In class NeUeParser, method doImport.

/**
 * Imports the NeÜ bible from a directory of HTML files and returns it as a {@link Bible}.
 *
 * <p>The method works through the input in this order:
 * <ol>
 * <li>Creates a metadata book with fixed description/rights/source/publisher/language values and
 * fills version, date and revision from the "Textstand: " line of the main file
 * ({@code NeUe.htm}, or {@code index.htm} if the former does not exist).</li>
 * <li>Reads the table of contents of the main file, checks it against {@code METADATA} and
 * {@code JESUS_CHRONIK}, and stores each book's short name in its {@code METADATA} entry.</li>
 * <li>Parses the preface ("Vorwort") into the prolog of an introduction book.</li>
 * <li>Parses one HTML file per {@code METADATA} entry into a book with chapters, verses,
 * headlines and footnotes; missing files are reported on standard output and skipped.</li>
 * <li>Builds an appendix ("Anhang") book from {@code bibel.html}, a fixed link/image for
 * "Hesekiels Tempel", and the Jesus-Chronik files.</li>
 * </ol>
 *
 * <p>The parser is strict: any line that does not match one of the expected shapes is reported
 * by throwing an {@link IOException} whose message is the offending line (or the mismatching
 * values).
 *
 * @param inputDirectory directory that contains the HTML files to import
 * @return the bible built from the files
 * @throws Exception if a file cannot be read or its content does not match the expected layout
 */
@Override
public Bible doImport(File inputDirectory) throws Exception {
    Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
    MetadataBook metadata = new MetadataBook();
    metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
    metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
    metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
    metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
    metadata.setValue(MetadataBookKey.language, "GER");
    bible.getBooks().add(metadata.getBook());
    // The main file is NeUe.htm; fall back to index.htm when it is absent.
    String mainFile = "NeUe.htm";
    if (!new File(inputDirectory, mainFile).exists())
        mainFile = "index.htm";
    try (BufferedReader br = createReader(inputDirectory, mainFile)) {
        // NOTE(review): BufferedReader.readLine() returns null at end of stream, so every
        // readLine().trim() in this method throws NullPointerException if the expected
        // terminating marker is missing from the file.
        String line = br.readLine().trim();
        // Header part: scan up to the first <p class="u3"> line, picking up the text version.
        while (!line.startsWith("<p class=\"u3\">")) {
            if (line.contains("Textstand: ")) {
                // Keep the text between "Textstand: " (11 characters) and the next '<'.
                line = line.substring(line.indexOf("Textstand: ") + 11);
                line = line.substring(0, line.indexOf('<'));
                metadata.setValue(MetadataBookKey.version, line);
                // The date is the day the import runs, not a date read from the file.
                metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
                // The revision is the version string reduced to its digits.
                metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
                metadata.finished();
            }
            line = br.readLine().trim();
        }
        // Table of contents: each entry is a link (group 1 = URL, group 2 = link text) followed
        // by two &nbsp; entities and an optional closing </p>.
        Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a>&nbsp;&nbsp;(?:</p>)?");
        int bookIndex = 0, jcIndex = 0;
        while (!line.startsWith("<a name=\"vorwort\">")) {
            if (line.equals("<br>")) {
                // After a <br> the next line may carry a "»»" prefix that has to be dropped.
                line = br.readLine().trim();
                if (line.startsWith("&raquo;&raquo;&nbsp;&nbsp;"))
                    line = line.substring("&raquo;&raquo;&nbsp;&nbsp;".length());
            }
            Matcher m = tocPattern.matcher(line);
            if (m.matches()) {
                String url = m.group(1);
                String shortName = replaceEntities(m.group(2));
                if (url.endsWith(".html#bb")) {
                    // Bible book entry: strip ".html#bb" (8 characters) to get the file name,
                    // which must match the next METADATA entry in order.
                    String filename = url.substring(0, url.length() - 8);
                    BookMetadata bm = METADATA[bookIndex];
                    if (!bm.filename.equals(filename))
                        throw new IOException(filename + "/" + bm.filename);
                    bm.shortname = shortName;
                    bookIndex++;
                } else if (url.startsWith("0")) {
                    // Jesus-Chronik entry: must match the next JESUS_CHRONIK name in order.
                    if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
                        throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
                    jcIndex++;
                } else {
                    throw new IOException(url);
                }
            } else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p>&nbsp;</p>") && !line.equals("<p><a name=\"bb\">&nbsp;</a></p>")) {
                // Anything other than the known filler lines is unexpected.
                throw new IOException(line);
            }
            line = br.readLine().trim();
        }
        // Every METADATA book must have been listed in the table of contents.
        if (bookIndex != METADATA.length)
            throw new IOException(bookIndex + " != " + METADATA.length);
        // A table of contents without any Jesus-Chronik entry is accepted: the JESUS_CHRONIK
        // field is replaced by an empty array, which also disables the section further below.
        if (jcIndex == 0)
            JESUS_CHRONIK = new String[0];
        if (jcIndex != JESUS_CHRONIK.length)
            throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
        // Preface (Vorwort)
        Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
        bible.getBooks().add(vorwort);
        Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
        // Tracks whether a paragraph break has to be emitted before the next block of text.
        boolean needParagraph = false;
        if (line.endsWith("</a><br>"))
            line = br.readLine().trim();
        // The preface ends at the first right-aligned <div>.
        while (!line.startsWith("<div align=\"right\">")) {
            // Drop empty named anchors before classifying the line.
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            if (line.startsWith("<h2>")) {
                // The <h2> title is only verified against the book's long name, not stored.
                if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
                    throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
            } else if (line.startsWith("<h4>")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<h4 id=")) {
                // Headline with an id attribute and a leading back-link to #vorwort.
                parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<div class=\"fn\">")) {
                // "fn" blocks of the preface are rendered in italics.
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
            } else if (line.startsWith("<p>")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                // A paragraph ending in <br /> continues on the following line.
                if (line.endsWith("<br />"))
                    line += br.readLine().trim();
                parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
            } else if (line.equals("<ul>")) {
                // Lists are skipped entirely.
                // NOTE(review): unlike elsewhere, lines are compared untrimmed here, and a null
                // from readLine() at end of stream would cause a NullPointerException.
                while (!line.equals("</ul>")) {
                    line = br.readLine();
                }
            } else {
                throw new IOException(line);
            }
            // NOTE(review): skipLines presumably returns the next line that does not start with
            // one of the given prefixes - confirm against the helper.
            line = skipLines(br, "<p>&nbsp;</p>");
        }
        vorwort.getChapters().get(0).getProlog().finished();
    }
    // Bible books: one HTML file per METADATA entry.
    for (BookMetadata bm : METADATA) {
        if (!new File(inputDirectory, bm.filename + ".html").exists()) {
            System.out.println("*** Skipping " + bm.filename + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
            String line = br.readLine().trim();
            // Skip the HTML head and the navigation header of the book file.
            line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "&raquo;&raquo;");
            // The book content has to start with the "bb" anchor (name= or id= variant).
            if (!line.equals("<p><a name=\"bb\">&nbsp;</a></p>") && !line.equals("<p><a id=\"bb\">&nbsp;</a></p>"))
                throw new IOException(line);
            line = skipLines(br);
            if (line.equals("<p>&nbsp;</p>"))
                line = br.readLine().trim();
            // The <h1> line provides the long name of the book.
            Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
            bible.getBooks().add(bk);
            line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p>&nbsp;</p>");
            // Book prolog: a "u0" headline, one or more "e" paragraphs and a closing "u1" line
            // rendered bold and italic.
            FormattedText prolog = new FormattedText();
            prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
            line = skipLines(br);
            boolean firstProlog = true;
            while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
                // Paragraph breaks go between prolog paragraphs, not before the first one.
                if (firstProlog) {
                    firstProlog = false;
                } else {
                    prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                }
                parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
                line = skipLines(br);
            }
            // At least one "e" paragraph is required.
            if (firstProlog)
                throw new IOException(line);
            prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
            parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
            prolog.finished();
            line = skipLines(br);
            if (!line.startsWith("<h"))
                throw new IOException(line);
            // The level digit of the first headline in the file becomes headline depth 1.
            char minHeadline = line.charAt(2);
            // Headlines seen since the last verse; they are attached to the next verse.
            List<Headline> headlines = new ArrayList<>();
            // True while inside an opened <p> whose closing </p> has not been consumed yet.
            boolean inParagraph = false;
            Chapter currentChapter = null;
            Verse currentVerse = null;
            // Footnote visitors still waiting for their text, and in parallel the
            // "chapter,verse" reference each footnote text is expected to start with.
            List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
            List<String> footnoteVerses = new ArrayList<>();
            // The book text ends at the <hr> line.
            while (!line.equals("<hr>")) {
                if (line.startsWith("<p>&nbsp;</p>")) {
                    // Drop the empty paragraph (13 characters) and re-examine what follows it.
                    line = line.substring(13).trim();
                    if (line.length() == 0)
                        line = skipLines(br);
                    continue;
                }
                // Part of the current line that is processed in the next loop iteration.
                String restLine = null;
                // Footnote visitors opened while parsing the current fragment.
                List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
                // Remove id="..." attributes from the leading tag so the prefix checks below match.
                while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
                // "poet" and "einl" paragraphs are treated like plain <p> (both prefixes are 16 characters).
                if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
                    line = "<p>" + line.substring(16);
                }
                // Split after the first </p> if more content follows on the same line.
                if (line.matches(".*</p>.+")) {
                    int pos = line.indexOf("</p>");
                    restLine = line.substring(pos + 4).trim();
                    line = line.substring(0, pos + 4);
                }
                if (!inParagraph && line.startsWith("<p>")) {
                    inParagraph = true;
                    line = line.substring(3).trim();
                    if (line.length() == 0) {
                        // NOTE(review): this continue discards any restLine split off above.
                        line = skipLines(br);
                        continue;
                    }
                }
                // A verse marker that is not at the start of the line begins a new fragment.
                if (line.indexOf("<span class=\"vers\">", 1) != -1) {
                    int pos = line.indexOf("<span class=\"vers\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                // Likewise for a "poet" paragraph that opens in the middle of the line.
                if (line.indexOf("<p class=\"poet\">", 1) != -1) {
                    int pos = line.indexOf("<p class=\"poet\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                // Strip trailing &nbsp; entities (6 characters each).
                while (line.endsWith("&nbsp;")) line = line.substring(0, line.length() - 6);
                if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
                    // Headline: depth is relative to the first headline level of the file.
                    Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
                    String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
                    // A '*' in a headline is rejected.
                    if (headline.contains("*"))
                        throw new IOException(headline);
                    hl.getAppendVisitor().visitText(replaceEntities(headline));
                    headlines.add(hl);
                } else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
                    // Verse start: the number sits between the opening span (19 characters) and </span>.
                    int pos = line.indexOf("</span>");
                    if (pos == -1)
                        throw new IOException(line);
                    String vs = line.substring(19, pos).trim();
                    if (vs.endsWith("&nbsp;")) {
                        vs = cutAffix(vs, "", "&nbsp;");
                    }
                    // Accepted forms are "N" and "N,N".
                    if (vs.matches("[0-9]+(,[0-9]+)?")) {
                        currentVerse = new Verse(vs);
                    } else {
                        throw new IOException(vs);
                    }
                    // Continue with the text after "</span>" (7 characters).
                    line = line.substring(pos + 7);
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    if (line.startsWith("&nbsp;")) {
                        line = line.substring(6);
                    }
                    // Pending headlines are emitted at the start of this verse.
                    for (Headline h : headlines) {
                        h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
                    }
                    headlines.clear();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    // The paragraph ended on this line, so close it inside the verse.
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                    // NOTE(review): currentChapter is null if a verse appears before any chapter marker.
                    currentChapter.getVerses().add(currentVerse);
                } else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
                    // Chapter marker: the number must equal the count of chapters after adding this one.
                    int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
                    currentChapter = new Chapter();
                    currentVerse = null;
                    bk.getChapters().add(currentChapter);
                    if (chap != bk.getChapters().size())
                        throw new IOException(chap + "/" + bk.getChapters().size());
                    // The book prolog is attached to the first chapter that is created.
                    if (prolog != null) {
                        currentChapter.setProlog(prolog);
                        prolog = null;
                    }
                } else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
                    // Footnote text: consumed in order and must start with "<reference>:" of
                    // the oldest pending footnote.
                    String content = cutAffix(line, "<div class=\"fn\">", "</div>");
                    if (footnoteVerses.size() == 0)
                        throw new IOException(line);
                    String prefix = footnoteVerses.remove(0) + ":";
                    if (!content.startsWith(prefix)) {
                        throw new IOException(prefix + " / " + content);
                    }
                    parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
                } else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith("&nbsp;") || line.startsWith("<span class=\"u2\">"))) {
                    // Continuation text (plain text or a "u2" span) that belongs to the current verse.
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    // NOTE(review): currentVerse is null if text appears before the first verse of a chapter.
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    // Unknown line: print the following line as diagnostic context, then fail.
                    System.err.println("Next line: " + br.readLine());
                    throw new IOException(line);
                }
                if (!newFootnotes.isEmpty()) {
                    footnotes.addAll(newFootnotes);
                    // Record one expected reference per new footnote: verse numbers containing
                    // a comma are used as they are, others are prefixed with the chapter number.
                    for (int i = 0; i < newFootnotes.size(); i++) {
                        if (currentVerse.getNumber().contains(",")) {
                            footnoteVerses.add(currentVerse.getNumber());
                        } else {
                            footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
                        }
                    }
                }
                // Continue with the split-off remainder before reading a new line.
                if (restLine != null)
                    line = restLine;
                else
                    line = skipLines(br);
            }
            // At the end of the book no headline or footnote may be left unassigned.
            if (!headlines.isEmpty())
                throw new IOException("" + headlines.size());
            if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
                throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
            for (Chapter ch : bk.getChapters()) {
                for (Verse vv : ch.getVerses()) {
                    vv.trimWhitespace();
                    vv.finished();
                }
            }
        }
    }
    // Appendix (Anhang)
    Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
    bible.getBooks().add(anhang);
    Visitor<RuntimeException> vv = getPrologVisitor(anhang);
    vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
    try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
        String line = br.readLine().trim();
        // The overview starts at the "at" anchor and runs up to </body>.
        while (!line.startsWith("<a name=\"at\">")) {
            line = br.readLine().trim();
        }
        while (!line.equals("</body>")) {
            // Normalize the line: drop empty anchors, spaces between tags and leading <br /> pairs.
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            line = line.replaceAll("> +<", "><");
            line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
            if (line.startsWith("<h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
            } else if (line.startsWith("<h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
            } else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
                // Table row for one book. First cell: link to the book file;
                // parts[0] = file name without ".html", parts[1] = link text.
                String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
                // Second cell: the title, linked to the same file.
                line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
                if (line.contains("<td><br /><br /><a href")) {
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                    line = line.replace("<td><br /><br /><a href", "<td><a href");
                }
                String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
                Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
                // Look up the book whose file name matches the link target.
                BookMetadata m = null;
                for (BookMetadata bm : METADATA) {
                    if (bm.filename.equals(parts[0])) {
                        m = bm;
                        break;
                    }
                }
                // NOTE(review): m is still null here if no METADATA entry matched, in which case
                // the next statement throws NullPointerException.
                // The link text (with "-" removed) becomes a cross reference to chapter 1, verse 1.
                bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
                bold.visitText(" " + replaceEntities(title));
                vv.visitLineBreak(LineBreakKind.NEWLINE);
                // The description may span several lines up to the closing </td>.
                line = br.readLine().trim();
                while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
                vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
                vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                line = br.readLine().trim();
                if (!line.equals("</tr>"))
                    throw new IOException(line);
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p>", "<p>&nbsp;</p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
        }
    }
    // Ezekiel's temple (Hesekiels Tempel): a link with bold text for OFFLINE mode and an
    // embedded image for ONLINE mode, both pointing to the same fixed URL.
    vv.visitHeadline(1).visitText("Hesekiels Tempel");
    Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
    vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
    vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");
    // Jesus chronicle (Jesus-Chronik); the headline is omitted when the list is empty.
    if (JESUS_CHRONIK.length > 0)
        vv.visitHeadline(1).visitText("Die Jesus-Chronik");
    for (String name : JESUS_CHRONIK) {
        if (!new File(inputDirectory, name + ".html").exists()) {
            System.out.println("*** Skipping " + name + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
            String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
            // Pending footnote visitors and the prefix each footnote text must start with;
            // both lists are filled by parseJesusChronikText and consumed in order.
            List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
            List<String> footnotePrefixes = new ArrayList<>();
            while (!line.startsWith("</body>")) {
                line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
                if (line.startsWith("<h2>")) {
                    parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
                } else if (line.startsWith("<div class=\"fn\">")) {
                    // Footnote block: join lines up to </div>, then split the notes at <br />.
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
                    for (String fn : fns) {
                        fn = fn.trim();
                        // NOTE(review): remove(0) throws IndexOutOfBoundsException if the block
                        // holds more footnotes than were referenced in the text.
                        String pfx = footnotePrefixes.remove(0);
                        Visitor<RuntimeException> fnv = footnoteList.remove(0);
                        if (!fn.startsWith(pfx))
                            throw new IOException(pfx + " / " + fn);
                        parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
                    }
                } else if (line.startsWith("<p><div class=\"rot\">")) {
                    // "rot" line with DATE comment markers; rendered in italics.
                    String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
                    // Bold title (becomes a level 3 headline), optionally followed by
                    // text after the <br /> that is rendered in italics.
                    int pos = line.indexOf("</b><br />");
                    parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
                    String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
                    if (!xref.isEmpty())
                        parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p>")) {
                    parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("&copy;")) {
                    // Copyright notice, possibly spanning several lines up to </div>.
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<div class=\"e\">")) {
                    // "e" block, possibly spanning several lines; rendered in italics.
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    throw new IOException(line);
                }
                line = skipLines(br);
            }
            // Every referenced footnote must have received its text.
            if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
                throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
        }
    }
    anhang.getChapters().get(0).getProlog().trimWhitespace();
    anhang.getChapters().get(0).getProlog().finished();
    return bible;
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) Matcher(java.util.regex.Matcher) Bible(biblemulticonverter.data.Bible) ArrayList(java.util.ArrayList) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) Headline(biblemulticonverter.data.FormattedText.Headline) MetadataBook(biblemulticonverter.data.MetadataBook) Pattern(java.util.regex.Pattern) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FormattedText(biblemulticonverter.data.FormattedText) Date(java.util.Date) BufferedReader(java.io.BufferedReader) File(java.io.File) SimpleDateFormat(java.text.SimpleDateFormat) Verse(biblemulticonverter.data.Verse)

Example 27 with Chapter

Use of biblemulticonverter.data.Chapter in project BibleMultiConverter by schierlm.

In class NeUeParser, method getPrologVisitor.

/**
 * Appends a new chapter to the given book, installs a fresh prolog on the book's first
 * chapter and returns the visitor that appends content to that prolog.
 *
 * @param book book that receives the additional chapter and the prolog
 * @return append visitor of the newly created prolog text
 */
private Visitor<RuntimeException> getPrologVisitor(Book book) {
    FormattedText prologText = new FormattedText();
    Chapter addedChapter = new Chapter();
    book.getChapters().add(addedChapter);
    // The prolog always goes onto the chapter at index 0, which is the chapter added above
    // only when the book had no chapters before this call.
    Chapter firstChapter = book.getChapters().get(0);
    firstChapter.setProlog(prologText);
    return prologText.getAppendVisitor();
}
Also used : Chapter(biblemulticonverter.data.Chapter) FormattedText(biblemulticonverter.data.FormattedText)

Example 28 with Chapter

Use of biblemulticonverter.data.Chapter in project BibleMultiConverter by schierlm.

In class OSIS, method doExport.

/**
 * Writes the given bible as an OSIS XML document.
 *
 * <p>The document is assembled as a DOM tree: one {@code div} of type "book" per book, one
 * {@code chapter} element per chapter (containing a chapter title, the optional prolog and the
 * verses). Afterwards the elements named in the second export argument are converted to
 * milestoned form, and the tree is serialized to the file named by the first export argument.
 *
 * @param bible bible to export
 * @param exportArgs {@code exportArgs[0]} is the output file name; the optional
 *        {@code exportArgs[1]} is a comma separated list of element names to milestone
 *        (default {@code "verse"}; {@code "-"} disables milestoning)
 * @throws IllegalArgumentException if an element name is requested that cannot be milestoned
 * @throws Exception if building or writing the XML document fails
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();

    // Root element with namespace and schema declarations.
    Element root = document.createElement("osis");
    document.appendChild(root);
    root.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
    root.setAttribute("xmlns", "http://www.bibletechnologies.net/2003/OSIS/namespace");
    root.setAttribute("xsi:schemaLocation", "http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.2.1.1.xsd");

    // Text container including the header.
    Element textRoot = document.createElement("osisText");
    root.appendChild(textRoot);
    textRoot.setAttribute("canonical", "true");
    textRoot.setAttribute("osisIDWork", "Exported");
    textRoot.appendChild(buildHeader(document, bible.getName()));

    for (Book currentBook : bible.getBooks()) {
        Element bookElement = document.createElement("div");
        textRoot.appendChild(bookElement);
        bookElement.setAttribute("type", "book");
        bookElement.setAttribute("canonical", "true");
        bookElement.setAttribute("osisID", currentBook.getId().getOsisID());

        Element mainTitle = document.createElement("title");
        bookElement.appendChild(mainTitle);
        mainTitle.setAttribute("type", "main");
        mainTitle.appendChild(document.createTextNode(currentBook.getLongName()));

        // Chapters are numbered from 1 in list order.
        int chapterNumber = 1;
        for (Chapter currentChapter : currentBook.getChapters()) {
            Element chapterElement = document.createElement("chapter");
            bookElement.appendChild(chapterElement);
            chapterElement.setAttribute("osisID", currentBook.getId().getOsisID() + "." + chapterNumber);
            OSISVisitor chapterVisitor = new OSISVisitor(chapterElement, currentBook.getId().isNT());

            Element chapterTitle = document.createElement("title");
            chapterElement.appendChild(chapterTitle);
            chapterTitle.setAttribute("type", "chapter");
            chapterTitle.appendChild(document.createTextNode(currentBook.getAbbr() + " " + chapterNumber));

            if (chapterNumberHasProlog(currentChapter)) {
                currentChapter.getProlog().accept(chapterVisitor);
            }

            for (VirtualVerse virtualVerse : currentChapter.createVirtualVerses()) {
                String verseOsisID = currentBook.getId().getOsisID() + "." + chapterNumber + "." + virtualVerse.getNumber();
                // Headlines are written at chapter level, in front of the verse element.
                for (Headline headline : virtualVerse.getHeadlines()) {
                    headline.accept(chapterVisitor.visitHeadline(headline.getDepth()));
                }
                Element verseElement = document.createElement("verse");
                chapterElement.appendChild(verseElement);
                verseElement.setAttribute("osisID", verseOsisID);
                for (Verse realVerse : virtualVerse.getVerses()) {
                    boolean sameNumber = realVerse.getNumber().equals("" + virtualVerse.getNumber());
                    if (!sameNumber) {
                        // Show the original verse number in bold when it differs from the
                        // number of the virtual verse that contains it.
                        Element numberMarker = document.createElement("hi");
                        verseElement.appendChild(numberMarker);
                        numberMarker.setAttribute("type", "bold");
                        numberMarker.appendChild(document.createTextNode("(" + realVerse.getNumber() + ")"));
                    }
                    realVerse.accept(new OSISVisitor(verseElement, currentBook.getId().isNT()));
                }
            }
            chapterNumber++;
        }
    }

    // Second argument selects the elements to milestone; "verse" when not given.
    String milestoneSpec;
    if (exportArgs.length > 1) {
        milestoneSpec = exportArgs[1];
    } else {
        milestoneSpec = "verse";
    }
    if (!milestoneSpec.equals("-")) {
        Set<String> requested = new HashSet<>(Arrays.asList(milestoneSpec.split(",")));
        Set<String> rejected = new HashSet<>(requested);
        rejected.removeAll(GENERATED_MILESTONEABLE_ELEMENTS);
        if (!rejected.isEmpty()) {
            // Explain each rejected name before failing.
            for (String rejectedName : rejected) {
                String message = GENERATED_UNMILESTONEABLE_ELEMENTS.contains(rejectedName)
                        ? "ERROR: " + rejectedName + " may not be milestoned"
                        : "ERROR: " + rejectedName + " is never generated by the OSIS export";
                System.out.println(message);
            }
            throw new IllegalArgumentException("Cannot create milestoned elements: " + milestoneSpec);
        }
        convertChildrenToMilestoned(document.getDocumentElement(), requested);
    }

    TransformerFactory.newInstance()
            .newTransformer()
            .transform(new DOMSource(document), new StreamResult(new File(exportArgs[0])));
}

/** Tells whether the given chapter carries a prolog that has to be exported. */
private static boolean chapterNumberHasProlog(Chapter chapter) {
    return chapter.getProlog() != null;
}
Also used : DOMSource(javax.xml.transform.dom.DOMSource) VirtualVerse(biblemulticonverter.data.VirtualVerse) StreamResult(javax.xml.transform.stream.StreamResult) Element(org.w3c.dom.Element) Chapter(biblemulticonverter.data.Chapter) Document(org.w3c.dom.Document) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) Headline(biblemulticonverter.data.FormattedText.Headline) File(java.io.File) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse) HashSet(java.util.HashSet)

Example 29 with Chapter

Use of biblemulticonverter.data.Chapter in project BibleMultiConverter by schierlm.

In class OSIS, method parseBook.

/**
 * Processes the direct children of one OSIS book element.
 * <p>
 * The book element is first passed to {@code convertToMilestoned}. Afterwards
 * each child node is inspected: non-blank text, titles that are not of type
 * {@code main} (or whose text differs from the book's long name) and unknown
 * tags only produce warnings; comments are ignored; {@code chapter} elements
 * are handed to {@code parseChapter}, after padding the book's chapter list
 * with empty chapters up to the referenced chapter number.
 *
 * @param bookName  OSIS name of the book; chapter osisIDs must start with this name followed by a dot
 * @param osisBook  DOM element of the book
 * @param bibleBook book that receives the parsed chapters
 * @throws IllegalStateException if a chapter's osisID does not belong to {@code bookName}
 */
private void parseBook(String bookName, Element osisBook, Book bibleBook) {
    warningContext = bookName;
    convertToMilestoned(osisBook);
    List<Element> unclosedElements = new ArrayList<Element>();
    // The next sibling is looked up only after the current child has been handled.
    for (Node child = osisBook.getFirstChild(); child != null; child = child.getNextSibling()) {
        if (child instanceof Text) {
            boolean blank = ((Text) child).getTextContent().trim().isEmpty();
            if (!blank) {
                printWarning("WARNING: Non-whitespace text at book level");
            }
            continue;
        }
        if (child instanceof Comment) {
            continue;
        }

        Element element = (Element) child;
        String tag = element.getNodeName();

        if (tag.equals("title")) {
            String titleType = element.getAttribute("type");
            if (!titleType.equals("main")) {
                printWarning("WARNING: invalid book level title type: " + titleType);
            } else if (!getTextChildren(element).equals(bibleBook.getLongName())) {
                printWarning("WARNING: More than one book title found");
            }
            continue;
        }

        if (!tag.equals("chapter")) {
            printWarning("WARNING: invalid book level tag: " + tag);
            continue;
        }

        String osisId = element.getAttribute("osisID");
        String chapterName = osisId;
        int dashIndex = chapterName.indexOf("-");
        if (dashIndex >= 0) {
            // Only the part before the first dash is used as the chapter reference.
            chapterName = chapterName.substring(0, dashIndex);
            printWarning("WARNING: Invalid chapter OSIS reference: " + osisId + ", using " + chapterName);
        }
        if (!chapterName.startsWith(bookName + ".")) {
            throw new IllegalStateException("Invalid chapter " + chapterName + " of book " + bookName);
        }

        int cnumber = Integer.parseInt(chapterName.substring(bookName.length() + 1));
        // Fill any gap so that index cnumber - 1 exists.
        while (bibleBook.getChapters().size() < cnumber) {
            bibleBook.getChapters().add(new Chapter());
        }
        warningContext = chapterName;
        parseChapter(chapterName, element, bibleBook.getChapters().get(cnumber - 1), unclosedElements);
        warningContext = bookName;
    }

    if (!unclosedElements.isEmpty()) {
        StringBuilder report = new StringBuilder("WARNING: Unclosed milestoned elements:");
        for (Element open : unclosedElements) {
            report.append(" ").append(open.getNodeName()).append("[").append(open.getAttribute("sID")).append("]");
        }
        printWarning(report.toString());
    }
}
Also used : Comment(org.w3c.dom.Comment) Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) ArrayList(java.util.ArrayList) Chapter(biblemulticonverter.data.Chapter) Text(org.w3c.dom.Text) FormattedText(biblemulticonverter.data.FormattedText)

Example 30 with Chapter

use of biblemulticonverter.data.Chapter in project BibleMultiConverter by schierlm.

the class RoundtripHTML method doImport.

/**
 * Imports a Bible from a directory containing a {@code metadata.js} file and
 * one HTML file per chapter.
 * <p>
 * The first line of {@code metadata.js} carries the Bible name, the second
 * line is skipped, and the remaining lines are {@code key:value} pairs; every
 * line starting with <code>}</code> closes one book record and creates a
 * {@link Book} with the number of chapters given by the {@code chapters}
 * field. Each chapter file is then read line by line, looking for the prolog,
 * verses and footnotes sections.
 * <p>
 * Malformed or truncated input is reported as an {@link IOException} whose
 * message is the offending line (or an end-of-file note), rather than as a
 * {@code NullPointerException} or {@code StringIndexOutOfBoundsException}.
 *
 * @param inputDir directory that contains {@code metadata.js} and the chapter files
 * @return the imported Bible
 * @throws Exception if a file cannot be read or does not have the expected structure
 */
@Override
public Bible doImport(File inputDir) throws Exception {
    Bible bible;
    // metadata
    try (BufferedReader br = createReader(inputDir, "metadata.js")) {
        String line = readRequiredLine(br, "metadata.js");
        br.readLine();
        bible = new Bible(line.substring(13, line.length() - 2).replace("\\\"", "\"").replace("\\\\", "\\"));
        Map<String, Object> fieldMap = new HashMap<String, Object>();
        while ((line = br.readLine()) != null) {
            if (line.startsWith("}")) {
                Book bk = new Book((String) fieldMap.get("abbr"), BookID.fromOsisId((String) fieldMap.get("osis")), (String) fieldMap.get("short"), (String) fieldMap.get("long"));
                int chapterCount = (Integer) fieldMap.get("chapters");
                for (int i = 0; i < chapterCount; i++) {
                    bk.getChapters().add(new Chapter());
                }
                bible.getBooks().add(bk);
                continue;
            }
            int pos = line.indexOf(":");
            if (pos == -1)
                throw new IOException(line);
            String key = line.substring(0, pos);
            String value = line.substring(pos + 1);
            if (value.endsWith(","))
                value = value.substring(0, value.length() - 1);
            if (value.startsWith("\"") && value.endsWith("\"")) {
                fieldMap.put(key, value.substring(1, value.length() - 1).replace("\\\"", "\"").replace("\\\\", "\\"));
            } else if (value.equals("true") || value.equals("false")) {
                fieldMap.put(key, Boolean.parseBoolean(value));
            } else {
                fieldMap.put(key, Integer.parseInt(value));
            }
        }
    }
    // chapters
    for (Book bk : bible.getBooks()) {
        int cnumber = 0;
        for (Chapter ch : bk.getChapters()) {
            cnumber++;
            String fileName = getTypeDir(bk.getId()) + "/" + bk.getAbbr() + "_" + cnumber + ".html";
            try (BufferedReader br = createReader(inputDir, fileName)) {
                String line;
                // Footnote visitors are collected while parsing prolog and verses
                // and filled in when the footnotes section is reached.
                List<FormattedText.Visitor<RuntimeException>> footnotes = new ArrayList<>();
                while ((line = br.readLine()) != null) {
                    if (line.equals("<div class=\"biblehtmlcontent prolog\">")) {
                        line = readRequiredLine(br, fileName);
                        FormattedText prolog = new FormattedText();
                        int end = parseLine(prolog.getAppendVisitor(), line, 0, footnotes);
                        ch.setProlog(prolog);
                        if (end != line.length())
                            throw new IOException(line.substring(end));
                        line = readRequiredLine(br, fileName);
                        if (!line.equals("</div>"))
                            throw new IOException(line);
                    } else if (line.equals("<div class=\"biblehtmlcontent verses\" id=\"verses\">")) {
                        while ((line = br.readLine()) != null) {
                            if (line.equals("</div>"))
                                break;
                            if (!line.startsWith("<div class=\"v\" id=\"v") || !line.endsWith("</div>"))
                                throw new IOException(line);
                            line = line.substring(20, line.length() - 6);
                            int pos = line.indexOf("\">");
                            if (pos == -1)
                                throw new IOException(line);
                            Verse v = new Verse(line.substring(0, pos));
                            int end = parseLine(v.getAppendVisitor(), line, pos + 2, footnotes);
                            if (end != line.length())
                                throw new IOException(line.substring(end));
                            ch.getVerses().add(v);
                        }
                        // The loop ends either on the closing tag or at end of file;
                        // only the latter leaves line == null.
                        if (line == null)
                            throw new IOException("Unexpected end of file: " + fileName);
                    } else if (line.equals("<div class=\"biblehtmlcontent footnotes\">")) {
                        for (int i = 0; i < footnotes.size(); i++) {
                            line = readRequiredLine(br, fileName);
                            String prefix = "<div class=\"fn\"><sup class=\"fnt\"><a name=\"fn" + (i + 1) + "\" href=\"#fnm" + (i + 1) + "\">" + (i + 1) + "</a></sup> ";
                            if (!line.startsWith(prefix) || !line.endsWith("</div>"))
                                throw new IOException(line);
                            line = line.substring(prefix.length(), line.length() - 6);
                            int end = parseLine(footnotes.get(i), line, 0, null);
                            if (end != line.length())
                                throw new IOException(line.substring(end));
                        }
                        line = readRequiredLine(br, fileName);
                        if (!line.equals("</div>"))
                            throw new IOException(line);
                    }
                }
                if (ch.getProlog() != null)
                    ch.getProlog().finished();
                for (Verse v : ch.getVerses()) {
                    v.finished();
                }
            }
        }
    }
    return bible;
}

/**
 * Reads the next line and fails if the end of the stream has been reached.
 *
 * @param reader   reader to read from
 * @param fileName name of the file being read, used in the error message only
 * @return the line that was read, never {@code null}
 * @throws IOException if reading fails or no further line is available
 */
private static String readRequiredLine(BufferedReader reader, String fileName) throws IOException {
    String line = reader.readLine();
    if (line == null)
        throw new IOException("Unexpected end of file: " + fileName);
    return line;
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) HashMap(java.util.HashMap) Bible(biblemulticonverter.data.Bible) Chapter(biblemulticonverter.data.Chapter) ArrayList(java.util.ArrayList) FormattedText(biblemulticonverter.data.FormattedText) IOException(java.io.IOException) Book(biblemulticonverter.data.Book) BufferedReader(java.io.BufferedReader) Verse(biblemulticonverter.data.Verse)

Aggregations

Chapter (biblemulticonverter.data.Chapter)64 Book (biblemulticonverter.data.Book)60 Verse (biblemulticonverter.data.Verse)57 FormattedText (biblemulticonverter.data.FormattedText)28 VirtualVerse (biblemulticonverter.data.VirtualVerse)25 MetadataBook (biblemulticonverter.data.MetadataBook)22 BookID (biblemulticonverter.data.BookID)20 ArrayList (java.util.ArrayList)20 Bible (biblemulticonverter.data.Bible)19 Headline (biblemulticonverter.data.FormattedText.Headline)19 File (java.io.File)18 BufferedWriter (java.io.BufferedWriter)15 IOException (java.io.IOException)14 FileOutputStream (java.io.FileOutputStream)13 OutputStreamWriter (java.io.OutputStreamWriter)12 EnumMap (java.util.EnumMap)9 HashMap (java.util.HashMap)9 Visitor (biblemulticonverter.data.FormattedText.Visitor)8 BufferedReader (java.io.BufferedReader)8 FileInputStream (java.io.FileInputStream)8