Search in sources :

Example 26 with Book

use of biblemulticonverter.data.Book in project BibleMultiConverter by schierlm.

the class EquipdEPUB method doExport.

/**
 * Exports the given bible as an EPUB archive written to {@code exportArgs[0] + ".epub"}.
 *
 * <p>The archive contains, in this order: the {@code mimetype} marker, {@code META-INF/container.xml},
 * the OPF package file, a global stylesheet, the NCX table of contents, and for every exported book
 * one index page ({@code <file>.0.xhtml}) plus one page per chapter ({@code <file>.<n>.xhtml}).
 * Only books whose ZefID is in the range 1..66 are exported; all other books are skipped with a warning.
 *
 * @param bible the bible to export
 * @param exportArgs {@code exportArgs[0]} is the output file name without the {@code .epub} suffix;
 *        the optional {@code exportArgs[1]} may be {@code -headlinesAfter} or {@code -noHeadlines}
 *        (any other value only produces a warning)
 * @throws Exception if writing the archive or rendering a chapter fails
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    final Set<String> unsupportedFeatures = new HashSet<>();
    String uuid = UUID.randomUUID().toString() + "-" + System.currentTimeMillis() / 1000;
    // Tri-state flag handed to writeChapter: false (default), true (-headlinesAfter) or null (-noHeadlines).
    Boolean headlinesAfter = false;
    if (exportArgs.length > 1) {
        if (exportArgs[1].equals("-headlinesAfter"))
            headlinesAfter = true;
        else if (exportArgs[1].equals("-noHeadlines"))
            headlinesAfter = null;
        else
            System.out.println("WARNING: Unsupported argument: " + exportArgs[1]);
    }
    try (ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(exportArgs[0] + ".epub"))) {
        // The EPUB container format requires "mimetype" to be the first entry and to be stored
        // uncompressed. ZipOutputStream deflates by default, so the entry is explicitly marked as
        // STORED; a STORED entry needs its size and CRC-32 set before putNextEntry is called.
        byte[] mimetypeBytes = "application/epub+zip".getBytes(StandardCharsets.US_ASCII);
        java.util.zip.CRC32 mimetypeCrc = new java.util.zip.CRC32();
        mimetypeCrc.update(mimetypeBytes);
        ZipEntry mimetypeEntry = new ZipEntry("mimetype");
        mimetypeEntry.setMethod(ZipEntry.STORED);
        mimetypeEntry.setSize(mimetypeBytes.length);
        mimetypeEntry.setCompressedSize(mimetypeBytes.length);
        mimetypeEntry.setCrc(mimetypeCrc.getValue());
        zos.putNextEntry(mimetypeEntry);
        zos.write(mimetypeBytes);

        // Container descriptor pointing at the OPF package file.
        zos.putNextEntry(new ZipEntry("META-INF/container.xml"));
        zos.write(("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<container" + " version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n <rootfiles>\n" + "  <rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\n" + " </rootfiles>\n</container>").getBytes(StandardCharsets.US_ASCII));

        // OPF package: metadata, manifest (one item per book index page and per chapter) and spine.
        zos.putNextEntry(new ZipEntry("OEBPS/content.opf"));
        StringBuilder sb = new StringBuilder();
        sb.append("<?xml version=\"1.0\"?>\n<package version=\"2.0\" xmlns=\"http://www.idpf.org/2007/opf\"" + " unique-identifier=\"uuid\">\n  <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\"" + " xmlns:opf=\"http://www.idpf.org/2007/opf\">\n   <dc:title>" + xml(bible.getName()) + "</dc:title>\n    <dc:creator opf:role=\"aut\"></dc:creator>\n    <dc:language>en</dc:language>\n" + "    <dc:rights>Public Domain</dc:rights>\n    <dc:publisher></dc:publisher>\n" + "    <dc:identifier id=\"uuid\">" + uuid + "</dc:identifier>\n  </metadata>\n  <manifest>\n" + "    <item id=\"ncx\" href=\"toc.ncx\" media-type=\"application/x-dtbncx+xml\" />\n" + "    <item id=\"style\" href=\"global.css\" media-type=\"text/css\" />\n");
        int counter = 1;
        for (Book book : bible.getBooks()) {
            if (book.getId().getZefID() < 1 || book.getId().getZefID() > 66)
                continue;
            String fileName = String.format("%02d.%s", book.getId().getZefID(), BOOK_NAMES[book.getId().getZefID()]);
            // Index 0 is the book's chapter index page; 1..size are the chapters themselves.
            for (int i = 0; i <= book.getChapters().size(); i++) {
                sb.append("    <item id=\"chapter" + counter + "\" href=\"" + fileName + "." + i + ".xhtml\" media-type=\"application/xhtml+xml\" />\n");
                counter++;
            }
        }
        sb.append("  </manifest>\n  <spine toc=\"ncx\">\n");
        for (int i = 1; i < counter; i++) {
            sb.append("    <itemref idref=\"chapter" + i + "\" />\n");
        }
        sb.append("  </spine>\n</package>");
        zos.write(sb.toString().getBytes(StandardCharsets.UTF_8));

        // Global stylesheet shared by all pages.
        zos.putNextEntry(new ZipEntry("OEBPS/global.css"));
        zos.write(("span.vn {\n  font-weight: bold;\n}\n" + "div.ci a {\n  display: block;\n  float: left;\n  margin: 0 15px 15px 0;\n}\n" + "").getBytes(StandardCharsets.US_ASCII));

        // NCX table of contents: one navigation point per exported book, linking to its index page.
        zos.putNextEntry(new ZipEntry("OEBPS/toc.ncx"));
        sb.setLength(0);
        sb.append("<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<ncx" + " xmlns=\"http://www.daisy.org/z3986/2005/ncx/\" version=\"2005-1\">\n  <head>\n" + "    <meta content=\"urn:uuid:" + uuid + "\" name=\"dtb:uuid\" />\n    <meta content=\"2\"" + " name=\"dtb:depth\" />\n    <meta content=\"0\" name=\"dtb:totalPageCount\" />\n" + "    <meta content=\"0\" name=\"dtb:maxPageNumber\" />\n  </head>\n  <docTitle>\n    <text>" + xml(bible.getName()) + "</text>\n  </docTitle>\n  <navMap>\n");
        counter = 1;
        for (Book book : bible.getBooks()) {
            if (book.getId().getZefID() < 1 || book.getId().getZefID() > 66)
                continue;
            String fileName = String.format("%02d.%s", book.getId().getZefID(), BOOK_NAMES[book.getId().getZefID()]);
            sb.append("    <navPoint id=\"navPoint" + counter + "\" playOrder=\"" + counter + "\">\n      <navLabel><text>" + xml(book.getShortName()) + "</text></navLabel>\n      <content src=\"" + fileName + ".0.xhtml\" />\n" + "    </navPoint>\n");
            counter++;
        }
        sb.append("  </navMap>\n</ncx>");
        zos.write(sb.toString().getBytes(StandardCharsets.UTF_8));

        // Content pages: per book an index page with links to every chapter, then the chapters.
        for (Book book : bible.getBooks()) {
            if (book.getId().getZefID() < 1 || book.getId().getZefID() > 66) {
                System.out.println("WARNING: Skipping book " + book.getAbbr());
                continue;
            }
            String fileName = String.format("%02d.%s", book.getId().getZefID(), BOOK_NAMES[book.getId().getZefID()]);
            zos.putNextEntry(new ZipEntry("OEBPS/" + fileName + ".0.xhtml"));
            sb.setLength(0);
            sb.append("<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<html dir=\"ltr\" xmlns=\"http://www.w3.org/1999/xhtml\" " + "xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"en\">\n<head>\n  <meta http-equiv=\"Content-Type\"" + " content=\"text/html; charset=UTF-8\" />\n  <title>" + xml(book.getShortName()) + " </title>\n" + "  <link rel=\"stylesheet\" href=\"global.css\" type=\"text/css\" />\n</head>\n<body>\n\n<h2>" + xml(book.getShortName()) + " </h2>\n\n<div class=\"ci\">\n");
            for (int i = 1; i <= book.getChapters().size(); i++) {
                sb.append("<a href=\"" + fileName + "." + i + ".xhtml\">" + i + "</a>");
            }
            sb.append("\n</div>\n\n</body>\n</html>");
            zos.write(sb.toString().getBytes(StandardCharsets.UTF_8));
            for (int i = 1; i <= book.getChapters().size(); i++) {
                zos.putNextEntry(new ZipEntry("OEBPS/" + fileName + "." + i + ".xhtml"));
                StringWriter sw = new StringWriter();
                writeChapter(sw, unsupportedFeatures, book, i, headlinesAfter);
                zos.write(sw.toString().getBytes(StandardCharsets.UTF_8));
            }
        }
    }
    // Report once, after the archive is closed, everything writeChapter recorded as unsupported.
    if (!unsupportedFeatures.isEmpty()) {
        System.out.println("WARNING: Skipped unsupported features: " + unsupportedFeatures);
    }
}
Also used : StringWriter(java.io.StringWriter) ZipOutputStream(java.util.zip.ZipOutputStream) Book(biblemulticonverter.data.Book) FileOutputStream(java.io.FileOutputStream) ZipEntry(java.util.zip.ZipEntry) HashSet(java.util.HashSet)

Example 27 with Book

use of biblemulticonverter.data.Book in project BibleMultiConverter by schierlm.

the class HeatMapHTML method build.

/**
 * Builds the heat map HTML page and writes it to {@code outputFile} as UTF-8.
 *
 * <p>Books are grouped into an Old Testament and a New Testament section (decided by
 * {@code book.getId().isNT()}), which are combined into a single "Total" root section. A book with
 * exactly one chapter becomes a single leaf section; any other book becomes a section containing one
 * sub-section per non-empty chapter. Books without an entry in {@code rawDataPerBook}, chapters
 * without verses, and books or testaments that end up empty are omitted.
 *
 * <p>Section labels can be overridden with the system properties {@code heatmap.label.ot},
 * {@code heatmap.label.nt}, {@code heatmap.label.total}, {@code heatmap.label.section} and
 * {@code heatmap.label.infotext}.
 *
 * @param outputFile file the HTML page is written to
 * @param bible bible whose books, chapters and verse numbers define the section structure
 * @param title page title, used for both {@code <title>} and the {@code <h1>} heading
 * @param categorizations categorizations shown in the legend and as table columns
 * @param rawDataPerBook raw data per book ID, indexed by chapter in the first dimension
 *        (presumably verse and categorization in the remaining ones; verify against Section)
 * @throws Exception if writing the file fails
 */
private void build(File outputFile, Bible bible, String title, Categorization[] categorizations, Map<BookID, int[][][]> rawDataPerBook) throws Exception {
    List<Section> otBooks = new ArrayList<>(), ntBooks = new ArrayList<>();
    for (Book book : bible.getBooks()) {
        int[][][] rawData = rawDataPerBook.get(book.getId());
        if (rawData == null)
            continue;
        List<Section> bookList = book.getId().isNT() ? ntBooks : otBooks;
        int chapCount = book.getChapters().size();
        if (chapCount == 1) {
            // Single-chapter book: no intermediate chapter level, the book itself is the leaf section.
            if (!book.getChapters().get(0).getVerses().isEmpty()) {
                String[] verseNumbers = new String[book.getChapters().get(0).getVerses().size()];
                for (int i = 0; i < verseNumbers.length; i++) {
                    verseNumbers[i] = book.getChapters().get(0).getVerses().get(i).getNumber();
                }
                bookList.add(new Section(book.getShortName(), rawData[0], verseNumbers, categorizations));
            }
        } else {
            List<Section> chapterList = new ArrayList<>();
            for (int i = 0; i < chapCount; i++) {
                if (!book.getChapters().get(i).getVerses().isEmpty()) {
                    String[] verseNumbers = new String[book.getChapters().get(i).getVerses().size()];
                    for (int j = 0; j < verseNumbers.length; j++) {
                        verseNumbers[j] = book.getChapters().get(i).getVerses().get(j).getNumber();
                    }
                    chapterList.add(new Section(book.getShortName() + " " + (i + 1), rawData[i], verseNumbers, categorizations));
                }
            }
            if (!chapterList.isEmpty())
                bookList.add(new Section(book.getShortName(), chapterList));
        }
    }
    List<Section> testaments = new ArrayList<>();
    if (!otBooks.isEmpty())
        testaments.add(new Section(System.getProperty("heatmap.label.ot", "Old Testament"), otBooks));
    // The New Testament label has its own property key; reusing "heatmap.label.ot" here would make
    // a customized Old Testament label show up on the New Testament section as well.
    if (!ntBooks.isEmpty())
        testaments.add(new Section(System.getProperty("heatmap.label.nt", "New Testament"), ntBooks));
    Section total = new Section(System.getProperty("heatmap.label.total", "Total"), testaments);
    String infoText = System.getProperty("heatmap.label.infotext", "");
    try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), StandardCharsets.UTF_8))) {
        // Page header: styles, the expand/collapse script for table rows, heading and optional info text.
        bw.write("<html>\n"
                + "<head>\n"
                + "<meta charset=\"UTF-8\">\n"
                + "<title>" + html(title) + "</title>\n"
                + "<style type=\"text/css\">\n"
                + "body { font-family: Verdana, Arial, Helvetica, sans-serif; }\n"
                + "table {border-collapse: collapse; font-size: 6pt; }\n"
                + "td, th {border: 1px solid #777; }\n"
                + "span.versedetails {display: inline-block; min-width: 2em; text-align: center; border: 1px solid black; margin: 1px; }\n"
                + "span.versedetails b {margin: 0px 2px;}\n"
                + "</style>\n"
                + "<script>\n"
                + "function toggle(elem) {\n"
                + "    var label = '';\n"
                + "    var trs = document.getElementsByTagName('tr');\n"
                + "    for (var i=0; i < trs.length; i++) {\n"
                + "        var id = trs[i].id;\n"
                + "        if (id == elem) {\n"
                + "            label = trs[i].getElementsByTagName('a')[0].innerHTML;\n"
                + "            if (label == '[+]')\n"
                + "                trs[i].getElementsByTagName('a')[0].innerHTML = '[-]';\n"
                + "            else\n"
                + "                trs[i].getElementsByTagName('a')[0].innerHTML = '[+]';\n"
                + "        } else if (id.length > elem.length && id.substring(0, elem.length+1) == elem+'_') {\n"
                + "            if (label=='[-]') {\n"
                + "                trs[i].style.display='none';\n"
                + "            } else if (id.substring(elem.length+1).indexOf('_') == -1) {\n"
                + "                trs[i].style.display='';\n"
                + "            }\n"
                + "        }\n"
                + "    }\n"
                + "}\n"
                + "window.onload = function() {\n"
                + "    document.getElementById('root').style.display='';\n"
                + "    toggle('root');\n"
                + "}\n"
                + "</script>\n"
                + "</head>\n"
                + "<body>\n"
                + "<h1>" + html(title) + "</h1>\n"
                + (infoText.isEmpty() ? "" : "<p>" + html(infoText) + "</p>\n")
                + "<table>");
        // Legend table: for each categorization its title followed by one colored row per category.
        for (Categorization c : categorizations) {
            bw.write("<tr><th colspan=\"2\">" + html(c.title) + "</th></tr>");
            for (int i = 0; i < c.colors.length; i++) {
                bw.write("<tr><th style=\"background-color: " + c.colors[i] + "\">#" + (i + 1) + "</th><td>" + html(c.names[i]) + "</td></tr>");
            }
        }
        bw.write("</table>\n<br />\n");
        // Data table header: section column, then per categorization a title column and its categories.
        bw.write("<table><tr><th>" + html(System.getProperty("heatmap.label.section", "Section")) + "</th>");
        for (Categorization c : categorizations) {
            bw.write("<th>" + html(c.title) + "</th>");
            for (int i = 0; i < c.colors.length; i++) {
                bw.write("<th style=\"background-color: " + c.colors[i] + "\"><i>" + html(c.names[i]) + "</i></th>");
            }
        }
        bw.write("</tr>\n");
        // Data rows are emitted by appendHTML, starting at the root section with row id "root".
        appendHTML(bw, 0, "root", total, categorizations);
        bw.write("</table>");
    }
}
Also used : Book(biblemulticonverter.data.Book) FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)

Example 28 with Book

use of biblemulticonverter.data.Book in project BibleMultiConverter by schierlm.

the class NeUeParser method doImport.

@Override
public Bible doImport(File inputDirectory) throws Exception {
    Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
    MetadataBook metadata = new MetadataBook();
    metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
    metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
    metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
    metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
    metadata.setValue(MetadataBookKey.language, "GER");
    bible.getBooks().add(metadata.getBook());
    String mainFile = "NeUe.htm";
    if (!new File(inputDirectory, mainFile).exists())
        mainFile = "index.htm";
    try (BufferedReader br = createReader(inputDirectory, mainFile)) {
        String line = br.readLine().trim();
        while (!line.startsWith("<p class=\"u3\">")) {
            if (line.contains("Textstand: ")) {
                line = line.substring(line.indexOf("Textstand: ") + 11);
                line = line.substring(0, line.indexOf('<'));
                metadata.setValue(MetadataBookKey.version, line);
                metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
                metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
                metadata.finished();
            }
            line = br.readLine().trim();
        }
        Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a>&nbsp;&nbsp;(?:</p>)?");
        int bookIndex = 0, jcIndex = 0;
        while (!line.startsWith("<a name=\"vorwort\">")) {
            if (line.equals("<br>")) {
                line = br.readLine().trim();
                if (line.startsWith("&raquo;&raquo;&nbsp;&nbsp;"))
                    line = line.substring("&raquo;&raquo;&nbsp;&nbsp;".length());
            }
            Matcher m = tocPattern.matcher(line);
            if (m.matches()) {
                String url = m.group(1);
                String shortName = replaceEntities(m.group(2));
                if (url.endsWith(".html#bb")) {
                    String filename = url.substring(0, url.length() - 8);
                    BookMetadata bm = METADATA[bookIndex];
                    if (!bm.filename.equals(filename))
                        throw new IOException(filename + "/" + bm.filename);
                    bm.shortname = shortName;
                    bookIndex++;
                } else if (url.startsWith("0")) {
                    if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
                        throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
                    jcIndex++;
                } else {
                    throw new IOException(url);
                }
            } else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p>&nbsp;</p>") && !line.equals("<p><a name=\"bb\">&nbsp;</a></p>")) {
                throw new IOException(line);
            }
            line = br.readLine().trim();
        }
        if (bookIndex != METADATA.length)
            throw new IOException(bookIndex + " != " + METADATA.length);
        if (jcIndex == 0)
            JESUS_CHRONIK = new String[0];
        if (jcIndex != JESUS_CHRONIK.length)
            throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
        // Vorwort
        Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
        bible.getBooks().add(vorwort);
        Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
        boolean needParagraph = false;
        if (line.endsWith("</a><br>"))
            line = br.readLine().trim();
        while (!line.startsWith("<div align=\"right\">")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            if (line.startsWith("<h2>")) {
                if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
                    throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
            } else if (line.startsWith("<h4>")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<h4 id=")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<div class=\"fn\">")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
            } else if (line.startsWith("<p>")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                if (line.endsWith("<br />"))
                    line += br.readLine().trim();
                parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
            } else if (line.equals("<ul>")) {
                while (!line.equals("</ul>")) {
                    line = br.readLine();
                }
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<p>&nbsp;</p>");
        }
        vorwort.getChapters().get(0).getProlog().finished();
    }
    for (BookMetadata bm : METADATA) {
        if (!new File(inputDirectory, bm.filename + ".html").exists()) {
            System.out.println("*** Skipping " + bm.filename + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
            String line = br.readLine().trim();
            line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "&raquo;&raquo;");
            if (!line.equals("<p><a name=\"bb\">&nbsp;</a></p>") && !line.equals("<p><a id=\"bb\">&nbsp;</a></p>"))
                throw new IOException(line);
            line = skipLines(br);
            if (line.equals("<p>&nbsp;</p>"))
                line = br.readLine().trim();
            Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
            bible.getBooks().add(bk);
            line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p>&nbsp;</p>");
            FormattedText prolog = new FormattedText();
            prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
            line = skipLines(br);
            boolean firstProlog = true;
            while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
                if (firstProlog) {
                    firstProlog = false;
                } else {
                    prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                }
                parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
                line = skipLines(br);
            }
            if (firstProlog)
                throw new IOException(line);
            prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
            parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
            prolog.finished();
            line = skipLines(br);
            if (!line.startsWith("<h"))
                throw new IOException(line);
            char minHeadline = line.charAt(2);
            List<Headline> headlines = new ArrayList<>();
            boolean inParagraph = false;
            Chapter currentChapter = null;
            Verse currentVerse = null;
            List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
            List<String> footnoteVerses = new ArrayList<>();
            while (!line.equals("<hr>")) {
                if (line.startsWith("<p>&nbsp;</p>")) {
                    line = line.substring(13).trim();
                    if (line.length() == 0)
                        line = skipLines(br);
                    continue;
                }
                String restLine = null;
                List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
                while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
                if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
                    line = "<p>" + line.substring(16);
                }
                if (line.matches(".*</p>.+")) {
                    int pos = line.indexOf("</p>");
                    restLine = line.substring(pos + 4).trim();
                    line = line.substring(0, pos + 4);
                }
                if (!inParagraph && line.startsWith("<p>")) {
                    inParagraph = true;
                    line = line.substring(3).trim();
                    if (line.length() == 0) {
                        line = skipLines(br);
                        continue;
                    }
                }
                if (line.indexOf("<span class=\"vers\">", 1) != -1) {
                    int pos = line.indexOf("<span class=\"vers\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                if (line.indexOf("<p class=\"poet\">", 1) != -1) {
                    int pos = line.indexOf("<p class=\"poet\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                while (line.endsWith("&nbsp;")) line = line.substring(0, line.length() - 6);
                if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
                    Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
                    String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
                    if (headline.contains("*"))
                        throw new IOException(headline);
                    hl.getAppendVisitor().visitText(replaceEntities(headline));
                    headlines.add(hl);
                } else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
                    int pos = line.indexOf("</span>");
                    if (pos == -1)
                        throw new IOException(line);
                    String vs = line.substring(19, pos).trim();
                    if (vs.endsWith("&nbsp;")) {
                        vs = cutAffix(vs, "", "&nbsp;");
                    }
                    if (vs.matches("[0-9]+(,[0-9]+)?")) {
                        currentVerse = new Verse(vs);
                    } else {
                        throw new IOException(vs);
                    }
                    line = line.substring(pos + 7);
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    if (line.startsWith("&nbsp;")) {
                        line = line.substring(6);
                    }
                    for (Headline h : headlines) {
                        h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
                    }
                    headlines.clear();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                    currentChapter.getVerses().add(currentVerse);
                } else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
                    int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
                    currentChapter = new Chapter();
                    currentVerse = null;
                    bk.getChapters().add(currentChapter);
                    if (chap != bk.getChapters().size())
                        throw new IOException(chap + "/" + bk.getChapters().size());
                    if (prolog != null) {
                        currentChapter.setProlog(prolog);
                        prolog = null;
                    }
                } else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
                    String content = cutAffix(line, "<div class=\"fn\">", "</div>");
                    if (footnoteVerses.size() == 0)
                        throw new IOException(line);
                    String prefix = footnoteVerses.remove(0) + ":";
                    if (!content.startsWith(prefix)) {
                        throw new IOException(prefix + " / " + content);
                    }
                    parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
                } else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith("&nbsp;") || line.startsWith("<span class=\"u2\">"))) {
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    System.err.println("Next line: " + br.readLine());
                    throw new IOException(line);
                }
                if (!newFootnotes.isEmpty()) {
                    footnotes.addAll(newFootnotes);
                    for (int i = 0; i < newFootnotes.size(); i++) {
                        if (currentVerse.getNumber().contains(",")) {
                            footnoteVerses.add(currentVerse.getNumber());
                        } else {
                            footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
                        }
                    }
                }
                if (restLine != null)
                    line = restLine;
                else
                    line = skipLines(br);
            }
            if (!headlines.isEmpty())
                throw new IOException("" + headlines.size());
            if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
                throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
            for (Chapter ch : bk.getChapters()) {
                for (Verse vv : ch.getVerses()) {
                    vv.trimWhitespace();
                    vv.finished();
                }
            }
        }
    }
    // Anhang
    Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
    bible.getBooks().add(anhang);
    Visitor<RuntimeException> vv = getPrologVisitor(anhang);
    vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
    try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
        String line = br.readLine().trim();
        while (!line.startsWith("<a name=\"at\">")) {
            line = br.readLine().trim();
        }
        while (!line.equals("</body>")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            line = line.replaceAll("> +<", "><");
            line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
            if (line.startsWith("<h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
            } else if (line.startsWith("<h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
            } else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
                String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
                line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
                if (line.contains("<td><br /><br /><a href")) {
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                    line = line.replace("<td><br /><br /><a href", "<td><a href");
                }
                String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
                Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
                BookMetadata m = null;
                for (BookMetadata bm : METADATA) {
                    if (bm.filename.equals(parts[0])) {
                        m = bm;
                        break;
                    }
                }
                bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
                bold.visitText(" " + replaceEntities(title));
                vv.visitLineBreak(LineBreakKind.NEWLINE);
                line = br.readLine().trim();
                while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
                vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
                vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                line = br.readLine().trim();
                if (!line.equals("</tr>"))
                    throw new IOException(line);
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p>", "<p>&nbsp;</p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
        }
    }
    // Hesekiels Tempel
    vv.visitHeadline(1).visitText("Hesekiels Tempel");
    Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
    vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
    vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");
    // Jesus-Chronik
    if (JESUS_CHRONIK.length > 0)
        vv.visitHeadline(1).visitText("Die Jesus-Chronik");
    for (String name : JESUS_CHRONIK) {
        if (!new File(inputDirectory, name + ".html").exists()) {
            System.out.println("*** Skipping " + name + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
            String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
            List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
            List<String> footnotePrefixes = new ArrayList<>();
            while (!line.startsWith("</body>")) {
                line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
                if (line.startsWith("<h2>")) {
                    parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
                } else if (line.startsWith("<div class=\"fn\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
                    for (String fn : fns) {
                        fn = fn.trim();
                        String pfx = footnotePrefixes.remove(0);
                        Visitor<RuntimeException> fnv = footnoteList.remove(0);
                        if (!fn.startsWith(pfx))
                            throw new IOException(pfx + " / " + fn);
                        parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
                    }
                } else if (line.startsWith("<p><div class=\"rot\">")) {
                    String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
                    int pos = line.indexOf("</b><br />");
                    parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
                    String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
                    if (!xref.isEmpty())
                        parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p>")) {
                    parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("&copy;")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<div class=\"e\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    throw new IOException(line);
                }
                line = skipLines(br);
            }
            if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
                throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
        }
    }
    anhang.getChapters().get(0).getProlog().trimWhitespace();
    anhang.getChapters().get(0).getProlog().finished();
    return bible;
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) Matcher(java.util.regex.Matcher) Bible(biblemulticonverter.data.Bible) ArrayList(java.util.ArrayList) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) Headline(biblemulticonverter.data.FormattedText.Headline) MetadataBook(biblemulticonverter.data.MetadataBook) Pattern(java.util.regex.Pattern) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FormattedText(biblemulticonverter.data.FormattedText) Date(java.util.Date) BufferedReader(java.io.BufferedReader) File(java.io.File) SimpleDateFormat(java.text.SimpleDateFormat) Verse(biblemulticonverter.data.Verse)

Example 29 with Book

use of biblemulticonverter.data.Book in project BibleMultiConverter by schierlm.

the class OSIS method doExport.

/**
 * Builds an OSIS XML document from the given bible and writes it to the file
 * named by {@code exportArgs[0]}.
 *
 * <p>Every book becomes a {@code <div type="book">} holding a main title and
 * one {@code <chapter>} per chapter; each chapter holds a chapter title, the
 * optional prolog, the headlines and one {@code <verse>} per virtual verse.
 * Verses whose own number differs from the virtual verse number are preceded
 * by a bold {@code <hi>} marker showing the original number in parentheses.
 *
 * <p>After the tree is built, the elements named in {@code exportArgs[1]}
 * (comma separated, default {@code "verse"}) are handed to
 * {@code convertChildrenToMilestoned}; passing {@code "-"} skips that step.
 *
 * @param bible      bible to export
 * @param exportArgs output file name, optionally followed by the list of
 *                   element names to milestone
 * @throws IllegalArgumentException if a requested element name is not
 *                                  contained in {@code GENERATED_MILESTONEABLE_ELEMENTS}
 * @throws Exception                on XML builder or transformer failures
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();

    // Document root with the OSIS namespace and schema location declarations.
    Element root = doc.createElement("osis");
    doc.appendChild(root);
    root.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
    root.setAttribute("xmlns", "http://www.bibletechnologies.net/2003/OSIS/namespace");
    root.setAttribute("xsi:schemaLocation", "http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.2.1.1.xsd");

    Element textRoot = doc.createElement("osisText");
    root.appendChild(textRoot);
    textRoot.setAttribute("canonical", "true");
    textRoot.setAttribute("osisIDWork", "Exported");
    textRoot.appendChild(buildHeader(doc, bible.getName()));

    for (Book bk : bible.getBooks()) {
        final String bookOsisID = bk.getId().getOsisID();

        Element bookDiv = doc.createElement("div");
        textRoot.appendChild(bookDiv);
        bookDiv.setAttribute("type", "book");
        bookDiv.setAttribute("canonical", "true");
        bookDiv.setAttribute("osisID", bookOsisID);

        Element mainTitle = doc.createElement("title");
        bookDiv.appendChild(mainTitle);
        mainTitle.setAttribute("type", "main");
        mainTitle.appendChild(doc.createTextNode(bk.getLongName()));

        // Chapters carry no number of their own; they are counted from 1 in list order.
        int chapterNo = 1;
        for (Chapter chp : bk.getChapters()) {
            final String chapterOsisID = bookOsisID + "." + chapterNo;

            Element chapterElem = doc.createElement("chapter");
            bookDiv.appendChild(chapterElem);
            chapterElem.setAttribute("osisID", chapterOsisID);
            OSISVisitor chapterVisitor = new OSISVisitor(chapterElem, bk.getId().isNT());

            Element chapterTitle = doc.createElement("title");
            chapterElem.appendChild(chapterTitle);
            chapterTitle.setAttribute("type", "chapter");
            chapterTitle.appendChild(doc.createTextNode(bk.getAbbr() + " " + chapterNo));

            if (chp.getProlog() != null) {
                chp.getProlog().accept(chapterVisitor);
            }

            for (VirtualVerse vv : chp.createVirtualVerses()) {
                // Headlines are emitted through the chapter visitor, before the verse element is created.
                for (Headline hl : vv.getHeadlines()) {
                    hl.accept(chapterVisitor.visitHeadline(hl.getDepth()));
                }

                Element verseElem = doc.createElement("verse");
                chapterElem.appendChild(verseElem);
                verseElem.setAttribute("osisID", chapterOsisID + "." + vv.getNumber());

                final String virtualNumber = String.valueOf(vv.getNumber());
                for (Verse v : vv.getVerses()) {
                    if (!v.getNumber().equals(virtualNumber)) {
                        // Keep the original verse number visible inside the merged virtual verse.
                        Element numberMarker = doc.createElement("hi");
                        verseElem.appendChild(numberMarker);
                        numberMarker.setAttribute("type", "bold");
                        numberMarker.appendChild(doc.createTextNode("(" + v.getNumber() + ")"));
                    }
                    v.accept(new OSISVisitor(verseElem, bk.getId().isNT()));
                }
            }
            chapterNo++;
        }
    }

    String milestonedElementNames;
    if (exportArgs.length > 1) {
        milestonedElementNames = exportArgs[1];
    } else {
        milestonedElementNames = "verse";
    }

    if (!milestonedElementNames.equals("-")) {
        Set<String> requested = new HashSet<>(Arrays.asList(milestonedElementNames.split(",")));
        Set<String> rejected = new HashSet<>(requested);
        rejected.removeAll(GENERATED_MILESTONEABLE_ELEMENTS);

        // Report every rejected name before failing, so all problems are visible at once.
        for (String rejectedName : rejected) {
            String reason = GENERATED_UNMILESTONEABLE_ELEMENTS.contains(rejectedName)
                    ? " may not be milestoned"
                    : " is never generated by the OSIS export";
            System.out.println("ERROR: " + rejectedName + reason);
        }
        if (!rejected.isEmpty()) {
            throw new IllegalArgumentException("Cannot create milestoned elements: " + milestonedElementNames);
        }
        convertChildrenToMilestoned(doc.getDocumentElement(), requested);
    }

    DOMSource source = new DOMSource(doc);
    StreamResult target = new StreamResult(new File(exportArgs[0]));
    TransformerFactory.newInstance().newTransformer().transform(source, target);
}
Also used : DOMSource(javax.xml.transform.dom.DOMSource) VirtualVerse(biblemulticonverter.data.VirtualVerse) StreamResult(javax.xml.transform.stream.StreamResult) Element(org.w3c.dom.Element) Chapter(biblemulticonverter.data.Chapter) Document(org.w3c.dom.Document) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) Headline(biblemulticonverter.data.FormattedText.Headline) File(java.io.File) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse) HashSet(java.util.HashSet)

Example 30 with Book

use of biblemulticonverter.data.Book in project BibleMultiConverter by schierlm.

the class OSIS method doImport.

/**
 * Reads an OSIS XML file and converts it into a {@link Bible}.
 *
 * <p>The file is first passed to {@code ValidateXML.validateFileBeforeParsing}
 * together with the bundled {@code /osisCore.2.1.1.xsd} schema and is then
 * parsed into a DOM. The bible name comes from the header work title and
 * falls back to {@code "OSIS Bible"}. If the header contains a description
 * or rights text, a metadata book is added before the real books.
 *
 * <p>Every {@code div[@type='book']} becomes one {@link Book}. A book written
 * in milestoned form (an {@code sID} start marker with a matching
 * {@code eID} end marker) is first turned back into a container: all nodes
 * between the two markers are moved into the start element and the end
 * marker is removed.
 *
 * @param inputFile OSIS XML file to read
 * @return the imported bible
 * @throws IllegalStateException if a milestoned book has no end marker, or
 *                               its start and end markers cannot be made siblings
 * @throws Exception             on validation, parsing or XPath failures
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    ValidateXML.validateFileBeforeParsing(SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(ObjectFactory.class.getResource("/osisCore.2.1.1.xsd")), inputFile);
    printedWarnings.clear();

    DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    XPath xpath = javax.xml.xpath.XPathFactory.newInstance().newXPath();
    Document osisDoc = docBuilder.parse(inputFile);

    String headerTitle = xpath.evaluate("/osis/osisText/header/work/title/text()", osisDoc);
    Bible result = new Bible(headerTitle.isEmpty() ? "OSIS Bible" : headerTitle);

    // Header metadata is only kept when a description or a rights statement exists.
    String description = xpath.evaluate("/osis/osisText/header/work/description/text()", osisDoc);
    String rights = xpath.evaluate("/osis/osisText/header/work/rights/text()", osisDoc);
    boolean hasDescription = !description.isEmpty();
    boolean hasRights = !rights.isEmpty();
    if (hasDescription || hasRights) {
        String date = xpath.evaluate("/osis/osisText/header/work/date/text()", osisDoc);
        String titleDesc = xpath.evaluate("/osis/osisText/titlePage/description/text()", osisDoc);
        MetadataBook mb = new MetadataBook();
        if (hasDescription) {
            mb.setValue(MetadataBookKey.description, description.replaceAll("[\r\n\t ]+", " ").trim());
        }
        if (hasRights) {
            mb.setValue(MetadataBookKey.rights, rights.replaceAll("[\r\n\t ]+", " ").trim());
        }
        if (!date.isEmpty()) {
            mb.setValue(MetadataBookKey.date, date);
        }
        if (!titleDesc.isEmpty()) {
            mb.setValue("description@titlePage", titleDesc.replaceAll("[\r\n\t ]+", " ").trim());
        }
        mb.finished();
        result.getBooks().add(mb.getBook());
    }

    NodeList osisBooks = (NodeList) xpath.evaluate("/osis/osisText//div[@type='book']", osisDoc, XPathConstants.NODESET);
    for (int i = 0; i < osisBooks.getLength(); i++) {
        Element osisBook = (Element) osisBooks.item(i);

        String startId = osisBook.getAttribute("sID");
        if (!startId.isEmpty()) {
            // Milestoned book: locate the end marker carrying the same id.
            Element osisBookEnd = (Element) xpath.evaluate("//div[@eID='" + startId + "']", osisDoc, XPathConstants.NODE);
            if (osisBookEnd == null) {
                throw new IllegalStateException("No milestoned div found with eID " + startId);
            }

            if (!osisBookEnd.getParentNode().isSameNode(osisBook.getParentNode())) {
                // Markers sit at different depths: find the closest node that is an
                // ancestor-or-self of both and let convertToMilestoned rework it.
                List<Node> endAncestors = new ArrayList<>();
                for (Node n = osisBookEnd; n != null; n = n.getParentNode()) {
                    endAncestors.add(n);
                }
                Node shared = null;
                for (Node n = osisBook; n != null && shared == null; n = n.getParentNode()) {
                    for (Node ancestor : endAncestors) {
                        if (n.isSameNode(ancestor)) {
                            shared = n;
                            break;
                        }
                    }
                }
                if (shared == null) {
                    throw new IllegalStateException("Unable to find common parent of milestoned div start and end tag");
                }
                convertToMilestoned((Element) shared);
                if (!osisBookEnd.getParentNode().isSameNode(osisBook.getParentNode())) {
                    throw new IllegalStateException("Unable to normalize XML so that milestoned div start and end tags are siblings");
                }
            }

            // Move everything between the two markers into the start element.
            for (Node next = osisBook.getNextSibling();
                    next != null && !next.isSameNode(osisBookEnd);
                    next = osisBook.getNextSibling()) {
                osisBook.appendChild(next);
            }
            osisBookEnd.getParentNode().removeChild(osisBookEnd);
        }

        String bookOsisID = osisBook.getAttribute("osisID");
        BookID bookID = BookID.fromOsisId(bookOsisID);
        String title = bookID.getEnglishName();

        // The first non-text child may be a <title type="main"> that overrides the English default.
        Node firstChild = osisBook.getFirstChild();
        while (firstChild instanceof Text) {
            firstChild = firstChild.getNextSibling();
        }
        if (firstChild instanceof Element && firstChild.getNodeName().equals("title")) {
            Element mainTitle = (Element) firstChild;
            boolean isMain = mainTitle.getAttribute("type").equals("main");
            if (isMain && mainTitle.getChildNodes().getLength() > 0) {
                title = mainTitle.getTextContent();
            }
        }

        Book bibleBook = new Book(bookOsisID, bookID, title, title);
        result.getBooks().add(bibleBook);
        parseBook(bookOsisID, osisBook, bibleBook);
    }
    return result;
}
Also used : XPath(javax.xml.xpath.XPath) MetadataBook(biblemulticonverter.data.MetadataBook) Bible(biblemulticonverter.data.Bible) NodeList(org.w3c.dom.NodeList) Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) ArrayList(java.util.ArrayList) Text(org.w3c.dom.Text) FormattedText(biblemulticonverter.data.FormattedText) Document(org.w3c.dom.Document) BookID(biblemulticonverter.data.BookID) DocumentBuilder(javax.xml.parsers.DocumentBuilder) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book)

Aggregations

Book (biblemulticonverter.data.Book): 67, Chapter (biblemulticonverter.data.Chapter): 60, Verse (biblemulticonverter.data.Verse): 57, FormattedText (biblemulticonverter.data.FormattedText): 27, MetadataBook (biblemulticonverter.data.MetadataBook): 25, VirtualVerse (biblemulticonverter.data.VirtualVerse): 24, BookID (biblemulticonverter.data.BookID): 22, ArrayList (java.util.ArrayList): 22, File (java.io.File): 20, Bible (biblemulticonverter.data.Bible): 19, Headline (biblemulticonverter.data.FormattedText.Headline): 18, BufferedWriter (java.io.BufferedWriter): 16, FileOutputStream (java.io.FileOutputStream): 15, IOException (java.io.IOException): 14, OutputStreamWriter (java.io.OutputStreamWriter): 13, HashMap (java.util.HashMap): 10, HashSet (java.util.HashSet): 10, Visitor (biblemulticonverter.data.FormattedText.Visitor): 9, FileInputStream (java.io.FileInputStream): 9, EnumMap (java.util.EnumMap): 9