Search in sources :

Example 11 with Headline

use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.

In the class NeUeParser, the method doImport.

/**
 * Imports the NeÜ HTML files found in {@code inputDirectory} into a {@link Bible}.
 *
 * The import runs in these phases, in order:
 * <ol>
 * <li>create the metadata book from fixed values plus the version read from the main page;</li>
 * <li>read the table of contents on the main page and verify it against {@code METADATA}
 * and {@code JESUS_CHRONIK};</li>
 * <li>read the preface ("Vorwort") from the main page;</li>
 * <li>read one HTML file per entry of {@code METADATA} (missing files are skipped with a message);</li>
 * <li>build the appendix ("Anhang") from {@code bibel.html}, a fixed link/image block and
 * the Jesus-Chronik files.</li>
 * </ol>
 *
 * Side effects visible in this method: {@code shortname} of the {@code METADATA} entries is
 * overwritten with the link text from the table of contents, and {@code JESUS_CHRONIK} is
 * replaced by an empty array when the table of contents lists no such entry.
 *
 * @param inputDirectory directory that contains the HTML files
 * @return the bible built from the files
 * @throws Exception in particular an {@link IOException} whose message is the offending
 *         line (or the mismatching values) whenever the input does not have the expected form
 */
@Override
public Bible doImport(File inputDirectory) throws Exception {
    Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
    // Fixed metadata; version, date and revision are added below once the main page has been read.
    MetadataBook metadata = new MetadataBook();
    metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
    metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
    metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
    metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
    metadata.setValue(MetadataBookKey.language, "GER");
    bible.getBooks().add(metadata.getBook());
    // The main page is "NeUe.htm"; fall back to "index.htm" when that file does not exist.
    String mainFile = "NeUe.htm";
    if (!new File(inputDirectory, mainFile).exists())
        mainFile = "index.htm";
    // Main page: version line, table of contents, preface.
    try (BufferedReader br = createReader(inputDirectory, mainFile)) {
        String line = br.readLine().trim();
        // Scan up to the first <p class="u3"> line, picking up the "Textstand: " line on the way.
        while (!line.startsWith("<p class=\"u3\">")) {
            if (line.contains("Textstand: ")) {
                // 11 == "Textstand: ".length(); the version is the text up to the next '<'.
                line = line.substring(line.indexOf("Textstand: ") + 11);
                line = line.substring(0, line.indexOf('<'));
                metadata.setValue(MetadataBookKey.version, line);
                // The date is the time of the import, not a value parsed from the file.
                metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
                // The revision is the version string with every non-digit run removed.
                metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
                metadata.finished();
            }
            line = br.readLine().trim();
        }
        // One table-of-contents entry: a link followed by two &nbsp; and an optional </p>.
        // Group 1 is the href, group 2 the link text.
        Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a>&nbsp;&nbsp;(?:</p>)?");
        int bookIndex = 0, jcIndex = 0;
        // Walk the table of contents until the preface anchor. Entries have to appear in the
        // same order as the METADATA and JESUS_CHRONIK arrays.
        while (!line.startsWith("<a name=\"vorwort\">")) {
            if (line.equals("<br>")) {
                // The line after a "<br>" may carry a "&raquo;&raquo;&nbsp;&nbsp;" prefix, which is removed before matching.
                line = br.readLine().trim();
                if (line.startsWith("&raquo;&raquo;&nbsp;&nbsp;"))
                    line = line.substring("&raquo;&raquo;&nbsp;&nbsp;".length());
            }
            Matcher m = tocPattern.matcher(line);
            if (m.matches()) {
                String url = m.group(1);
                String shortName = replaceEntities(m.group(2));
                if (url.endsWith(".html#bb")) {
                    // Book link: drop ".html#bb" (8 characters) and compare with the next expected book file.
                    String filename = url.substring(0, url.length() - 8);
                    BookMetadata bm = METADATA[bookIndex];
                    if (!bm.filename.equals(filename))
                        throw new IOException(filename + "/" + bm.filename);
                    // The link text becomes the short name used when the book is created later.
                    bm.shortname = shortName;
                    bookIndex++;
                } else if (url.startsWith("0")) {
                    // Links starting with "0" must match the next expected JESUS_CHRONIK entry.
                    if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
                        throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
                    jcIndex++;
                } else {
                    throw new IOException(url);
                }
            } else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p>&nbsp;</p>") && !line.equals("<p><a name=\"bb\">&nbsp;</a></p>")) {
                // Any other non-empty line has to be one of the known filler lines listed above.
                throw new IOException(line);
            }
            line = br.readLine().trim();
        }
        // Every METADATA entry must have been listed.
        if (bookIndex != METADATA.length)
            throw new IOException(bookIndex + " != " + METADATA.length);
        // No Jesus-Chronik link at all: replace JESUS_CHRONIK by an empty array so the later
        // section has nothing to read. Otherwise every entry must have been listed.
        if (jcIndex == 0)
            JESUS_CHRONIK = new String[0];
        if (jcIndex != JESUS_CHRONIK.length)
            throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
        // Vorwort (preface): written through the visitor returned by getPrologVisitor and
        // finished below as the prolog of the book's first chapter.
        Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
        bible.getBooks().add(vorwort);
        Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
        // Set once a text block has been written, so that the next text block is preceded by a paragraph break.
        boolean needParagraph = false;
        // An anchor line ending in "</a><br>" is not processed; continue with the next line.
        if (line.endsWith("</a><br>"))
            line = br.readLine().trim();
        while (!line.startsWith("<div align=\"right\">")) {
            // Remove empty named anchors before classifying the line.
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            if (line.startsWith("<h2>")) {
                // The <h2> text is only verified against the book's long name; nothing is emitted for it.
                if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
                    throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
            } else if (line.startsWith("<h4>")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<h4 id=")) {
                // Variant with an id attribute: the opening tag is removed first, then the
                // back-link to "#vorwort" and the closing tag are passed to cutAffix.
                parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<div class=\"fn\">")) {
                // <div class="fn"> blocks are written in italics.
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
            } else if (line.startsWith("<p>")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                // A trailing <br /> means the paragraph continues on the next physical line.
                if (line.endsWith("<br />"))
                    line += br.readLine().trim();
                parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
            } else if (line.equals("<ul>")) {
                // Skip the whole list.
                // NOTE(review): lines are not trimmed here, and readLine() returns null at end
                // of file, so a missing or indented "</ul>" ends in a NullPointerException.
                while (!line.equals("</ul>")) {
                    line = br.readLine();
                }
            } else {
                throw new IOException(line);
            }
            // NOTE(review): presumably returns the next line while skipping "<p>&nbsp;</p>" spacers - confirm against skipLines.
            line = skipLines(br, "<p>&nbsp;</p>");
        }
        vorwort.getChapters().get(0).getProlog().finished();
    }
    // One HTML file per METADATA entry; a missing file is reported on stdout and skipped.
    for (BookMetadata bm : METADATA) {
        if (!new File(inputDirectory, bm.filename + ".html").exists()) {
            System.out.println("*** Skipping " + bm.filename + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
            String line = br.readLine().trim();
            // NOTE(review): presumably skips header/navigation lines starting with one of these prefixes - confirm against skipLines.
            line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "&raquo;&raquo;");
            // The first remaining line has to be the "bb" anchor (name= or id= variant).
            if (!line.equals("<p><a name=\"bb\">&nbsp;</a></p>") && !line.equals("<p><a id=\"bb\">&nbsp;</a></p>"))
                throw new IOException(line);
            line = skipLines(br);
            if (line.equals("<p>&nbsp;</p>"))
                line = br.readLine().trim();
            // The <h1> line supplies the long title of the book.
            Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
            bible.getBooks().add(bk);
            line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p>&nbsp;</p>");
            // Book introduction: a headline (<p class="u0">), one or more <div class="e">
            // paragraphs and a bold+italic <p class="u1"> line. It is attached as prolog to
            // the first chapter once that chapter is created.
            FormattedText prolog = new FormattedText();
            prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
            line = skipLines(br);
            boolean firstProlog = true;
            while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
                // Paragraph break between (not before) the introduction paragraphs.
                if (firstProlog) {
                    firstProlog = false;
                } else {
                    prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                }
                parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
                line = skipLines(br);
            }
            // At least one introduction paragraph is required.
            if (firstProlog)
                throw new IOException(line);
            prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
            parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
            prolog.finished();
            line = skipLines(br);
            if (!line.startsWith("<h"))
                throw new IOException(line);
            // The level digit of the first headline (character after "<h") maps to depth 1;
            // later headline depths are computed relative to it.
            char minHeadline = line.charAt(2);
            // Headlines seen since the last verse; they are written into the next verse.
            List<Headline> headlines = new ArrayList<>();
            boolean inParagraph = false;
            Chapter currentChapter = null;
            Verse currentVerse = null;
            // Footnote visitors still waiting for their text, in order of appearance, and the
            // label each corresponding <div class="fn"> line is expected to start with.
            List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
            List<String> footnoteVerses = new ArrayList<>();
            // Main body of the book, terminated by an "<hr>" line.
            while (!line.equals("<hr>")) {
                if (line.startsWith("<p>&nbsp;</p>")) {
                    // Drop a leading spacer paragraph (13 == "<p>&nbsp;</p>".length()) and re-examine what is left.
                    line = line.substring(13).trim();
                    if (line.length() == 0)
                        line = skipLines(br);
                    continue;
                }
                // Text split off the current line; it is processed in the next iteration instead of reading a new line.
                String restLine = null;
                List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
                // Remove id="..." attributes from the leading tag, repeating while the pattern still matches.
                while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
                // "poet" and "einl" paragraphs are handled like plain <p>; both opening tags are 16 characters long.
                if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
                    line = "<p>" + line.substring(16);
                }
                // Handle one paragraph per iteration: anything after the first </p> is deferred.
                if (line.matches(".*</p>.+")) {
                    int pos = line.indexOf("</p>");
                    restLine = line.substring(pos + 4).trim();
                    line = line.substring(0, pos + 4);
                }
                if (!inParagraph && line.startsWith("<p>")) {
                    // Paragraph start; if nothing follows the tag, the content begins on the next line.
                    inParagraph = true;
                    line = line.substring(3).trim();
                    if (line.length() == 0) {
                        line = skipLines(br);
                        continue;
                    }
                }
                // A further verse marker that is not at position 0 is deferred to the next iteration.
                if (line.indexOf("<span class=\"vers\">", 1) != -1) {
                    int pos = line.indexOf("<span class=\"vers\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                // The same applies to a "poet" paragraph that starts in the middle of the line.
                if (line.indexOf("<p class=\"poet\">", 1) != -1) {
                    int pos = line.indexOf("<p class=\"poet\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                // Strip trailing &nbsp; entities (6 characters each).
                while (line.endsWith("&nbsp;")) line = line.substring(0, line.length() - 6);
                if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
                    // Headline outside a paragraph: remember it until the next verse starts.
                    Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
                    String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
                    // A '*' inside a headline is rejected.
                    if (headline.contains("*"))
                        throw new IOException(headline);
                    hl.getAppendVisitor().visitText(replaceEntities(headline));
                    headlines.add(hl);
                } else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
                    // Verse start: the number sits between the 19-character opening tag and "</span>".
                    int pos = line.indexOf("</span>");
                    if (pos == -1)
                        throw new IOException(line);
                    String vs = line.substring(19, pos).trim();
                    if (vs.endsWith("&nbsp;")) {
                        vs = cutAffix(vs, "", "&nbsp;");
                    }
                    // Accepted forms are digits only, or digits, a comma and digits.
                    if (vs.matches("[0-9]+(,[0-9]+)?")) {
                        currentVerse = new Verse(vs);
                    } else {
                        throw new IOException(vs);
                    }
                    // 7 == "</span>".length()
                    line = line.substring(pos + 7);
                    // A closing </p> ends the paragraph together with this verse fragment.
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    if (line.startsWith("&nbsp;")) {
                        line = line.substring(6);
                    }
                    // Pending headlines are written into this verse before its text.
                    for (Headline h : headlines) {
                        h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
                    }
                    headlines.clear();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                    // NOTE(review): currentChapter is still null here if no chapter marker preceded this verse.
                    currentChapter.getVerses().add(currentVerse);
                } else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
                    // Chapter marker: chapters have to be numbered consecutively starting at 1.
                    int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
                    currentChapter = new Chapter();
                    currentVerse = null;
                    bk.getChapters().add(currentChapter);
                    if (chap != bk.getChapters().size())
                        throw new IOException(chap + "/" + bk.getChapters().size());
                    // Only the first chapter receives the book introduction as its prolog.
                    if (prolog != null) {
                        currentChapter.setProlog(prolog);
                        prolog = null;
                    }
                } else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
                    // Footnote text: it must start with the label of the oldest pending footnote followed by ':'.
                    String content = cutAffix(line, "<div class=\"fn\">", "</div>");
                    if (footnoteVerses.size() == 0)
                        throw new IOException(line);
                    String prefix = footnoteVerses.remove(0) + ":";
                    if (!content.startsWith(prefix)) {
                        throw new IOException(prefix + " / " + content);
                    }
                    parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
                } else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith("&nbsp;") || line.startsWith("<span class=\"u2\">"))) {
                    // Continuation of the current verse: either text that starts with neither a
                    // tag nor &nbsp;, or a line starting with <span class="u2">.
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    // Unrecognized line: print the following line for context, then fail with the current one.
                    System.err.println("Next line: " + br.readLine());
                    throw new IOException(line);
                }
                if (!newFootnotes.isEmpty()) {
                    // Queue the footnotes opened by this line. The label is the verse number
                    // itself if it already contains a comma, else "<chapter count>,<verse number>".
                    footnotes.addAll(newFootnotes);
                    for (int i = 0; i < newFootnotes.size(); i++) {
                        if (currentVerse.getNumber().contains(",")) {
                            footnoteVerses.add(currentVerse.getNumber());
                        } else {
                            footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
                        }
                    }
                }
                // Continue with the deferred remainder if there is one, otherwise with the next line.
                if (restLine != null)
                    line = restLine;
                else
                    line = skipLines(br);
            }
            // All pending headlines and footnotes must have been consumed by the end of the book.
            if (!headlines.isEmpty())
                throw new IOException("" + headlines.size());
            if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
                throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
            for (Chapter ch : bk.getChapters()) {
                for (Verse vv : ch.getVerses()) {
                    vv.trimWhitespace();
                    vv.finished();
                }
            }
        }
    }
    // Anhang (appendix): everything below is written through the visitor returned by
    // getPrologVisitor; the prolog of the book's first chapter is finished at the very end.
    Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
    bible.getBooks().add(anhang);
    Visitor<RuntimeException> vv = getPrologVisitor(anhang);
    vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
    try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
        String line = br.readLine().trim();
        // Skip ahead to the line starting with the "at" anchor.
        while (!line.startsWith("<a name=\"at\">")) {
            line = br.readLine().trim();
        }
        while (!line.equals("</body>")) {
            // Normalize the line: drop empty named anchors, spaces between tags and a leading double <br /> in the first cell.
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            line = line.replaceAll("> +<", "><");
            line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
            if (line.startsWith("<h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
            } else if (line.startsWith("<h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
            } else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
                // Table row describing one book. parts[0] is the linked file name without
                // ".html", parts[1] the link text of the first cell.
                String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
                // Second cell: same normalization, and a "#u" fragment after "html" is dropped.
                line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
                // A leading double <br /> in the second cell becomes a paragraph break.
                if (line.contains("<td><br /><br /><a href")) {
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                    line = line.replace("<td><br /><br /><a href", "<td><a href");
                }
                String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
                Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
                // Look up the book metadata by file name.
                // NOTE(review): m stays null when no METADATA entry matches, which makes the
                // visitCrossReference call below throw a NullPointerException.
                BookMetadata m = null;
                for (BookMetadata bm : METADATA) {
                    if (bm.filename.equals(parts[0])) {
                        m = bm;
                        break;
                    }
                }
                // The link text (with '-' removed) becomes a cross reference into the book;
                // presumably chapter 1 verse 1 to chapter 1 verse 1 - confirm against visitCrossReference.
                bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
                bold.visitText(" " + replaceEntities(title));
                vv.visitLineBreak(LineBreakKind.NEWLINE);
                // The description may span several lines; join them until the closing </td>.
                line = br.readLine().trim();
                while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
                vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
                vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                // The row has to be closed on the next line.
                line = br.readLine().trim();
                if (!line.equals("</tr>"))
                    throw new IOException(line);
            } else {
                throw new IOException(line);
            }
            // NOTE(review): presumably skips structural table markup starting with these prefixes - confirm against skipLines.
            line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p>", "<p>&nbsp;</p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
        }
    }
    // Hesekiels Tempel (Ezekiel's temple): a fixed block that is not read from any file - a
    // link wrapped in OFFLINE raw HTML and an image emitted as ONLINE raw HTML.
    vv.visitHeadline(1).visitText("Hesekiels Tempel");
    Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
    vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
    vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");
    // Jesus-Chronik (Jesus chronicle): one file per JESUS_CHRONIK entry, appended to the
    // appendix; the headline is only written when there is at least one entry.
    if (JESUS_CHRONIK.length > 0)
        vv.visitHeadline(1).visitText("Die Jesus-Chronik");
    for (String name : JESUS_CHRONIK) {
        if (!new File(inputDirectory, name + ".html").exists()) {
            System.out.println("*** Skipping " + name + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
            String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
            // Pending footnotes: both lists are handed to parseJesusChronikText (presumably
            // filled there - confirm) and consumed by the <div class="fn"> branch below.
            List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
            List<String> footnotePrefixes = new ArrayList<>();
            while (!line.startsWith("</body>")) {
                line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
                if (line.startsWith("<h2>")) {
                    parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
                } else if (line.startsWith("<div class=\"fn\">")) {
                    // Footnote block: join continuation lines up to </div>, then split the
                    // individual footnotes at "<br />". Each one has to start with the oldest
                    // pending prefix, which is removed before parsing.
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
                    for (String fn : fns) {
                        fn = fn.trim();
                        String pfx = footnotePrefixes.remove(0);
                        Visitor<RuntimeException> fnv = footnoteList.remove(0);
                        if (!fn.startsWith(pfx))
                            throw new IOException(pfx + " / " + fn);
                        parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
                    }
                } else if (line.startsWith("<p><div class=\"rot\">")) {
                    // "rot" block: the <!--DATE--> markers are removed and the text is written in italics.
                    String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
                    // Bold title (6 == "<p><b>".length()) becomes a level-3 headline; the text
                    // after "</b><br />", if any, is written in italics.
                    int pos = line.indexOf("</b><br />");
                    parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
                    String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
                    if (!xref.isEmpty())
                        parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p>")) {
                    parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("&copy;")) {
                    // Copyright notice, possibly spanning several lines up to </div>.
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<div class=\"e\">")) {
                    // <div class="e"> block, possibly spanning several lines; written in italics.
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    throw new IOException(line);
                }
                line = skipLines(br);
            }
            // Every footnote reference must have received its text.
            if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
                throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
        }
    }
    anhang.getChapters().get(0).getProlog().trimWhitespace();
    anhang.getChapters().get(0).getProlog().finished();
    return bible;
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) Matcher(java.util.regex.Matcher) Bible(biblemulticonverter.data.Bible) ArrayList(java.util.ArrayList) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) Headline(biblemulticonverter.data.FormattedText.Headline) MetadataBook(biblemulticonverter.data.MetadataBook) Pattern(java.util.regex.Pattern) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FormattedText(biblemulticonverter.data.FormattedText) Date(java.util.Date) BufferedReader(java.io.BufferedReader) File(java.io.File) SimpleDateFormat(java.text.SimpleDateFormat) Verse(biblemulticonverter.data.Verse)

Example 12 with Headline

use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.

In the class OSIS, the method doExport.

/**
 * Serializes the given bible as an OSIS XML document and writes it to a file.
 *
 * The document consists of an {@code osis} root, an {@code osisText} element with the
 * header produced by {@code buildHeader}, and one {@code div type="book"} per book. Each
 * book contains its chapters; each chapter contains a chapter title, the optional prolog
 * and one {@code verse} element per virtual verse. Unless disabled, the requested elements
 * are converted to milestoned form before the document is written.
 *
 * @param bible the bible to export
 * @param exportArgs {@code exportArgs[0]} is the output file name; the optional
 *        {@code exportArgs[1]} is a comma-separated list of element names to milestone
 *        (defaults to {@code "verse"}; {@code "-"} skips the conversion)
 * @throws IllegalArgumentException if a requested element name is not contained in
 *         {@code GENERATED_MILESTONEABLE_ELEMENTS}
 * @throws Exception if building or writing the XML document fails
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    final Document xml = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();

    // Root element carrying the namespace and schema declarations.
    final Element root = xml.createElement("osis");
    xml.appendChild(root);
    root.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
    root.setAttribute("xmlns", "http://www.bibletechnologies.net/2003/OSIS/namespace");
    root.setAttribute("xsi:schemaLocation", "http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.2.1.1.xsd");

    final Element textElement = xml.createElement("osisText");
    root.appendChild(textElement);
    textElement.setAttribute("canonical", "true");
    textElement.setAttribute("osisIDWork", "Exported");
    textElement.appendChild(buildHeader(xml, bible.getName()));

    for (Book currentBook : bible.getBooks()) {
        final String bookOsisID = currentBook.getId().getOsisID();

        final Element bookDiv = xml.createElement("div");
        textElement.appendChild(bookDiv);
        bookDiv.setAttribute("type", "book");
        bookDiv.setAttribute("canonical", "true");
        bookDiv.setAttribute("osisID", bookOsisID);

        final Element mainTitle = xml.createElement("title");
        bookDiv.appendChild(mainTitle);
        mainTitle.setAttribute("type", "main");
        mainTitle.appendChild(xml.createTextNode(currentBook.getLongName()));

        int chapterCount = 0;
        for (Chapter currentChapter : currentBook.getChapters()) {
            // Chapters are numbered by their position within the book, starting at 1.
            final int chapterNumber = ++chapterCount;
            final String chapterOsisID = bookOsisID + "." + chapterNumber;

            final Element chapterElement = xml.createElement("chapter");
            bookDiv.appendChild(chapterElement);
            chapterElement.setAttribute("osisID", chapterOsisID);

            // Receives the prolog and the headlines, i.e. everything written at chapter level.
            final OSISVisitor chapterVisitor = new OSISVisitor(chapterElement, currentBook.getId().isNT());

            final Element chapterTitle = xml.createElement("title");
            chapterElement.appendChild(chapterTitle);
            chapterTitle.setAttribute("type", "chapter");
            chapterTitle.appendChild(xml.createTextNode(currentBook.getAbbr() + " " + chapterNumber));

            if (currentChapter.getProlog() != null) {
                currentChapter.getProlog().accept(chapterVisitor);
            }

            for (VirtualVerse virtualVerse : currentChapter.createVirtualVerses()) {
                final String virtualNumber = "" + virtualVerse.getNumber();

                // Headlines are written at chapter level, before the verse element they belong to.
                for (Headline headline : virtualVerse.getHeadlines()) {
                    headline.accept(chapterVisitor.visitHeadline(headline.getDepth()));
                }

                final Element verseElement = xml.createElement("verse");
                chapterElement.appendChild(verseElement);
                verseElement.setAttribute("osisID", chapterOsisID + "." + virtualNumber);

                for (Verse realVerse : virtualVerse.getVerses()) {
                    // A verse whose own number differs from the virtual verse number gets
                    // that number prepended in bold, wrapped in parentheses.
                    if (!realVerse.getNumber().equals(virtualNumber)) {
                        final Element numberMarker = xml.createElement("hi");
                        verseElement.appendChild(numberMarker);
                        numberMarker.setAttribute("type", "bold");
                        numberMarker.appendChild(xml.createTextNode("(" + realVerse.getNumber() + ")"));
                    }
                    realVerse.accept(new OSISVisitor(verseElement, currentBook.getId().isNT()));
                }
            }
        }
    }

    // Second argument: which elements to milestone; "verse" when it is not given.
    String milestoneSpec;
    if (exportArgs.length > 1) {
        milestoneSpec = exportArgs[1];
    } else {
        milestoneSpec = "verse";
    }

    if (!milestoneSpec.equals("-")) {
        final Set<String> requested = new HashSet<>(Arrays.asList(milestoneSpec.split(",")));
        final Set<String> rejected = new HashSet<>(requested);
        rejected.removeAll(GENERATED_MILESTONEABLE_ELEMENTS);
        if (!rejected.isEmpty()) {
            // Report every rejected name with its reason before failing.
            for (String rejectedName : rejected) {
                final String reason = GENERATED_UNMILESTONEABLE_ELEMENTS.contains(rejectedName)
                        ? " may not be milestoned"
                        : " is never generated by the OSIS export";
                System.out.println("ERROR: " + rejectedName + reason);
            }
            throw new IllegalArgumentException("Cannot create milestoned elements: " + milestoneSpec);
        }
        convertChildrenToMilestoned(xml.getDocumentElement(), requested);
    }

    // Serialize the finished DOM tree to the output file.
    final File outputFile = new File(exportArgs[0]);
    TransformerFactory.newInstance().newTransformer().transform(new DOMSource(xml), new StreamResult(outputFile));
}
Also used : DOMSource(javax.xml.transform.dom.DOMSource) VirtualVerse(biblemulticonverter.data.VirtualVerse) StreamResult(javax.xml.transform.stream.StreamResult) Element(org.w3c.dom.Element) Chapter(biblemulticonverter.data.Chapter) Document(org.w3c.dom.Document) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) Headline(biblemulticonverter.data.FormattedText.Headline) File(java.io.File) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse) HashSet(java.util.HashSet)

Example 13 with Headline

use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.

In the class OSIS, the method parseChapter.

/**
 * Parses the direct children of one OSIS chapter element into {@code chapter}.
 *
 * <p>{@code title} children are collected as pending headlines, {@code verse} children become
 * {@link Verse} objects (with the pending headlines prepended), and any other non-whitespace
 * content found before the first verse is wrapped into a synthetic {@code prolog} element and
 * stored as the chapter prolog. Content of other kinds found after the first verse or prolog
 * only triggers a warning.
 *
 * @param chapterName prefix which, followed by {@code '.'}, every verse osisID of this chapter must start with
 * @param osisChapter chapter element to walk; it is modified in place (headline text is trimmed
 *        and a {@code prolog} holder element may be inserted)
 * @param chapter receives the parsed verses and, if present, the prolog
 * @param unclosedElements handed through unchanged to {@code convertFromMilestoned}
 * @throws IllegalArgumentException if a verse element still carries a milestone attribute
 *         ({@code sID} or {@code eID})
 * @throws IllegalStateException if a verse has no osisID or an osisID outside this chapter
 * @throws NumberFormatException if the first verse number of an osisID is not a plain integer
 */
private void parseChapter(String chapterName, Element osisChapter, Chapter chapter, List<Element> unclosedElements) {
    // -1: neither a verse nor a prolog has been started yet; 0: a prolog has been started.
    int lastVerse = -1;
    // Headlines seen since the last verse/prolog; they are prepended to whatever is parsed next.
    List<Headline> headlines = new ArrayList<Headline>();
    for (Node node = osisChapter.getFirstChild(); node != null; node = node.getNextSibling()) {
        boolean startProlog = false;
        if (node instanceof Text) {
            // Whitespace-only text between elements carries no content.
            if (node.getTextContent().trim().length() == 0)
                continue;
            if (lastVerse == -1) {
                startProlog = true;
            } else {
                printWarning("WARNING: Non-whitespace at chapter level: " + node.getTextContent());
            }
        } else if (node instanceof Element) {
            Element elem = (Element) node;
            if (elem.getNodeName().equals("title")) {
                // Titles of type "chapter" become depth-1 headlines, all others depth 2.
                Headline hl = new Headline(2);
                if (elem.getAttribute("type").equals("chapter")) {
                    hl = new Headline(1);
                }
                // For plain-text titles, strip surrounding whitespace in the DOM before parsing.
                if (elem.getChildNodes().getLength() == 1 && elem.getFirstChild() instanceof Text) {
                    String text = elem.getFirstChild().getTextContent();
                    if (!text.equals(text.trim())) {
                        printWarning("WARNING: Whitespace at beginning/end of headline: '" + text + "'");
                        elem.getFirstChild().setNodeValue(text.trim());
                    }
                }
                convertFromMilestoned(elem, unclosedElements);
                parseFormattedText(null, elem, hl);
                if (hl.getElementTypes(1).length() == 0) {
                    printWarning("WARNING: Empty headline in " + chapterName);
                } else {
                    headlines.add(hl);
                }
            } else if (elem.getNodeName().equals("verse")) {
                String osisID = elem.getAttribute("osisID");
                // A verse that still has a start (sID) or end (eID) milestone attribute was not
                // converted to a container element; both attributes have to be checked.
                if (!elem.getAttribute("sID").isEmpty() || !elem.getAttribute("eID").isEmpty())
                    throw new IllegalArgumentException("verse should have been de-milestoned already.");
                if (osisID.isEmpty())
                    throw new IllegalStateException("Verse without osisID");
                if (!osisID.startsWith(chapterName + "."))
                    throw new IllegalStateException("Invalid verse " + osisID + " in chapter " + chapterName);
                String vnumber = osisID.substring(chapterName.length() + 1);
                if (osisID.contains(" ")) {
                    // Space-separated osisID: one element covering several verses. The verse
                    // number becomes "first-last" when the parts are consecutive integers and
                    // a '.'-joined list of all parts otherwise.
                    vnumber = vnumber.substring(0, vnumber.indexOf(' '));
                    lastVerse = Integer.parseInt(vnumber);
                    int nextInRange = lastVerse + 1;
                    boolean first = true;
                    for (String part : osisID.split(" ")) {
                        if (first) {
                            // The first part is already stored in vnumber/lastVerse.
                            first = false;
                            continue;
                        }
                        if (!part.startsWith(chapterName + "."))
                            throw new IllegalStateException("Invalid verse " + osisID + " in chapter " + chapterName);
                        String partNumber = part.substring(chapterName.length() + 1);
                        vnumber = vnumber + "." + partNumber;
                        if (partNumber.equals("" + nextInRange)) {
                            nextInRange++;
                        } else {
                            // -1 marks the list as not (or no longer) consecutive.
                            nextInRange = -1;
                        }
                    }
                    if (nextInRange != -1) {
                        vnumber = lastVerse + "-" + (nextInRange - 1);
                    }
                } else {
                    lastVerse = Integer.parseInt(vnumber);
                }
                Verse verse = new Verse(vnumber);
                // NOTE(review): warningContext is presumably read by printWarning to locate
                // messages - confirm against its definition.
                warningContext = osisID;
                for (Headline hl : headlines) {
                    hl.accept(verse.getAppendVisitor().visitHeadline(hl.getDepth()));
                }
                headlines.clear();
                // The verse is added first and removed again below if it turns out to be empty.
                chapter.getVerses().add(verse);
                convertFromMilestoned(elem, unclosedElements);
                parseFormattedText(osisID, elem, verse);
                verse.trimWhitespace();
                verse.finished();
                if (verse.getElementTypes(1).length() == 0) {
                    printWarning("WARNING: Empty verse " + osisID);
                    chapter.getVerses().remove(verse);
                }
                warningContext += " (after closing)";
            } else if (lastVerse == -1) {
                startProlog = true;
            } else {
                printWarning("WARNING: " + elem.getNodeName() + " at invalid location");
            }
        }
        if (startProlog) {
            // Wrap this node and all following siblings up to (not including) the next verse
            // element into a synthetic <prolog> holder, then parse the holder as one text.
            Element holder = osisChapter.getOwnerDocument().createElement("prolog");
            osisChapter.insertBefore(holder, node);
            while (holder.getNextSibling() != null && !holder.getNextSibling().getNodeName().equals("verse")) {
                holder.appendChild(holder.getNextSibling());
            }
            lastVerse = 0;
            FormattedText prolog = new FormattedText();
            chapter.setProlog(prolog);
            for (Headline hl : headlines) {
                hl.accept(prolog.getAppendVisitor().visitHeadline(hl.getDepth()));
            }
            headlines.clear();
            convertFromMilestoned(holder, unclosedElements);
            parseFormattedText(null, holder, prolog);
            prolog.trimWhitespace();
            prolog.finished();
            // Continue iteration after the holder; the nodes moved into it are already handled.
            node = holder;
        }
    }
    // Headlines not followed by any verse cannot be attached anywhere.
    if (headlines.size() > 0)
        printWarning("WARNING: Unused headlines: " + headlines.size());
}
Also used : Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) ArrayList(java.util.ArrayList) Text(org.w3c.dom.Text) FormattedText(biblemulticonverter.data.FormattedText) FormattedText(biblemulticonverter.data.FormattedText) Headline(biblemulticonverter.data.FormattedText.Headline) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Example 14 with Headline

use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.

the class YCHPalmBible method doExport.

/**
 * Writes {@code bible} as a tagged text file (PARSERINFO / BIBLE / BOOK / CHAPTER / VERSE),
 * encoded as windows-1252.
 *
 * <p>Books without a usable entry in {@code PALM_BOOK_NUMBERS} are skipped with a warning,
 * chapter prologs are dropped with a warning, and gaps in the verse numbering are filled
 * with empty {@code <VERSE></VERSE>} lines. Formatting is stripped; line breaks become a
 * single space.
 *
 * @param bible the bible to export
 * @param exportArgs {@code exportArgs[0]} is the name of the file to write
 * @throws RuntimeException if a virtual verse number is lower than the next expected verse number
 * @throws Exception on I/O failure
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    final String outputPath = exportArgs[0];
    // INFO attribute: the metadata description if there is one, the bible name otherwise.
    String info = bible.getName();
    MetadataBook metadata = bible.getMetadataBook();
    if (metadata != null) {
        String fromMetadata = bible.getMetadataBook().getValue(MetadataBookKey.description);
        if (fromMetadata != null) {
            info = fromMetadata;
        }
    }
    try (final BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputPath), "windows-1252"))) {
        out.write("<PARSERINFO ENCODE=\"Cp1252\" WORDTYPE=\"SPCSEP\">");
        out.newLine();
        out.write("<BIBLE NAME=\"" + bible.getName() + "\" INFO=\"" + info + "\">");
        out.newLine();

        // Plain-text visitor: emits text only and descends transparently into formatting.
        Visitor<IOException> plainText = new FormattedText.VisitorAdapter<IOException>(null) {

            @Override
            public void visitText(String text) throws IOException {
                out.write(text);
            }

            @Override
            public void visitLineBreak(LineBreakKind kind) throws IOException {
                out.write(" ");
            }

            @Override
            public void visitVerseSeparator() throws IOException {
                // intentionally dropped from the output
            }

            @Override
            public Visitor<IOException> visitFormattingInstruction(FormattingInstructionKind kind) throws IOException {
                return this;
            }

            @Override
            public Visitor<IOException> visitCSSFormatting(String css) throws IOException {
                return this;
            }
        };

        for (Book book : bible.getBooks()) {
            int zefID = book.getId().getZefID();
            boolean supported = zefID >= 1 && zefID < PALM_BOOK_NUMBERS.length && PALM_BOOK_NUMBERS[zefID] != 0;
            if (!supported) {
                System.out.println("WARNING: Skipping unsupported book " + book.getAbbr() + " (" + book.getId().getOsisID() + ")");
                continue;
            }
            out.write("<BOOK NAME=\"" + book.getShortName()
                    + "\" NUMBER=\"" + PALM_BOOK_NUMBERS[book.getId().getZefID()]
                    + "\" SHORTCUT=\"" + book.getAbbr() + "\">");
            out.newLine();

            // Written once, inside the first verse of the book, then cleared.
            String pendingBookTitle = book.getLongName();
            int chapterNumber = 0;
            for (Chapter chap : book.getChapters()) {
                chapterNumber++;
                if (chap.getProlog() != null) {
                    System.out.println("WARNING: Skipping prolog (prologs not supported)!");
                }
                // Written once, inside the first verse of the chapter, then cleared.
                String pendingChapterTitle = CHAPTER_NAME + " " + chapterNumber;
                out.write("<CHAPTER>");
                out.newLine();

                int expectedVerse = 1;
                for (VirtualVerse virtualVerse : chap.createVirtualVerses()) {
                    // Fill numbering gaps with empty verses so positions stay aligned.
                    for (; expectedVerse < virtualVerse.getNumber(); expectedVerse++) {
                        out.write("<VERSE></VERSE>");
                        out.newLine();
                    }
                    if (virtualVerse.getNumber() != expectedVerse) {
                        throw new RuntimeException("Verse is " + virtualVerse.getNumber() + ", should be " + expectedVerse);
                    }

                    out.write("<VERSE>");
                    // <VERSTEXT> is only needed when some title tag precedes the verse text.
                    boolean needsVerseTextTag = false;
                    if (pendingBookTitle != null) {
                        out.write("<BOOKTEXT>" + pendingBookTitle);
                        pendingBookTitle = null;
                        needsVerseTextTag = true;
                    }
                    if (pendingChapterTitle != null) {
                        out.write("<CHAPTEXT>" + pendingChapterTitle);
                        pendingChapterTitle = null;
                        needsVerseTextTag = true;
                    }
                    for (Headline headline : virtualVerse.getHeadlines()) {
                        out.write("<DESCTEXT>");
                        headline.accept(plainText);
                        needsVerseTextTag = true;
                    }
                    if (needsVerseTextTag) {
                        out.write("<VERSTEXT>");
                    }
                    for (Verse realVerse : virtualVerse.getVerses()) {
                        // Show the real verse number when it differs from the virtual one.
                        boolean sameNumber = realVerse.getNumber().equals(String.valueOf(virtualVerse.getNumber()));
                        if (!sameNumber) {
                            out.write("{" + realVerse.getNumber() + "} ");
                        }
                        realVerse.accept(plainText);
                    }
                    out.write("</VERSE>");
                    out.newLine();
                    expectedVerse++;
                }
                out.write("</CHAPTER>");
                out.newLine();
            }
            out.write("</BOOK>");
            out.newLine();
        }
        out.write("</BIBLE>");
        out.newLine();
    }
}
Also used : MetadataBook(biblemulticonverter.data.MetadataBook) VirtualVerse(biblemulticonverter.data.VirtualVerse) Chapter(biblemulticonverter.data.Chapter) FormattingInstructionKind(biblemulticonverter.data.FormattedText.FormattingInstructionKind) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) LineBreakKind(biblemulticonverter.data.FormattedText.LineBreakKind) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) FileOutputStream(java.io.FileOutputStream) Headline(biblemulticonverter.data.FormattedText.Headline) OutputStreamWriter(java.io.OutputStreamWriter) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Example 15 with Headline

use of biblemulticonverter.data.FormattedText.Headline in project BibleMultiConverter by schierlm.

the class ZefaniaXML method createXMLBible.

/**
 * Builds the Zefania XML object tree for {@code bible}.
 *
 * <p>Metadata keys {@code status}, {@code version} and {@code revision} are mapped to the
 * corresponding document attributes; other keys without an {@code '@'} are added as
 * INFORMATION entries when their value matches the pattern registered in
 * {@code INFORMATION_FIELDS}. The metadata book and books without a positive Zefania ID
 * are not exported, and chapters without verses are skipped (but still counted).
 *
 * @param bible the bible to convert
 * @return the populated {@code XMLBIBLE} document object
 * @throws Exception declared for overriding/calling code; propagates any failure raised while converting
 */
protected XMLBIBLE createXMLBible(Bible bible) throws Exception {
    final ObjectFactory factory = new ObjectFactory();
    // Caption type by headline depth; index 0 and depths above 6 have no caption type.
    final EnumCaptionType[] captionTypeByDepth = {
            null,
            EnumCaptionType.X_H_1, EnumCaptionType.X_H_2, EnumCaptionType.X_H_3,
            EnumCaptionType.X_H_4, EnumCaptionType.X_H_5, EnumCaptionType.X_H_6,
            null, null, null
    };

    final XMLBIBLE xmlBible = factory.createXMLBIBLE();
    xmlBible.setBiblename(bible.getName());
    xmlBible.setType(EnumModtyp.X_BIBLE);
    xmlBible.setINFORMATION(factory.createINFORMATION());

    MetadataBook metadata = bible.getMetadataBook();
    if (metadata != null) {
        for (String key : metadata.getKeys()) {
            String value = metadata.getValue(key);
            if (key.equals(MetadataBookKey.status.toString())) {
                xmlBible.setStatus(EnumStatus.fromValue(value));
                continue;
            }
            if (key.equals(MetadataBookKey.version.toString())) {
                xmlBible.setVersion(value);
                continue;
            }
            if (key.equals(MetadataBookKey.revision.toString())) {
                xmlBible.setRevision(new BigInteger(value));
                continue;
            }
            if (key.contains("@")) {
                continue;
            }
            // Only values matching the registered pattern for this field are exported.
            Pattern allowedValues = INFORMATION_FIELDS.get(MetadataBookKey.valueOf(key));
            if (allowedValues == null || !allowedValues.matcher(value).matches()) {
                continue;
            }
            xmlBible.getINFORMATION().getTitleOrCreatorOrDescription()
                    .add(new JAXBElement<String>(new QName(key), String.class, value));
        }
    }
    xmlBible.getINFORMATION().getTitleOrCreatorOrDescription()
            .add(new JAXBElement<String>(new QName("format"), String.class, "Zefania XML Bible Markup Language"));

    for (Book book : bible.getBooks()) {
        if (book.getId().equals(BookID.METADATA)) {
            continue;
        }
        if (book.getId().getZefID() <= 0) {
            System.out.println("WARNING: Unable to export book " + book.getAbbr());
            continue;
        }
        BIBLEBOOK xmlBook = factory.createBIBLEBOOK();
        xmlBook.setBnumber(BigInteger.valueOf(book.getId().getZefID()));
        xmlBook.setBsname(book.getShortName());
        xmlBook.setBname(book.getLongName());

        int chapterNumber = 0;
        for (Chapter chapter : book.getChapters()) {
            // Count every chapter so numbering stays correct even when empty ones are skipped.
            chapterNumber++;
            if (chapter.getVerses().isEmpty()) {
                continue;
            }
            CHAPTER xmlChapter = factory.createCHAPTER();
            xmlChapter.setCnumber(BigInteger.valueOf(chapterNumber));
            xmlBook.getCHAPTER().add(xmlChapter);

            if (chapter.getProlog() != null) {
                PROLOG xmlProlog = factory.createPROLOG();
                xmlProlog.setVref(BigInteger.ONE);
                chapter.getProlog().accept(new CreateContentVisitor(factory, xmlProlog.getContent(), null));
                xmlChapter.getPROLOGOrCAPTIONOrVERS().add(xmlProlog);
            }

            for (VirtualVerse virtualVerse : chapter.createVirtualVerses()) {
                // Headlines are emitted as captions referencing the verse that follows them.
                for (Headline headline : virtualVerse.getHeadlines()) {
                    CAPTION xmlCaption = factory.createCAPTION();
                    xmlCaption.setVref(BigInteger.valueOf(virtualVerse.getNumber()));
                    headline.accept(new CreateContentVisitor(factory, xmlCaption.getContent(), null));
                    xmlCaption.setType(captionTypeByDepth[headline.getDepth()]);
                    xmlChapter.getPROLOGOrCAPTIONOrVERS().add(xmlCaption);
                }

                VERS xmlVerse = factory.createVERS();
                xmlVerse.setVnumber(BigInteger.valueOf(virtualVerse.getNumber()));
                for (Verse realVerse : virtualVerse.getVerses()) {
                    boolean sameNumber = realVerse.getNumber().equals(String.valueOf(virtualVerse.getNumber()));
                    if (!sameNumber) {
                        // Prefix the real verse number in bold when it differs from the virtual one.
                        STYLE numberLabel = factory.createSTYLE();
                        numberLabel.setCss("font-weight: bold");
                        numberLabel.getContent().add("(" + realVerse.getNumber() + ")");
                        xmlVerse.getContent().add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, numberLabel));
                        xmlVerse.getContent().add(" ");
                    }
                    realVerse.accept(new CreateContentVisitor(factory, xmlVerse.getContent(), xmlVerse));
                }
                xmlChapter.getPROLOGOrCAPTIONOrVERS().add(xmlVerse);
            }
        }
        xmlBible.getBIBLEBOOK().add(xmlBook);
    }
    return xmlBible;
}
Also used : MetadataBook(biblemulticonverter.data.MetadataBook) Pattern(java.util.regex.Pattern) VirtualVerse(biblemulticonverter.data.VirtualVerse) XMLBIBLE(biblemulticonverter.schema.zef2005.XMLBIBLE) QName(javax.xml.namespace.QName) Chapter(biblemulticonverter.data.Chapter) STYLE(biblemulticonverter.schema.zef2005.STYLE) BIBLEBOOK(biblemulticonverter.schema.zef2005.BIBLEBOOK) ObjectFactory(biblemulticonverter.schema.zef2005.ObjectFactory) EnumCaptionType(biblemulticonverter.schema.zef2005.EnumCaptionType) CHAPTER(biblemulticonverter.schema.zef2005.CHAPTER) Book(biblemulticonverter.data.Book) MetadataBook(biblemulticonverter.data.MetadataBook) Headline(biblemulticonverter.data.FormattedText.Headline) VERS(biblemulticonverter.schema.zef2005.VERS) BigInteger(java.math.BigInteger) PROLOG(biblemulticonverter.schema.zef2005.PROLOG) CAPTION(biblemulticonverter.schema.zef2005.CAPTION) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Aggregations

Headline (biblemulticonverter.data.FormattedText.Headline)24 Verse (biblemulticonverter.data.Verse)20 Chapter (biblemulticonverter.data.Chapter)18 VirtualVerse (biblemulticonverter.data.VirtualVerse)18 Book (biblemulticonverter.data.Book)17 MetadataBook (biblemulticonverter.data.MetadataBook)11 FormattedText (biblemulticonverter.data.FormattedText)10 ArrayList (java.util.ArrayList)10 BookID (biblemulticonverter.data.BookID)8 Bible (biblemulticonverter.data.Bible)6 BufferedWriter (java.io.BufferedWriter)6 File (java.io.File)6 FileOutputStream (java.io.FileOutputStream)6 IOException (java.io.IOException)6 BIBLEBOOK (biblemulticonverter.schema.zef2005.BIBLEBOOK)5 CAPTION (biblemulticonverter.schema.zef2005.CAPTION)5 CHAPTER (biblemulticonverter.schema.zef2005.CHAPTER)5 VERS (biblemulticonverter.schema.zef2005.VERS)5 OutputStreamWriter (java.io.OutputStreamWriter)5 EnumMap (java.util.EnumMap)5