Search in sources :

Example 26 with Chapter

Use of Chapter in project BibleMultiConverter by schierlm.

In the class NeUeParser, method doImport.

/**
 * Imports the NeÜ bible from a directory of HTML files.
 *
 * Visible phases: set up the bible name and metadata values; read the main
 * file (NeUe.htm, or index.htm when NeUe.htm does not exist) to pick up the
 * "Textstand: " value and to check the table of contents against METADATA
 * and JESUS_CHRONIK; parse the preface; parse one HTML file per METADATA
 * entry; parse the appendix from bibel.html; emit the "Hesekiels Tempel"
 * section; parse the JESUS_CHRONIK files.
 *
 * NOTE(review): this listing looks extraction-damaged - closing braces, some
 * arguments (e.g. the first argument of one metadata.setValue call) and
 * several statements are missing, so it does not compile as shown. Confirm
 * against the upstream file before relying on any detail.
 *
 * @param inputDirectory directory that contains the HTML input files
 * @return the Bible object created at the start of the method
 * @throws Exception an IOException carrying the offending line (or a pair of
 *         mismatching values) is thrown whenever the input does not have the
 *         expected shape
 */
public Bible doImport(File inputDirectory) throws Exception {
    Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
    MetadataBook metadata = new MetadataBook();
    metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
    metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (");
    metadata.setValue(MetadataBookKey.source, "");
    metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
    metadata.setValue(MetadataBookKey.language, "GER");
    // Prefer NeUe.htm as the main file; fall back to index.htm when it does not exist.
    String mainFile = "NeUe.htm";
    if (!new File(inputDirectory, mainFile).exists())
        mainFile = "index.htm";
    try (BufferedReader br = createReader(inputDirectory, mainFile)) {
        String line = br.readLine().trim();
        // Scan up to the first <p class="u3"> line, picking up the "Textstand: " value on the way.
        while (!line.startsWith("<p class=\"u3\">")) {
            if (line.contains("Textstand: ")) {
                // 11 is the length of "Textstand: "; the value runs up to the next '<'.
                line = line.substring(line.indexOf("Textstand: ") + 11);
                line = line.substring(0, line.indexOf('<'));
                metadata.setValue(MetadataBookKey.version, line);
                metadata.setValue(, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
                // The revision keeps only the digits of the version text.
                metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
            line = br.readLine().trim();
        // Table-of-contents entry: group 1 is the link target, group 2 the link text.
        Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a>&nbsp;&nbsp;(?:</p>)?");
        int bookIndex = 0, jcIndex = 0;
        // Walk the table of contents until the preface anchor is reached.
        while (!line.startsWith("<a name=\"vorwort\">")) {
            if (line.equals("<br>")) {
                line = br.readLine().trim();
                if (line.startsWith("&raquo;&raquo;&nbsp;&nbsp;"))
                    line = line.substring("&raquo;&raquo;&nbsp;&nbsp;".length());
            Matcher m = tocPattern.matcher(line);
            if (m.matches()) {
                String url =;
                String shortName = replaceEntities(;
                if (url.endsWith(".html#bb")) {
                    // Book link: strip the 8-character ".html#bb" suffix and compare with the expected file name.
                    String filename = url.substring(0, url.length() - 8);
                    BookMetadata bm = METADATA[bookIndex];
                    if (!bm.filename.equals(filename))
                        throw new IOException(filename + "/" + bm.filename);
                    bm.shortname = shortName;
                } else if (url.startsWith("0")) {
                    // Link starting with "0": must match the expected JESUS_CHRONIK entry.
                    if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
                        throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
                } else {
                    throw new IOException(url);
            } else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p>&nbsp;</p>") && !line.equals("<p><a name=\"bb\">&nbsp;</a></p>")) {
                throw new IOException(line);
            line = br.readLine().trim();
        // Consistency checks on the counters.
        // NOTE(review): no increment of bookIndex/jcIndex is visible in this listing - presumably lost.
        if (bookIndex != METADATA.length)
            throw new IOException(bookIndex + " != " + METADATA.length);
        if (jcIndex == 0)
            JESUS_CHRONIK = new String[0];
        if (jcIndex != JESUS_CHRONIK.length)
            throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
        // Preface ("Vorwort")
        Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
        Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
        boolean needParagraph = false;
        if (line.endsWith("</a><br>"))
            line = br.readLine().trim();
        // Preface lines are consumed until the right-aligned div.
        while (!line.startsWith("<div align=\"right\">")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            if (line.startsWith("<h2>")) {
                // The <h2> text must equal the long name given to the preface book above.
                if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
                    throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
            } else if (line.startsWith("<h4>")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<h4 id=")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<div class=\"fn\">")) {
                // NOTE(review): the statement guarded by needParagraph is not visible in this listing.
                if (needParagraph)
                needParagraph = true;
                parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
            } else if (line.startsWith("<p>")) {
                if (needParagraph)
                needParagraph = true;
                // A paragraph ending in <br /> continues on the next input line.
                if (line.endsWith("<br />"))
                    line += br.readLine().trim();
                parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
            } else if (line.equals("<ul>")) {
                // Lists are read through to the closing </ul>.
                while (!line.equals("</ul>")) {
                    line = br.readLine();
            } else {
                throw new IOException(line);
            line = skipLines(br, "<p>&nbsp;</p>");
    // Bible books: one HTML file per METADATA entry.
    for (BookMetadata bm : METADATA) {
        if (!new File(inputDirectory, bm.filename + ".html").exists()) {
            System.out.println("*** Skipping " + bm.filename + " - file not found ***");
        try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
            String line = br.readLine().trim();
            line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "&raquo;&raquo;");
            // After the header lines the "bb" anchor paragraph is required (name= or id= variant).
            if (!line.equals("<p><a name=\"bb\">&nbsp;</a></p>") && !line.equals("<p><a id=\"bb\">&nbsp;</a></p>"))
                throw new IOException(line);
            line = skipLines(br);
            if (line.equals("<p>&nbsp;</p>"))
                line = br.readLine().trim();
            // The book's long name is taken from the <h1> line.
            Book bk = new Book(bm.abbr,, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
            line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p>&nbsp;</p>");
            // The prolog starts with the <p class="u0"> text as a level-1 headline.
            FormattedText prolog = new FormattedText();
            prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
            line = skipLines(br);
            boolean firstProlog = true;
            // Each <div class="e">...</div> line is appended to the prolog; at least one is required.
            while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
                if (firstProlog) {
                    firstProlog = false;
                } else {
                parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
                line = skipLines(br);
            if (firstProlog)
                throw new IOException(line);
            // The <p class="u1"> line is appended to the prolog in bold italic.
            parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
            line = skipLines(br);
            if (!line.startsWith("<h"))
                throw new IOException(line);
            // The digit of the first heading tag is the base that later heading depths are computed against.
            char minHeadline = line.charAt(2);
            List<Headline> headlines = new ArrayList<>();
            boolean inParagraph = false;
            Chapter currentChapter = null;
            Verse currentVerse = null;
            List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
            List<String> footnoteVerses = new ArrayList<>();
            // Main body: process lines until the closing <hr>.
            while (!line.equals("<hr>")) {
                if (line.startsWith("<p>&nbsp;</p>")) {
                    // 13 is the length of "<p>&nbsp;</p>".
                    line = line.substring(13).trim();
                    if (line.length() == 0)
                        line = skipLines(br);
                // restLine holds text split off the current line, to be processed on the next iteration.
                String restLine = null;
                List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
                // Drop id="..." attributes from the leading tag.
                while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
                // "poet" and "einl" paragraphs are treated as plain <p> (both prefixes are 16 characters long).
                if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
                    line = "<p>" + line.substring(16);
                // Anything after the first </p> is deferred via restLine.
                if (line.matches(".*</p>.+")) {
                    int pos = line.indexOf("</p>");
                    restLine = line.substring(pos + 4).trim();
                    line = line.substring(0, pos + 4);
                if (!inParagraph && line.startsWith("<p>")) {
                    inParagraph = true;
                    line = line.substring(3).trim();
                    if (line.length() == 0) {
                        line = skipLines(br);
                // A verse marker that is not at the start of the line begins a new logical line.
                if (line.indexOf("<span class=\"vers\">", 1) != -1) {
                    int pos = line.indexOf("<span class=\"vers\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                if (line.indexOf("<p class=\"poet\">", 1) != -1) {
                    int pos = line.indexOf("<p class=\"poet\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                // Strip trailing &nbsp; entities (6 characters each).
                while (line.endsWith("&nbsp;")) line = line.substring(0, line.length() - 6);
                if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
                    // Headline depth is relative to the first heading level seen in this file.
                    Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
                    String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
                    if (headline.contains("*"))
                        throw new IOException(headline);
                } else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
                    int pos = line.indexOf("</span>");
                    if (pos == -1)
                        throw new IOException(line);
                    // 19 is the length of the opening <span class="vers"> tag.
                    String vs = line.substring(19, pos).trim();
                    if (vs.endsWith("&nbsp;")) {
                        vs = cutAffix(vs, "", "&nbsp;");
                    // Accepted verse labels: a number, optionally followed by ",number".
                    if (vs.matches("[0-9]+(,[0-9]+)?")) {
                        currentVerse = new Verse(vs);
                    } else {
                        throw new IOException(vs);
                    // 7 is the length of "</span>".
                    line = line.substring(pos + 7);
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    line = line.trim();
                    if (line.startsWith("&nbsp;")) {
                        line = line.substring(6);
                    for (Headline h : headlines) {
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                } else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
                    // Chapter marker: the number must agree with the book's chapter count.
                    int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
                    currentChapter = new Chapter();
                    currentVerse = null;
                    if (chap != bk.getChapters().size())
                        throw new IOException(chap + "/" + bk.getChapters().size());
                    if (prolog != null) {
                        prolog = null;
                } else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
                    // Footnote text: must start with the next pending verse prefix, then fills the next pending footnote.
                    String content = cutAffix(line, "<div class=\"fn\">", "</div>");
                    if (footnoteVerses.size() == 0)
                        throw new IOException(line);
                    String prefix = footnoteVerses.remove(0) + ":";
                    if (!content.startsWith(prefix)) {
                        throw new IOException(prefix + " / " + content);
                    parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
                } else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith("&nbsp;") || line.startsWith("<span class=\"u2\">"))) {
                    // Continuation text (or a "u2" span) is appended to the current verse.
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    line = line.trim();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                } else {
                    // Unexpected line: print the following line for diagnosis, then fail.
                    System.err.println("Next line: " + br.readLine());
                    throw new IOException(line);
                if (!newFootnotes.isEmpty()) {
                    for (int i = 0; i < newFootnotes.size(); i++) {
                        if (currentVerse.getNumber().contains(",")) {
                        } else {
                            // Verse key in "chapter,verse" form.
                            footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
                if (restLine != null)
                    line = restLine;
                    line = skipLines(br);
            // At the end of a book no headline or footnote may be left pending.
            if (!headlines.isEmpty())
                throw new IOException("" + headlines.size());
            if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
                throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
            for (Chapter ch : bk.getChapters()) {
                for (Verse vv : ch.getVerses()) {
    // Appendix ("Anhang")
    Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
    Visitor<RuntimeException> vv = getPrologVisitor(anhang);
    vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
    try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
        String line = br.readLine().trim();
        // Skip ahead to the "at" anchor.
        while (!line.startsWith("<a name=\"at\">")) {
            line = br.readLine().trim();
        while (!line.equals("</body>")) {
            // Normalize the line: drop named anchors, whitespace between tags and leading <br /> pairs in cells.
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            line = line.replaceAll("> +<", "><");
            line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
            if (line.startsWith("<h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
            } else if (line.startsWith("<h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
            } else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
                // Table row for one book: parts[0] is the file name without ".html", parts[1] the link text.
                String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
                line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
                if (line.contains("<td><br /><br /><a href")) {
                    line = line.replace("<td><br /><br /><a href", "<td><a href");
                String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
                Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
                // Look up the book metadata whose file name matches the link target.
                BookMetadata m = null;
                for (BookMetadata bm : METADATA) {
                    if (bm.filename.equals(parts[0])) {
                        m = bm;
                bold.visitCrossReference(m.abbr,, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
                bold.visitText(" " + replaceEntities(title));
                line = br.readLine().trim();
                // The description cell may span several input lines; join them up to </td>.
                while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
                vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
                line = br.readLine().trim();
                if (!line.equals("</tr>"))
                    throw new IOException(line);
            } else {
                throw new IOException(line);
            line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p>", "<p>&nbsp;</p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
    // Ezekiel's temple ("Hesekiels Tempel")
    vv.visitHeadline(1).visitText("Hesekiels Tempel");
    Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"\" target=\"_blank\">");
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
    vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"\" width=\"640\" height=\"635\">");
    // Jesus chronicle ("Jesus-Chronik")
    if (JESUS_CHRONIK.length > 0)
        vv.visitHeadline(1).visitText("Die Jesus-Chronik");
    for (String name : JESUS_CHRONIK) {
        if (!new File(inputDirectory, name + ".html").exists()) {
            System.out.println("*** Skipping " + name + " - file not found ***");
        try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
            String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
            // Pending footnotes and the prefixes their texts are expected to start with.
            List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
            List<String> footnotePrefixes = new ArrayList<>();
            while (!line.startsWith("</body>")) {
                line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
                if (line.startsWith("<h2>")) {
                    parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
                } else if (line.startsWith("<div class=\"fn\">")) {
                    // A footnote div may span several lines and holds <br />-separated footnotes.
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
                    for (String fn : fns) {
                        fn = fn.trim();
                        String pfx = footnotePrefixes.remove(0);
                        Visitor<RuntimeException> fnv = footnoteList.remove(0);
                        if (!fn.startsWith(pfx))
                            throw new IOException(pfx + " / " + fn);
                        parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
                } else if (line.startsWith("<p><div class=\"rot\">")) {
                    // "rot" div: emitted in italic with the DATE comment markers removed.
                    String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
                } else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
                    // Bold lead-in becomes a level-3 headline; a non-empty remainder is emitted in italic.
                    int pos = line.indexOf("</b><br />");
                    parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
                    String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
                    if (!xref.isEmpty())
                        parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
                } else if (line.startsWith("<p>")) {
                    parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
                } else if (line.startsWith("&copy;")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
                } else if (line.startsWith("<div class=\"e\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
                } else {
                    throw new IOException(line);
                line = skipLines(br);
            // Every announced footnote must have been consumed by the end of the file.
            if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
                throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
    return bible;
Also used : Visitor( Matcher(java.util.regex.Matcher) Bible( ArrayList(java.util.ArrayList) MetadataBook( Book( Headline( MetadataBook( Pattern(java.util.regex.Pattern) Chapter( IOException( FormattedText( Date(java.util.Date) BufferedReader( File( SimpleDateFormat(java.text.SimpleDateFormat) Verse(

Example 27 with Chapter

Use of Chapter in project BibleMultiConverter by schierlm.

In the class NeUeParser, method getPrologVisitor.

/**
 * Creates a new FormattedText, appends a new Chapter to the given book and
 * returns the append visitor of that FormattedText.
 *
 * NOTE(review): as shown, the FormattedText is never attached to the new
 * chapter and the closing brace is missing - presumably lines were lost when
 * this listing was extracted; confirm against the upstream file.
 *
 * @param book book that receives the additional chapter
 * @return append visitor of the newly created FormattedText
 */
private Visitor<RuntimeException> getPrologVisitor(Book book) {
    FormattedText prolog = new FormattedText();
    book.getChapters().add(new Chapter());
    return prolog.getAppendVisitor();
Also used : Chapter( FormattedText(

Example 28 with Chapter

Use of Chapter in project BibleMultiConverter by schierlm.

In the class OSIS, method doExport.

/**
 * Exports a bible as an OSIS XML document.
 *
 * Builds a DOM document with "osis" and "osisText" elements, then creates a
 * "div" of type "book" per book, a "chapter" element per chapter and a
 * "verse" element per virtual verse. Unless disabled, selected elements are
 * afterwards converted to milestoned form, and the document is written with
 * a Transformer to the file named by exportArgs[0].
 *
 * NOTE(review): this listing looks extraction-damaged - closing braces, the
 * appendChild calls that would link most elements together, and the
 * increment of cnumber are not visible. Confirm against the upstream file.
 *
 * @param bible bible to export
 * @param exportArgs exportArgs[0] is the output file path; the optional
 *        exportArgs[1] is a comma-separated list of element names to
 *        milestone (default "verse"), or "-" to skip the milestone step
 * @throws IllegalArgumentException if the set of unsupported milestoned
 *         element names is not empty
 * @throws Exception for failures while building or writing the document
 */
public void doExport(Bible bible, String... exportArgs) throws Exception {
    Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    Element osis = doc.createElement("osis");
    osis.setAttribute("xmlns:xsi", "");
    osis.setAttribute("xmlns", "");
    osis.setAttribute("xsi:schemaLocation", "");
    Element osisText = doc.createElement("osisText");
    osisText.setAttribute("canonical", "true");
    osisText.setAttribute("osisIDWork", "Exported");
    osisText.appendChild(buildHeader(doc, bible.getName()));
    for (Book bk : bible.getBooks()) {
        // One <div type="book"> per book, identified by the book's OSIS ID.
        Element book = doc.createElement("div");
        book.setAttribute("type", "book");
        book.setAttribute("canonical", "true");
        book.setAttribute("osisID", bk.getId().getOsisID());
        Element bookTitle = doc.createElement("title");
        bookTitle.setAttribute("type", "main");
        int cnumber = 0;
        for (Chapter chp : bk.getChapters()) {
            // Chapter osisID has the form "<book>.<chapter number>".
            Element chapter = doc.createElement("chapter");
            chapter.setAttribute("osisID", bk.getId().getOsisID() + "." + cnumber);
            OSISVisitor visitor = new OSISVisitor(chapter, bk.getId().isNT());
            Element elem = doc.createElement("title");
            elem.setAttribute("type", "chapter");
            elem.appendChild(doc.createTextNode(bk.getAbbr() + " " + cnumber));
            if (chp.getProlog() != null) {
            for (VirtualVerse vv : chp.createVirtualVerses()) {
                // Verse osisID has the form "<book>.<chapter number>.<verse number>".
                String osisID = bk.getId().getOsisID() + "." + cnumber + "." + vv.getNumber();
                for (Headline hl : vv.getHeadlines()) {
                Element verse = doc.createElement("verse");
                verse.setAttribute("osisID", osisID);
                for (Verse v : vv.getVerses()) {
                    // A verse whose own number differs from the virtual verse number gets a bold "(number)" marker.
                    if (!v.getNumber().equals("" + vv.getNumber())) {
                        elem = doc.createElement("hi");
                        elem.setAttribute("type", "bold");
                        elem.appendChild(doc.createTextNode("(" + v.getNumber() + ")"));
                    v.accept(new OSISVisitor(verse, bk.getId().isNT()));
    // Second argument selects the elements to milestone; "verse" by default, "-" for none.
    String milestonedElementNames = exportArgs.length > 1 ? exportArgs[1] : "verse";
    if (!milestonedElementNames.equals("-")) {
        Set<String> milestonedElements = new HashSet<>(Arrays.asList(milestonedElementNames.split(",")));
        // NOTE(review): the step that removes supported names from this copy is not visible in this listing.
        Set<String> unsupportedMilestonedElements = new HashSet<>(milestonedElements);
        if (!unsupportedMilestonedElements.isEmpty()) {
            for (String elem : unsupportedMilestonedElements) {
                if (GENERATED_UNMILESTONEABLE_ELEMENTS.contains(elem)) {
                    System.out.println("ERROR: " + elem + " may not be milestoned");
                } else {
                    System.out.println("ERROR: " + elem + " is never generated by the OSIS export");
            throw new IllegalArgumentException("Cannot create milestoned elements: " + milestonedElementNames);
        convertChildrenToMilestoned(doc.getDocumentElement(), milestonedElements);
    // Serialize the DOM document to the output file given as first argument.
    TransformerFactory.newInstance().newTransformer().transform(new DOMSource(doc), new StreamResult(new File(exportArgs[0])));
Also used : DOMSource(javax.xml.transform.dom.DOMSource) VirtualVerse( StreamResult( Element(org.w3c.dom.Element) Chapter( Document(org.w3c.dom.Document) MetadataBook( Book( Headline( File( VirtualVerse( Verse( HashSet(java.util.HashSet)

Example 29 with Chapter

Use of Chapter in project BibleMultiConverter by schierlm.

In the class OSIS, method parseBook.

/**
 * Parses the child nodes of one OSIS book element into the given book.
 *
 * Text nodes only trigger a warning when they contain non-whitespace text,
 * comments are ignored, "title" elements are checked, and "chapter" elements
 * are handed to parseChapter after the book's chapter list has been grown to
 * the chapter number. Any other element produces a warning. Elements still
 * listed in unclosedElements at the end are collected into a warning message.
 *
 * NOTE(review): this listing looks extraction-damaged - closing braces and
 * some statements (e.g. what is done with the final message) are not visible.
 *
 * @param bookName OSIS name of the book; chapter IDs must start with it plus "."
 * @param osisBook DOM element whose children are parsed
 * @param bibleBook book that receives the chapters
 * @throws IllegalStateException if a chapter ID does not start with the book name and "."
 */
private void parseBook(String bookName, Element osisBook, Book bibleBook) {
    warningContext = bookName;
    List<Element> unclosedElements = new ArrayList<Element>();
    for (Node node = osisBook.getFirstChild(); node != null; node = node.getNextSibling()) {
        if (node instanceof Text) {
            if (((Text) node).getTextContent().trim().length() > 0)
                printWarning("WARNING: Non-whitespace text at book level");
        } else if (node instanceof Comment) {
        } else {
            // NOTE(review): assumes every remaining node type is an Element - confirm.
            Element elem = (Element) node;
            if (elem.getNodeName().equals("title")) {
                if (elem.getAttribute("type").equals("main")) {
                    // A main title whose text differs from the book's long name is reported.
                    if (!getTextChildren(elem).equals(bibleBook.getLongName())) {
                        printWarning("WARNING: More than one book title found");
                } else {
                    printWarning("WARNING: invalid book level title type: " + elem.getAttribute("type"));
            } else if (elem.getNodeName().equals("chapter")) {
                String chapterName = elem.getAttribute("osisID");
                // An osisID containing "-" is cut at the first "-" and reported.
                if (chapterName.contains("-")) {
                    chapterName = chapterName.substring(0, chapterName.indexOf("-"));
                    printWarning("WARNING: Invalid chapter OSIS reference: " + elem.getAttribute("osisID") + ", using " + chapterName);
                if (!chapterName.startsWith(bookName + ".")) {
                    throw new IllegalStateException("Invalid chapter " + chapterName + " of book " + bookName);
                } else {
                    // The chapter number is the part after "<bookName>.".
                    int cnumber = Integer.parseInt(chapterName.substring(bookName.length() + 1));
                    // Grow the chapter list until the chapter with this number exists.
                    while (bibleBook.getChapters().size() < cnumber) {
                        bibleBook.getChapters().add(new Chapter());
                    // Warnings raised while parsing the chapter use the chapter name as context.
                    warningContext = chapterName;
                    parseChapter(chapterName, elem, bibleBook.getChapters().get(cnumber - 1), unclosedElements);
                    warningContext = bookName;
            } else {
                printWarning("WARNING: invalid book level tag: " + elem.getNodeName());
    if (unclosedElements.size() > 0) {
        // Message lists each unclosed element as " name[sID]".
        StringBuilder message = new StringBuilder("WARNING: Unclosed milestoned elements:");
        for (Element elem : unclosedElements) {
            message.append(" " + elem.getNodeName() + "[" + elem.getAttribute("sID") + "]");
Also used : Comment(org.w3c.dom.Comment) Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) ArrayList(java.util.ArrayList) Chapter( Text(org.w3c.dom.Text) FormattedText(

Example 30 with Chapter

Use of Chapter in project BibleMultiConverter by schierlm.

In the class RoundtripHTML, method doImport.

/**
 * Imports a bible from a directory of roundtrip HTML files.
 *
 * First reads metadata.js: the bible name comes from the first line, and the
 * following "key:value" lines are collected into a field map from which a
 * Book with the stated number of chapters is built whenever a line starting
 * with "}" is reached. Then, for every chapter of every book, reads
 * "<type dir>/<abbr>_<chapter number>.html" and parses its prolog, verses
 * and footnotes sections.
 *
 * NOTE(review): this listing looks extraction-damaged - closing braces and
 * several statements (e.g. adding the book to the bible, incrementing
 * cnumber, attaching prolog and verses to the chapter) are not visible.
 *
 * @param inputDir directory that contains metadata.js and the chapter files
 * @return the imported bible
 * @throws Exception an IOException carrying the offending text is thrown
 *         when a line does not have the expected shape
 */
public Bible doImport(File inputDir) throws Exception {
    Bible bible;
    // metadata
    try (BufferedReader br = createReader(inputDir, "metadata.js")) {
        String line = br.readLine();
        // Bible name: skip a 13-character prefix and a 2-character suffix, then undo \" and \\ escapes.
        bible = new Bible(line.substring(13, line.length() - 2).replace("\\\"", "\"").replace("\\\\", "\\"));
        Map<String, Object> fieldMap = new HashMap<String, Object>();
        while ((line = br.readLine()) != null) {
            if (line.startsWith("}")) {
                // End of one book record: build the book from the collected fields.
                Book bk = new Book((String) fieldMap.get("abbr"), BookID.fromOsisId((String) fieldMap.get("osis")), (String) fieldMap.get("short"), (String) fieldMap.get("long"));
                for (int i = 0; i < (Integer) fieldMap.get("chapters"); i++) {
                    bk.getChapters().add(new Chapter());
            // Split "key:value" at the first colon and drop a trailing comma.
            int pos = line.indexOf(":");
            String key = line.substring(0, pos);
            String value = line.substring(pos + 1);
            if (value.endsWith(","))
                value = value.substring(0, value.length() - 1);
            // Quoted values are stored as unescaped strings, true/false as booleans, anything else as integers.
            if (value.startsWith("\"") && value.endsWith("\"")) {
                fieldMap.put(key, value.substring(1, value.length() - 1).replace("\\\"", "\"").replace("\\\\", "\\"));
            } else if (value.equals("true") || value.equals("false")) {
                fieldMap.put(key, Boolean.parseBoolean(value));
            } else {
                fieldMap.put(key, Integer.parseInt(value));
    // chapters
    for (Book bk : bible.getBooks()) {
        int cnumber = 0;
        for (Chapter ch : bk.getChapters()) {
            try (BufferedReader br = createReader(inputDir, getTypeDir(bk.getId()) + "/" + bk.getAbbr() + "_" + cnumber + ".html")) {
                String line;
                // Footnote visitors collected while parsing; filled from the footnotes section below.
                List<FormattedText.Visitor<RuntimeException>> footnotes = new ArrayList<>();
                while ((line = br.readLine()) != null) {
                    if (line.equals("<div class=\"biblehtmlcontent prolog\">")) {
                        // Prolog: a single content line followed by the closing </div>.
                        line = br.readLine();
                        FormattedText prolog = new FormattedText();
                        int end = parseLine(prolog.getAppendVisitor(), line, 0, footnotes);
                        if (end != line.length())
                            throw new IOException(line.substring(end));
                        line = br.readLine();
                        if (!line.equals("</div>"))
                            throw new IOException(line);
                    } else if (line.equals("<div class=\"biblehtmlcontent verses\" id=\"verses\">")) {
                        while ((line = br.readLine()) != null) {
                            if (line.equals("</div>"))
                            if (!line.startsWith("<div class=\"v\" id=\"v") || !line.endsWith("</div>"))
                                throw new IOException(line);
                            // Strip the 20-character opening prefix and the 6-character "</div>" suffix.
                            line = line.substring(20, line.length() - 6);
                            // What precedes the first "\">" is the verse number; the verse content follows it.
                            int pos = line.indexOf("\">");
                            Verse v = new Verse(line.substring(0, pos));
                            int end = parseLine(v.getAppendVisitor(), line, pos + 2, footnotes);
                            if (end != line.length())
                                throw new IOException(line.substring(end));
                        if (!line.equals("</div>"))
                            throw new IOException(line);
                    } else if (line.equals("<div class=\"biblehtmlcontent footnotes\">")) {
                        // One line per collected footnote, numbered from 1, each with a fixed prefix.
                        for (int i = 0; i < footnotes.size(); i++) {
                            line = br.readLine();
                            String prefix = "<div class=\"fn\"><sup class=\"fnt\"><a name=\"fn" + (i + 1) + "\" href=\"#fnm" + (i + 1) + "\">" + (i + 1) + "</a></sup> ";
                            if (!line.startsWith(prefix) || !line.endsWith("</div>"))
                                throw new IOException(line);
                            line = line.substring(prefix.length(), line.length() - 6);
                            int end = parseLine(footnotes.get(i), line, 0, null);
                            if (end != line.length())
                                throw new IOException(line.substring(end));
                        line = br.readLine();
                        if (!line.equals("</div>"))
                            throw new IOException(line);
                if (ch.getProlog() != null)
                for (Verse v : ch.getVerses()) v.finished();
    return bible;
Also used : Visitor( HashMap(java.util.HashMap) Bible( Chapter( ArrayList(java.util.ArrayList) FormattedText( IOException( Book( BufferedReader( Verse(


Chapter ( Book ( Verse ( FormattedText ( VirtualVerse ( MetadataBook ( BookID ( ArrayList (java.util.ArrayList)20 Bible ( Headline ( File ( BufferedWriter ( IOException ( FileOutputStream ( OutputStreamWriter ( EnumMap (java.util.EnumMap)9 HashMap (java.util.HashMap)9 Visitor ( BufferedReader ( FileInputStream (