Search in sources :

Example 11 with FormattedText

use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.

In class ESwordHTML, method doExport.

/**
 * Exports the given Bible as two HTML files: {@code <filename>.bblx.HTM}
 * (verse text) and {@code <filename>.cmtx.HTM} (commentary), both written as UTF-8.
 *
 * <p>Books that have no entry in {@code BOOK_INFO_BY_ID} and chapters beyond the
 * length of the book's {@code versification} array are skipped with a warning on
 * standard output. Verses are regrouped with
 * {@code Chapter.createVirtualVerses(BitSet)} using the verse numbers
 * {@code 1..maxVerse} of the respective chapter. A commentary entry is written
 * only for verses whose rendered commentary contains the {@code <!--keep-->} tag;
 * the chapter prolog is rendered in front of the first verse of the chapter.
 *
 * @param bible the Bible to export; its introduction prologs are merged via
 *        {@code StrippedDiffable.mergeIntroductionPrologs} before exporting
 * @param exportArgs {@code exportArgs[0]} is the output file name prefix; when more
 *        than one argument is given, {@code exportArgs[1]} is a marker string that
 *        is appended to every written line in front of the closing {@code </p>}
 * @throws IllegalArgumentException if no output file name prefix is given
 * @throws Exception if writing the files or visiting the Bible content fails
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    // Fail with a descriptive message instead of an ArrayIndexOutOfBoundsException further down.
    if (exportArgs == null || exportArgs.length == 0) {
        throw new IllegalArgumentException("Missing export argument: output file name prefix");
    }
    new StrippedDiffable().mergeIntroductionPrologs(bible);
    String filename = exportArgs[0];
    String marker = exportArgs.length == 1 ? "" : exportArgs[1];
    String title = bible.getName();
    try (BufferedWriter bblx = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(filename + ".bblx.HTM")), StandardCharsets.UTF_8));
        BufferedWriter cmtx = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(filename + ".cmtx.HTM")), StandardCharsets.UTF_8))) {
        // Header of the verse text file: style sheet followed by the "#define" metadata lines.
        bblx.write("<html><head>\n"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" />\n"
                + "<style>\n"
                + "p{margin-top:0pt;margin-bottom:0pt;}\n"
                + "b.headline{font-size:14pt;}\n"
                + "sup.str{color:#008000;}\n"
                + ".xref {color:#008000;font-weight:bold;text-decoration:underline;}\n"
                + "</style>\n"
                + "</head><body>\n"
                + "<p>#define description=" + title + marker + "</p>\n"
                + "<p>#define abbreviation=ChangeMe" + marker + "</p>\n"
                + "<p>#define comments=Exported by BibleMultiConverter" + marker + "</p>\n"
                + "<p>#define version=1" + marker + "</p>\n"
                + "<p>#define strong=0" + marker + "</p>\n"
                + "<p>#define right2left=0" + marker + "</p>\n"
                + "<p>#define ot=1" + marker + "</p>\n"
                + "<p>#define nt=1" + marker + "</p>\n"
                + "<p>#define font=DEFAULT" + marker + "</p>\n"
                + "<p>#define apocrypha=1" + marker + "</p>\n"
                + "<p><span style=\"background-color:#C80000;\">\u00F7</span>" + marker + "</p>\n");
        // Header of the commentary file (note: the last line ends with \r\n, kept as in the original output).
        cmtx.write("<html><head>\n"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" />\n"
                + "<style>\n"
                + "p{margin-top:0pt;margin-bottom:0pt;}\n"
                + "p.spc{margin-top:10pt;margin-bottom:0pt;}\n"
                + "p.prologend{border-width:1px;border-top-style:none;border-right-style:none;border-bottom-style:solid;border-left-style:none;border-color:black}\n"
                + "b.headline{font-size:14pt;}\n"
                + "sup.str{color:#008000;}\n"
                + "</style></head><body>\n"
                + "<p>#define description=" + title + " (Kommentar)" + marker + "</p>\n"
                + "<p>#define abbreviation=ChangeMe" + marker + "</p>\n"
                + "<p>#define comments=Exported by BibleMultiConverter" + marker + "</p>\n"
                + "<p>#define version=1" + marker + "</p>\r\n");
        for (Book book : bible.getBooks()) {
            ESwordBookInfo info = BOOK_INFO_BY_ID.get(book.getId());
            if (info == null) {
                System.out.println("WARNING: Skipping book " + book.getAbbr());
                continue;
            }
            String bname = info.name;
            int cnumber = 0;
            for (Chapter chapter : book.getChapters()) {
                cnumber++;
                if (cnumber > info.versification.length) {
                    System.out.println("WARNING: Skipping chapter " + book.getAbbr() + " " + cnumber);
                    continue;
                }
                // Only verse numbers 1..maxVerse are allowed as virtual verse numbers for this chapter.
                int maxVerse = info.versification[cnumber - 1];
                BitSet allowedNumbers = new BitSet(maxVerse + 1);
                allowedNumbers.set(1, maxVerse + 1);
                FormattedText prolog = chapter.getProlog();
                for (VirtualVerse vv : chapter.createVirtualVerses(allowedNumbers)) {
                    int vnumber = vv.getNumber();
                    String vref = bname + " " + cnumber + ":" + vnumber;
                    StringBuilder parsedVerse = new StringBuilder();
                    StringBuilder parsedCommentary = new StringBuilder();
                    for (Headline hl : vv.getHeadlines()) {
                        parsedVerse.append("<b class=\"headline\">");
                        hl.accept(new ESwordVisitor(parsedVerse, marker, book.getId().isNT(), "", "", null, null));
                        parsedVerse.append("</b><br />");
                    }
                    for (Verse v : vv.getVerses()) {
                        // Show the real verse number when it differs from the virtual verse number.
                        if (!v.getNumber().equals("" + vnumber)) {
                            parsedVerse.append("<b>(" + v.getNumber() + ")</b>");
                        }
                        StringBuilder comments = new StringBuilder();
                        if (prolog != null) {
                            prolog.accept(new ESwordVisitor(comments, marker, book.getId().isNT(), "", "", "<i>", "</i>"));
                            comments.append(marker + "</p>\n<!--keep--><p class=\"prologend\">&nbsp;" + marker + "</p>\n<p class=\"spc\">");
                            // Emit the prolog only once: without this, every further verse merged into
                            // the same virtual verse would repeat the prolog in the commentary.
                            prolog = null;
                        }
                        v.accept(new ESwordVisitor(parsedVerse, marker, book.getId().isNT(), "", "", null, null));
                        v.accept(new ESwordVisitor(comments, marker, book.getId().isNT(), "<b>", "</b>", "", ""));
                        // Only commentary that contains the keep tag is written out.
                        if (comments.toString().contains("<!--keep-->"))
                            parsedCommentary.append(comments.toString());
                    }
                    if (parsedVerse.length() == 0)
                        parsedVerse.append("-");
                    bblx.write("<p>" + vref + " " + parsedVerse.toString() + marker + "</p>\n");
                    if (parsedCommentary.length() > 0)
                        cmtx.write("<p><span style=\"background-color:#FF0000;\">\u00F7</span>" + vref + marker + "</p>\n<p>" + parsedCommentary.toString() + marker + "</p>\n");
                    // As before, the prolog never carries over past the first virtual verse,
                    // even if that virtual verse contained no verses.
                    prolog = null;
                }
            }
        }
        bblx.write("</body></html>");
        cmtx.write("</body></html>");
    }
}
Also used : VirtualVerse(biblemulticonverter.data.VirtualVerse) Chapter(biblemulticonverter.data.Chapter) BitSet(java.util.BitSet) FormattedText(biblemulticonverter.data.FormattedText) BufferedWriter(java.io.BufferedWriter) Book(biblemulticonverter.data.Book) FileOutputStream(java.io.FileOutputStream) Headline(biblemulticonverter.data.FormattedText.Headline) OutputStreamWriter(java.io.OutputStreamWriter) File(java.io.File) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Example 12 with FormattedText

use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.

In class SWORD, method doImport.

/**
 * Converts a JSword book into a {@link Bible}.
 *
 * <p>First, all verse keys are collected from the book's global key list. Keys
 * that the iterator did not return but that the book reports as contained
 * (before, between and after the iterated keys) are added as well, with a
 * warning on standard output. Then every collected key is rendered through SAX
 * events into a DOM element, unwrapped from its enclosing {@code div}/{@code title}
 * and passed to {@code OSISHelper.handleVerse}. Verse number 0 is stored as the
 * chapter prolog; chapter number 0 is stored in the first chapter. Empty verses
 * and prologs are dropped again, as are trailing empty chapters and books that
 * end up without chapters.
 *
 * @param book the JSword book to read
 * @return the converted Bible, named after {@code book.getName()}
 * @throws IllegalStateException if a verse other than verse 0 is found in chapter 0
 * @throws RuntimeException if the rendered verse is not a single {@code div}
 *         element whose first child is a {@code title}
 * @throws Exception if reading or transforming the book data fails
 */
protected Bible doImport(Book book) throws Exception {
    OSISHelper osisHelper = new OSISHelper();
    Bible bible = new Bible(book.getName());
    TransformerHandler handler = ((SAXTransformerFactory) SAXTransformerFactory.newInstance()).newTransformerHandler();
    Map<BookID, biblemulticonverter.data.Book> booksById = new EnumMap<>(BookID.class);
    Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();

    // Phase 1: collect every verse key, filling the gaps the key list iterator leaves.
    List<Verse> orderedKeys = new ArrayList<>();
    Verse expectedNext = null;
    Iterator<?> keyIterator = book.getGlobalKeyList().iterator();
    while (keyIterator.hasNext()) {
        Verse current = (Verse) keyIterator.next();
        // Walk forward from the previously seen verse up to the current one.
        for (; expectedNext != null && !expectedNext.equals(current); expectedNext = expectedNext.getVersification().add(expectedNext, 1)) {
            if (book.contains(expectedNext)) {
                System.out.println("WARNING: Verse (after) skipped by iterator: " + expectedNext);
                orderedKeys.add(expectedNext);
            }
        }
        // Walk backward from the current verse until a verse that is already known
        // (or already queued, which guards against subtract() returning the same verse).
        List<Verse> skippedBefore = new ArrayList<>();
        for (Verse earlier = current.getVersification().subtract(current, 1);
                earlier != null && !orderedKeys.contains(earlier) && !skippedBefore.contains(earlier);
                earlier = earlier.getVersification().subtract(earlier, 1)) {
            skippedBefore.add(0, earlier);
        }
        for (Verse skipped : skippedBefore) {
            if (book.contains(skipped)) {
                System.out.println("WARNING: Verse (before) skipped by iterator: " + skipped);
                orderedKeys.add(skipped);
            }
        }
        orderedKeys.add(current);
        expectedNext = current.getVersification().add(current, 1);
    }
    // Walk forward past the last iterated verse until add() stops advancing.
    Verse tail = expectedNext;
    while (tail != null) {
        if (book.contains(tail)) {
            System.out.println("WARNING: Verse (at end) skipped by iterator: " + tail);
            orderedKeys.add(tail);
        }
        Verse following = tail.getVersification().add(tail, 1);
        tail = tail.equals(following) ? null : following;
    }

    // Phase 2: convert each collected key into a verse or chapter prolog.
    for (Verse key : orderedKeys) {
        BookID bookId = biblemulticonverter.sword.BookMapping.MAPPING.get(key.getBook());
        biblemulticonverter.data.Book target = booksById.get(bookId);
        if (target == null) {
            String englishName = bookId.getEnglishName();
            target = new biblemulticonverter.data.Book(bookId.getOsisID().replace("x-Intr", "Intr"), bookId, englishName, bookId.getEnglishName());
            booksById.put(bookId, target);
            target.getChapters().add(new Chapter());
            bible.getBooks().add(target);
        }
        int chapterNum = key.getChapter();
        int verseNum = key.getVerse();
        while (target.getChapters().size() < chapterNum) {
            target.getChapters().add(new Chapter());
        }
        // Chapter 0 content is stored in the first chapter.
        int chapterIndex;
        if (chapterNum == 0) {
            chapterIndex = 0;
        } else {
            chapterIndex = chapterNum - 1;
        }
        Chapter chapter = target.getChapters().get(chapterIndex);

        FormattedText content;
        if (verseNum != 0) {
            if (chapterNum == 0) {
                throw new IllegalStateException("Verse " + verseNum + " in chapter 0 is invalid");
            }
            biblemulticonverter.data.Verse parsedVerse = new biblemulticonverter.data.Verse("" + verseNum);
            chapter.getVerses().add(parsedVerse);
            content = parsedVerse;
        } else {
            // Verse 0 becomes the prolog; an already existing prolog is copied in front.
            content = new FormattedText();
            if (chapter.getProlog() != null) {
                chapter.getProlog().accept(content.getAppendVisitor());
            }
            chapter.setProlog(content);
        }

        Element root = document.createElement("verse");
        handler.setResult(new DOMResult(root));
        new BookData(book, key).getSAXEventProvider().provideSAXEvents(handler);

        // Expect exactly one <div> whose first child is a <title>.
        Node wrapper = root.getFirstChild();
        boolean expectedStructure = root.getChildNodes().getLength() == 1
                && wrapper instanceof Element
                && wrapper.getNodeName().equals("div")
                && wrapper.getChildNodes().getLength() >= 1
                && wrapper.getFirstChild().getNodeName().equals("title");
        if (!expectedStructure) {
            throw new RuntimeException("Unexpected OSIS structure!");
        }
        // Drop the <div> and its <title>, moving the remaining children directly below the root.
        Element div = (Element) wrapper;
        root.removeChild(div);
        div.removeChild(div.getFirstChild());
        for (Node child = div.getFirstChild(); child != null; child = div.getFirstChild()) {
            div.removeChild(child);
            root.appendChild(child);
        }

        osisHelper.handleVerse(root, content);
        if (content.getElementTypes(1).length() == 0) {
            System.out.println("WARNING: Empty verse " + target.getAbbr() + " " + chapterNum + ":" + verseNum);
            if (content instanceof biblemulticonverter.data.Verse) {
                chapter.getVerses().remove(content);
            } else {
                chapter.setProlog(null);
            }
        }
    }

    // Phase 3: strip trailing empty chapters and books left without any chapter.
    for (biblemulticonverter.data.Book parsedBook : booksById.values()) {
        while (!parsedBook.getChapters().isEmpty()) {
            Chapter last = parsedBook.getChapters().get(parsedBook.getChapters().size() - 1);
            if (last.getProlog() != null || !last.getVerses().isEmpty()) {
                break;
            }
            parsedBook.getChapters().remove(last);
        }
        if (parsedBook.getChapters().isEmpty()) {
            bible.getBooks().remove(parsedBook);
        }
    }
    return bible;
}
Also used : TransformerHandler(javax.xml.transform.sax.TransformerHandler) DOMResult(javax.xml.transform.dom.DOMResult) Bible(biblemulticonverter.data.Bible) Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) ArrayList(java.util.ArrayList) Document(org.w3c.dom.Document) BookID(biblemulticonverter.data.BookID) Book(org.crosswire.jsword.book.Book) BookData(org.crosswire.jsword.book.BookData) EnumMap(java.util.EnumMap) SAXTransformerFactory(javax.xml.transform.sax.SAXTransformerFactory) Chapter(biblemulticonverter.data.Chapter) FormattedText(biblemulticonverter.data.FormattedText) Verse(org.crosswire.jsword.passage.Verse)

Example 13 with FormattedText

use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.

In class NeUeParser, method doImport.

@Override
public Bible doImport(File inputDirectory) throws Exception {
    Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
    MetadataBook metadata = new MetadataBook();
    metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
    metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
    metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
    metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
    metadata.setValue(MetadataBookKey.language, "GER");
    bible.getBooks().add(metadata.getBook());
    String mainFile = "NeUe.htm";
    if (!new File(inputDirectory, mainFile).exists())
        mainFile = "index.htm";
    try (BufferedReader br = createReader(inputDirectory, mainFile)) {
        String line = br.readLine().trim();
        while (!line.startsWith("<p class=\"u3\">")) {
            if (line.contains("Textstand: ")) {
                line = line.substring(line.indexOf("Textstand: ") + 11);
                line = line.substring(0, line.indexOf('<'));
                metadata.setValue(MetadataBookKey.version, line);
                metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
                metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
                metadata.finished();
            }
            line = br.readLine().trim();
        }
        Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a>&nbsp;&nbsp;(?:</p>)?");
        int bookIndex = 0, jcIndex = 0;
        while (!line.startsWith("<a name=\"vorwort\">")) {
            if (line.equals("<br>")) {
                line = br.readLine().trim();
                if (line.startsWith("&raquo;&raquo;&nbsp;&nbsp;"))
                    line = line.substring("&raquo;&raquo;&nbsp;&nbsp;".length());
            }
            Matcher m = tocPattern.matcher(line);
            if (m.matches()) {
                String url = m.group(1);
                String shortName = replaceEntities(m.group(2));
                if (url.endsWith(".html#bb")) {
                    String filename = url.substring(0, url.length() - 8);
                    BookMetadata bm = METADATA[bookIndex];
                    if (!bm.filename.equals(filename))
                        throw new IOException(filename + "/" + bm.filename);
                    bm.shortname = shortName;
                    bookIndex++;
                } else if (url.startsWith("0")) {
                    if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
                        throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
                    jcIndex++;
                } else {
                    throw new IOException(url);
                }
            } else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p>&nbsp;</p>") && !line.equals("<p><a name=\"bb\">&nbsp;</a></p>")) {
                throw new IOException(line);
            }
            line = br.readLine().trim();
        }
        if (bookIndex != METADATA.length)
            throw new IOException(bookIndex + " != " + METADATA.length);
        if (jcIndex == 0)
            JESUS_CHRONIK = new String[0];
        if (jcIndex != JESUS_CHRONIK.length)
            throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
        // Vorwort
        Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
        bible.getBooks().add(vorwort);
        Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
        boolean needParagraph = false;
        if (line.endsWith("</a><br>"))
            line = br.readLine().trim();
        while (!line.startsWith("<div align=\"right\">")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            if (line.startsWith("<h2>")) {
                if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
                    throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
            } else if (line.startsWith("<h4>")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<h4 id=")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<div class=\"fn\">")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
            } else if (line.startsWith("<p>")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                if (line.endsWith("<br />"))
                    line += br.readLine().trim();
                parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
            } else if (line.equals("<ul>")) {
                while (!line.equals("</ul>")) {
                    line = br.readLine();
                }
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<p>&nbsp;</p>");
        }
        vorwort.getChapters().get(0).getProlog().finished();
    }
    for (BookMetadata bm : METADATA) {
        if (!new File(inputDirectory, bm.filename + ".html").exists()) {
            System.out.println("*** Skipping " + bm.filename + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
            String line = br.readLine().trim();
            line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "&raquo;&raquo;");
            if (!line.equals("<p><a name=\"bb\">&nbsp;</a></p>") && !line.equals("<p><a id=\"bb\">&nbsp;</a></p>"))
                throw new IOException(line);
            line = skipLines(br);
            if (line.equals("<p>&nbsp;</p>"))
                line = br.readLine().trim();
            Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
            bible.getBooks().add(bk);
            line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p>&nbsp;</p>");
            FormattedText prolog = new FormattedText();
            prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
            line = skipLines(br);
            boolean firstProlog = true;
            while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
                if (firstProlog) {
                    firstProlog = false;
                } else {
                    prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                }
                parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
                line = skipLines(br);
            }
            if (firstProlog)
                throw new IOException(line);
            prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
            parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
            prolog.finished();
            line = skipLines(br);
            if (!line.startsWith("<h"))
                throw new IOException(line);
            char minHeadline = line.charAt(2);
            List<Headline> headlines = new ArrayList<>();
            boolean inParagraph = false;
            Chapter currentChapter = null;
            Verse currentVerse = null;
            List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
            List<String> footnoteVerses = new ArrayList<>();
            while (!line.equals("<hr>")) {
                if (line.startsWith("<p>&nbsp;</p>")) {
                    line = line.substring(13).trim();
                    if (line.length() == 0)
                        line = skipLines(br);
                    continue;
                }
                String restLine = null;
                List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
                while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
                if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
                    line = "<p>" + line.substring(16);
                }
                if (line.matches(".*</p>.+")) {
                    int pos = line.indexOf("</p>");
                    restLine = line.substring(pos + 4).trim();
                    line = line.substring(0, pos + 4);
                }
                if (!inParagraph && line.startsWith("<p>")) {
                    inParagraph = true;
                    line = line.substring(3).trim();
                    if (line.length() == 0) {
                        line = skipLines(br);
                        continue;
                    }
                }
                if (line.indexOf("<span class=\"vers\">", 1) != -1) {
                    int pos = line.indexOf("<span class=\"vers\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                if (line.indexOf("<p class=\"poet\">", 1) != -1) {
                    int pos = line.indexOf("<p class=\"poet\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                while (line.endsWith("&nbsp;")) line = line.substring(0, line.length() - 6);
                if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
                    Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
                    String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
                    if (headline.contains("*"))
                        throw new IOException(headline);
                    hl.getAppendVisitor().visitText(replaceEntities(headline));
                    headlines.add(hl);
                } else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
                    int pos = line.indexOf("</span>");
                    if (pos == -1)
                        throw new IOException(line);
                    String vs = line.substring(19, pos).trim();
                    if (vs.endsWith("&nbsp;")) {
                        vs = cutAffix(vs, "", "&nbsp;");
                    }
                    if (vs.matches("[0-9]+(,[0-9]+)?")) {
                        currentVerse = new Verse(vs);
                    } else {
                        throw new IOException(vs);
                    }
                    line = line.substring(pos + 7);
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    if (line.startsWith("&nbsp;")) {
                        line = line.substring(6);
                    }
                    for (Headline h : headlines) {
                        h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
                    }
                    headlines.clear();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                    currentChapter.getVerses().add(currentVerse);
                } else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
                    int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
                    currentChapter = new Chapter();
                    currentVerse = null;
                    bk.getChapters().add(currentChapter);
                    if (chap != bk.getChapters().size())
                        throw new IOException(chap + "/" + bk.getChapters().size());
                    if (prolog != null) {
                        currentChapter.setProlog(prolog);
                        prolog = null;
                    }
                } else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
                    String content = cutAffix(line, "<div class=\"fn\">", "</div>");
                    if (footnoteVerses.size() == 0)
                        throw new IOException(line);
                    String prefix = footnoteVerses.remove(0) + ":";
                    if (!content.startsWith(prefix)) {
                        throw new IOException(prefix + " / " + content);
                    }
                    parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
                } else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith("&nbsp;") || line.startsWith("<span class=\"u2\">"))) {
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    System.err.println("Next line: " + br.readLine());
                    throw new IOException(line);
                }
                if (!newFootnotes.isEmpty()) {
                    footnotes.addAll(newFootnotes);
                    for (int i = 0; i < newFootnotes.size(); i++) {
                        if (currentVerse.getNumber().contains(",")) {
                            footnoteVerses.add(currentVerse.getNumber());
                        } else {
                            footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
                        }
                    }
                }
                if (restLine != null)
                    line = restLine;
                else
                    line = skipLines(br);
            }
            if (!headlines.isEmpty())
                throw new IOException("" + headlines.size());
            if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
                throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
            for (Chapter ch : bk.getChapters()) {
                for (Verse vv : ch.getVerses()) {
                    vv.trimWhitespace();
                    vv.finished();
                }
            }
        }
    }
    // Anhang
    Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
    bible.getBooks().add(anhang);
    Visitor<RuntimeException> vv = getPrologVisitor(anhang);
    vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
    try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
        String line = br.readLine().trim();
        while (!line.startsWith("<a name=\"at\">")) {
            line = br.readLine().trim();
        }
        while (!line.equals("</body>")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            line = line.replaceAll("> +<", "><");
            line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
            if (line.startsWith("<h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
            } else if (line.startsWith("<h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
            } else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
                String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
                line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
                if (line.contains("<td><br /><br /><a href")) {
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                    line = line.replace("<td><br /><br /><a href", "<td><a href");
                }
                String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
                Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
                BookMetadata m = null;
                for (BookMetadata bm : METADATA) {
                    if (bm.filename.equals(parts[0])) {
                        m = bm;
                        break;
                    }
                }
                bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
                bold.visitText(" " + replaceEntities(title));
                vv.visitLineBreak(LineBreakKind.NEWLINE);
                line = br.readLine().trim();
                while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
                vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
                vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                line = br.readLine().trim();
                if (!line.equals("</tr>"))
                    throw new IOException(line);
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p>", "<p>&nbsp;</p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
        }
    }
    // Hesekiels Tempel
    vv.visitHeadline(1).visitText("Hesekiels Tempel");
    Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
    vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
    vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");
    // Jesus-Chronik
    if (JESUS_CHRONIK.length > 0)
        vv.visitHeadline(1).visitText("Die Jesus-Chronik");
    for (String name : JESUS_CHRONIK) {
        if (!new File(inputDirectory, name + ".html").exists()) {
            System.out.println("*** Skipping " + name + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
            String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
            List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
            List<String> footnotePrefixes = new ArrayList<>();
            while (!line.startsWith("</body>")) {
                line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
                if (line.startsWith("<h2>")) {
                    parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
                } else if (line.startsWith("<div class=\"fn\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
                    for (String fn : fns) {
                        fn = fn.trim();
                        String pfx = footnotePrefixes.remove(0);
                        Visitor<RuntimeException> fnv = footnoteList.remove(0);
                        if (!fn.startsWith(pfx))
                            throw new IOException(pfx + " / " + fn);
                        parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
                    }
                } else if (line.startsWith("<p><div class=\"rot\">")) {
                    String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
                    int pos = line.indexOf("</b><br />");
                    parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
                    String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
                    if (!xref.isEmpty())
                        parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p>")) {
                    parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("&copy;")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<div class=\"e\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    throw new IOException(line);
                }
                line = skipLines(br);
            }
            if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
                throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
        }
    }
    anhang.getChapters().get(0).getProlog().trimWhitespace();
    anhang.getChapters().get(0).getProlog().finished();
    return bible;
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) Matcher(java.util.regex.Matcher) Bible(biblemulticonverter.data.Bible) ArrayList(java.util.ArrayList) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) Headline(biblemulticonverter.data.FormattedText.Headline) MetadataBook(biblemulticonverter.data.MetadataBook) Pattern(java.util.regex.Pattern) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FormattedText(biblemulticonverter.data.FormattedText) Date(java.util.Date) BufferedReader(java.io.BufferedReader) File(java.io.File) SimpleDateFormat(java.text.SimpleDateFormat) Verse(biblemulticonverter.data.Verse)

Example 14 with FormattedText

use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.

the class NeUeParser method getPrologVisitor.

/**
 * Appends a new, empty {@link Chapter} to the given book, installs a fresh
 * {@link FormattedText} as the prolog of the book's chapter at index 0 and
 * returns the visitor used to append content to that prolog.
 *
 * @param book the book that receives the new chapter and prolog
 * @return the append visitor of the newly created prolog
 */
private Visitor<RuntimeException> getPrologVisitor(Book book) {
    // The chapter is appended first; the prolog is then attached to whatever
    // chapter sits at index 0 of the book.
    book.getChapters().add(new Chapter());
    final FormattedText prologText = new FormattedText();
    book.getChapters().get(0).setProlog(prologText);
    return prologText.getAppendVisitor();
}
Also used : Chapter(biblemulticonverter.data.Chapter) FormattedText(biblemulticonverter.data.FormattedText)

Example 15 with FormattedText

use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.

the class OSIS method parseChapter.

/**
 * Walks the direct children of one OSIS chapter element and fills the given
 * {@link Chapter} with a prolog (optional) and verses.
 * <ul>
 * <li>Whitespace-only text nodes are skipped.</li>
 * <li>{@code title} elements become headlines (depth 1 when
 * {@code type="chapter"}, depth 2 otherwise). They are queued and prepended
 * to the next verse or prolog; empty headlines are dropped with a warning.</li>
 * <li>{@code verse} elements become {@link Verse}s. An {@code osisID} listing
 * several space-separated verses is turned into a range ({@code first-last})
 * when the numbers are consecutive, otherwise into a dot-joined list. Verses
 * that end up empty are removed again with a warning.</li>
 * <li>Any other content before the first verse starts the chapter prolog: the
 * node and all following siblings up to the next {@code verse} element are
 * moved into a synthetic {@code prolog} element and parsed as one unit. The
 * same kind of content after a verse (or after the prolog) only produces a
 * warning.</li>
 * </ul>
 *
 * @param chapterName      OSIS name of the chapter; verse osisIDs must start with this name followed by a dot
 * @param osisChapter      DOM element of the chapter; it is modified in place when a prolog holder is inserted
 * @param chapter          target chapter receiving prolog and verses
 * @param unclosedElements passed through unchanged to {@code convertFromMilestoned}
 * @throws IllegalArgumentException if a verse element still carries a milestone attribute ({@code sID} or {@code eID})
 * @throws IllegalStateException    if a verse has no osisID or an osisID that does not belong to this chapter
 * @throws NumberFormatException    if the first verse number of an osisID is not a plain integer
 */
private void parseChapter(String chapterName, Element osisChapter, Chapter chapter, List<Element> unclosedElements) {
    // -1: nothing seen yet (a prolog may still start); 0: prolog seen; >0: number of the last verse
    int lastVerse = -1;
    // headlines collected since the last verse/prolog, waiting to be attached
    List<Headline> headlines = new ArrayList<Headline>();
    for (Node node = osisChapter.getFirstChild(); node != null; node = node.getNextSibling()) {
        boolean startProlog = false;
        if (node instanceof Text) {
            if (node.getTextContent().trim().length() == 0)
                continue;
            if (lastVerse == -1) {
                startProlog = true;
            } else {
                printWarning("WARNING: Non-whitespace at chapter level: " + node.getTextContent());
            }
        } else if (node instanceof Element) {
            Element elem = (Element) node;
            if (elem.getNodeName().equals("title")) {
                Headline hl = new Headline(2);
                if (elem.getAttribute("type").equals("chapter")) {
                    hl = new Headline(1);
                }
                // Only a title consisting of a single text node is trimmed here.
                if (elem.getChildNodes().getLength() == 1 && elem.getFirstChild() instanceof Text) {
                    String text = elem.getFirstChild().getTextContent();
                    if (!text.equals(text.trim())) {
                        printWarning("WARNING: Whitespace at beginning/end of headline: '" + text + "'");
                        elem.getFirstChild().setNodeValue(text.trim());
                    }
                }
                convertFromMilestoned(elem, unclosedElements);
                parseFormattedText(null, elem, hl);
                if (hl.getElementTypes(1).length() == 0) {
                    printWarning("WARNING: Empty headline in " + chapterName);
                } else {
                    headlines.add(hl);
                }
            } else if (elem.getNodeName().equals("verse")) {
                String osisID = elem.getAttribute("osisID");
                // Milestoned verses carry either a start (sID) or an end (eID) marker;
                // both must be gone at this point. (The original tested sID twice.)
                if (!elem.getAttribute("sID").isEmpty() || !elem.getAttribute("eID").isEmpty())
                    throw new IllegalArgumentException("verse should have been de-milestoned already.");
                if (osisID.isEmpty())
                    throw new IllegalStateException("Verse without osisID");
                if (!osisID.startsWith(chapterName + "."))
                    throw new IllegalStateException("Invalid verse " + osisID + " in chapter " + chapterName);
                String vnumber = osisID.substring(chapterName.length() + 1);
                if (osisID.contains(" ")) {
                    // Several verses share this element: the osisID is a space-separated list.
                    vnumber = vnumber.substring(0, vnumber.indexOf(' '));
                    lastVerse = Integer.parseInt(vnumber);
                    // next number expected for a consecutive range; -1 once a gap was found
                    int nextInRange = lastVerse + 1;
                    boolean first = true;
                    for (String part : osisID.split(" ")) {
                        if (first) {
                            // the first part has already been handled above
                            first = false;
                            continue;
                        }
                        if (!part.startsWith(chapterName + "."))
                            throw new IllegalStateException("Invalid verse " + osisID + " in chapter " + chapterName);
                        String partNumber = part.substring(chapterName.length() + 1);
                        vnumber = vnumber + "." + partNumber;
                        if (partNumber.equals("" + nextInRange)) {
                            nextInRange++;
                        } else {
                            nextInRange = -1;
                        }
                    }
                    if (nextInRange != -1) {
                        // all parts were consecutive: use "first-last" instead of the dot-joined list
                        vnumber = lastVerse + "-" + (nextInRange - 1);
                    }
                } else {
                    lastVerse = Integer.parseInt(vnumber);
                }
                Verse verse = new Verse(vnumber);
                warningContext = osisID;
                // pending headlines are placed at the start of this verse
                for (Headline hl : headlines) {
                    hl.accept(verse.getAppendVisitor().visitHeadline(hl.getDepth()));
                }
                headlines.clear();
                chapter.getVerses().add(verse);
                convertFromMilestoned(elem, unclosedElements);
                parseFormattedText(osisID, elem, verse);
                verse.trimWhitespace();
                verse.finished();
                if (verse.getElementTypes(1).length() == 0) {
                    printWarning("WARNING: Empty verse " + osisID);
                    chapter.getVerses().remove(verse);
                }
                warningContext += " (after closing)";
            } else if (lastVerse == -1) {
                startProlog = true;
            } else {
                printWarning("WARNING: " + elem.getNodeName() + " at invalid location");
            }
        }
        if (startProlog) {
            // Wrap this node and every following sibling up to the next verse
            // into a synthetic <prolog> element so it can be parsed as one unit.
            Element holder = osisChapter.getOwnerDocument().createElement("prolog");
            osisChapter.insertBefore(holder, node);
            while (holder.getNextSibling() != null && !holder.getNextSibling().getNodeName().equals("verse")) {
                holder.appendChild(holder.getNextSibling());
            }
            // leaving -1 ensures no second prolog is started for this chapter
            lastVerse = 0;
            FormattedText prolog = new FormattedText();
            chapter.setProlog(prolog);
            for (Headline hl : headlines) {
                hl.accept(prolog.getAppendVisitor().visitHeadline(hl.getDepth()));
            }
            headlines.clear();
            convertFromMilestoned(holder, unclosedElements);
            parseFormattedText(null, holder, prolog);
            prolog.trimWhitespace();
            prolog.finished();
            // the original node now lives inside holder; continue iterating after holder
            node = holder;
        }
    }
    if (headlines.size() > 0)
        printWarning("WARNING: Unused headlines: " + headlines.size());
}
Also used : Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) ArrayList(java.util.ArrayList) Text(org.w3c.dom.Text) FormattedText(biblemulticonverter.data.FormattedText) FormattedText(biblemulticonverter.data.FormattedText) Headline(biblemulticonverter.data.FormattedText.Headline) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Aggregations

FormattedText (biblemulticonverter.data.FormattedText) 31, Chapter (biblemulticonverter.data.Chapter) 25, Book (biblemulticonverter.data.Book) 24, Verse (biblemulticonverter.data.Verse) 22, Bible (biblemulticonverter.data.Bible) 14, BookID (biblemulticonverter.data.BookID) 10, Headline (biblemulticonverter.data.FormattedText.Headline) 9, ArrayList (java.util.ArrayList) 9, MetadataBook (biblemulticonverter.data.MetadataBook) 8, IOException (java.io.IOException) 8, Visitor (biblemulticonverter.data.FormattedText.Visitor) 7, VirtualVerse (biblemulticonverter.data.VirtualVerse) 7, File (java.io.File) 6, EnumMap (java.util.EnumMap) 5, HashMap (java.util.HashMap) 4, Matcher (java.util.regex.Matcher) 4, Element (org.w3c.dom.Element) 3, Node (org.w3c.dom.Node) 3, ExtraAttributePriority (biblemulticonverter.data.FormattedText.ExtraAttributePriority) 2, FormattingInstructionKind (biblemulticonverter.data.FormattedText.FormattingInstructionKind) 2