Search in sources :

Example 6 with MetadataBook

use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.

The class ZefaniaXMLMyBible, method doExport.

/**
 * Exports the given bible as a Zefania XML (zef2005 schema classes) document and writes it,
 * indented, to the file named by {@code exportArgs[0]}.
 *
 * <p>Outline of what the method does:
 * <ul>
 * <li>derives a numeric document revision from the digits of the first usable metadata value
 * (revision, version, date, title - in that order) or, failing that, from the bible name;</li>
 * <li>converts every chapter prolog into a {@code DIV}/{@code NOTE} pair that is attached to the
 * next verse that gets exported;</li>
 * <li>converts headlines into plain-text {@code CAPTION} elements (formatting and footnotes are
 * stripped with a warning);</li>
 * <li>converts verses, rendering footnotes as {@code x-studynote} notes whose formatting is
 * expressed as literal HTML tag strings wrapped around dummy {@code STYLE} elements;</li>
 * <li>drops books that end up without any chapter.</li>
 * </ul>
 *
 * @param bible the bible to export; introduction prologs are merged in place before exporting
 * @param exportArgs export arguments; element 0 is the output file name
 * @throws Exception if building, marshalling or writing the document fails
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    new StrippedDiffable().mergeIntroductionPrologs(bible);
    final ObjectFactory f = new ObjectFactory();
    XMLBIBLE doc = f.createXMLBIBLE();
    doc.setBiblename(bible.getName());
    doc.setType(EnumModtyp.X_BIBLE);
    // The revision must be numeric: take the digits of the first metadata value that has any.
    BigInteger revision = null;
    MetadataBook metadata = bible.getMetadataBook();
    if (metadata != null) {
        for (MetadataBookKey key : Arrays.asList(MetadataBookKey.revision, MetadataBookKey.version, MetadataBookKey.date, MetadataBookKey.title)) {
            String digits = metadata.getValue(key);
            if (digits == null)
                continue;
            digits = digits.replaceAll("[^0-9]+", "");
            if (!digits.isEmpty()) {
                revision = new BigInteger(digits);
                break;
            }
        }
    }
    // Fall back to digits contained in the bible name.
    if (revision == null) {
        String digits = bible.getName().replaceAll("[^0-9]+", "");
        if (!digits.isEmpty()) {
            revision = new BigInteger(digits);
        }
    }
    if (revision != null) {
        doc.setRevision(revision);
    }
    doc.setINFORMATION(f.createINFORMATION());
    // Prologs collected so far that have not yet been attached to a verse.
    List<DIV> prologs = new ArrayList<DIV>();
    for (Book bk : bible.getBooks()) {
        if (bk.getId().equals(BookID.METADATA))
            continue;
        int bsnumber = bk.getId().getZefID();
        final BIBLEBOOK book = f.createBIBLEBOOK();
        book.setBnumber(BigInteger.valueOf(bsnumber));
        book.setBname(bk.getShortName());
        book.setBsname(bk.getAbbr());
        doc.getBIBLEBOOK().add(book);
        int cnumber = 0;
        for (Chapter cch : bk.getChapters()) {
            cnumber++;
            if (cch.getProlog() != null) {
                DIV xx = f.createDIV();
                prologs.add(xx);
                NOTE xxx = f.createNOTE();
                xx.setNOTE(xxx);
                xxx.setType("x-studynote");
                NOTE prolog = xxx;
                DIV vers = f.createDIV();
                prolog.getContent().add("<p>");
                prolog.getContent().add(vers);
                prolog.getContent().add("</p>");
                vers.setNOTE(f.createNOTE());
                // Stack of content lists; index 0 is the list currently being filled.
                // Nested elements push their content list at index 0, visitEnd pops it.
                final List<List<Object>> targetStack = new ArrayList<List<Object>>();
                targetStack.add(vers.getNOTE().getContent());
                cch.getProlog().accept(new Visitor<IOException>() {

                    @Override
                    public Visitor<IOException> visitHeadline(int depth) throws IOException {
                        // HTML only knows h1..h6
                        if (depth > 6)
                            depth = 6;
                        STYLE s = f.createSTYLE();
                        s.setCss("-zef-dummy: true");
                        targetStack.get(0).add("<h" + depth + ">");
                        targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                        targetStack.get(0).add("</h" + depth + ">");
                        targetStack.add(0, s.getContent());
                        return this;
                    }

                    @Override
                    public void visitVerseSeparator() throws IOException {
                        STYLE x = f.createSTYLE();
                        x.setCss("color:gray");
                        x.getContent().add("/");
                        targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, x));
                    }

                    @Override
                    public void visitText(String text) throws IOException {
                        targetStack.get(0).add(text);
                    }

                    @Override
                    public Visitor<IOException> visitFormattingInstruction(FormattedText.FormattingInstructionKind kind) throws IOException {
                        String startTag, endTag;
                        if (kind.getHtmlTag() != null) {
                            startTag = "<" + kind.getHtmlTag() + ">";
                            endTag = "</" + kind.getHtmlTag() + ">";
                        } else {
                            startTag = "<span style=\"" + kind.getCss() + "\">";
                            endTag = "</span>";
                        }
                        // The literal tags surround a dummy STYLE element that holds the children.
                        STYLE s = f.createSTYLE();
                        s.setCss("-zef-dummy: true");
                        targetStack.get(0).add(startTag);
                        targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                        targetStack.get(0).add(endTag);
                        targetStack.add(0, s.getContent());
                        return this;
                    }

                    @Override
                    public Visitor<IOException> visitFootnote() throws IOException {
                        System.out.println("WARNING: Footnotes in prolog are not supported");
                        return null;
                    }

                    @Override
                    public Visitor<IOException> visitCrossReference(String bookAbbr, BookID book, int firstChapter, String firstVerse, int lastChapter, String lastVerse) throws IOException {
                        System.out.println("WARNING: Cross references in prologs are not supported");
                        // Keep the reference text, but without a link.
                        STYLE s = f.createSTYLE();
                        s.setCss("-zef-dummy: true");
                        targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                        targetStack.add(0, s.getContent());
                        return this;
                    }

                    @Override
                    public Visitor<IOException> visitVariationText(String[] variations) throws IOException {
                        throw new RuntimeException("Variations not supported");
                    }

                    @Override
                    public void visitLineBreak(LineBreakKind kind) throws IOException {
                        BR br = f.createBR();
                        br.setArt(kind == LineBreakKind.PARAGRAPH ? EnumBreak.X_P : EnumBreak.X_NL);
                        targetStack.get(0).add(" ");
                        targetStack.get(0).add(kind == LineBreakKind.PARAGRAPH ? "<p>" : "<br>");
                        targetStack.get(0).add(br);
                    }

                    @Override
                    public Visitor<IOException> visitGrammarInformation(int[] strongs, String[] rmac, int[] sourceIndices) throws IOException {
                        throw new RuntimeException("Grammar tags in prologs not supported");
                    }

                    @Override
                    public FormattedText.Visitor<IOException> visitDictionaryEntry(String dictionary, String entry) throws IOException {
                        throw new RuntimeException("Dictionary entries in prologs not supported");
                    }

                    @Override
                    public void visitRawHTML(RawHTMLMode mode, String raw) throws IOException {
                        throw new RuntimeException("Raw HTML in prologs not supported");
                    }

                    @Override
                    public Visitor<IOException> visitCSSFormatting(String css) throws IOException {
                        STYLE s = f.createSTYLE();
                        s.setCss("-zef-dummy: true");
                        targetStack.get(0).add("<span style=\"" + css + "\">");
                        targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                        targetStack.get(0).add("</span>");
                        targetStack.add(0, s.getContent());
                        return this;
                    }

                    @Override
                    public int visitElementTypes(String elementTypes) throws IOException {
                        return 0;
                    }

                    @Override
                    public Visitor<IOException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws IOException {
                        if (prio == ExtraAttributePriority.KEEP_CONTENT)
                            return visitCSSFormatting("-zef-extra-attribute-" + category + "-" + key + ": " + value);
                        else if (prio == ExtraAttributePriority.SKIP)
                            return null;
                        throw new RuntimeException("Extra attributes not supported");
                    }

                    @Override
                    public void visitStart() throws IOException {
                    }

                    @Override
                    public boolean visitEnd() throws IOException {
                        targetStack.remove(0);
                        return false;
                    }
                });
                // Every pushed list must have been popped again.
                if (targetStack.size() != 0)
                    throw new RuntimeException();
            }
            // Chapters without verses are not exported; a pending prolog stays queued.
            if (cch.getVerses().size() == 0)
                continue;
            CHAPTER chapter = f.createCHAPTER();
            chapter.setCnumber(BigInteger.valueOf(cnumber));
            book.getCHAPTER().add(chapter);
            for (VirtualVerse vv : cch.createVirtualVerses()) {
                for (Headline h : vv.getHeadlines()) {
                    CAPTION caption = f.createCAPTION();
                    // Indexed by headline depth; depths above 6 share the deepest caption type.
                    EnumCaptionType[] types = new EnumCaptionType[] { null, EnumCaptionType.X_H_1, EnumCaptionType.X_H_2, EnumCaptionType.X_H_3, EnumCaptionType.X_H_4, EnumCaptionType.X_H_5, EnumCaptionType.X_H_6, EnumCaptionType.X_H_6, EnumCaptionType.X_H_6, EnumCaptionType.X_H_6 };
                    caption.setType(types[h.getDepth()]);
                    caption.setVref(BigInteger.valueOf(vv.getNumber()));
                    // Captions are exported as plain text collected in this buffer.
                    final StringBuilder sb = new StringBuilder();
                    h.accept(new FormattedText.VisitorAdapter<RuntimeException>(null) {

                        @Override
                        protected void beforeVisit() throws RuntimeException {
                            throw new IllegalStateException();
                        }

                        @Override
                        public Visitor<RuntimeException> visitFormattingInstruction(FormattingInstructionKind kind) throws RuntimeException {
                            System.out.println("WARNING: Formatting instructions in captions are not supported (stripped)");
                            return this;
                        }

                        @Override
                        public Visitor<RuntimeException> visitFootnote() throws RuntimeException {
                            System.out.println("WARNING: Footnotes in captions are not supported (stripped)");
                            return null;
                        }

                        @Override
                        public Visitor<RuntimeException> visitCSSFormatting(String css) throws RuntimeException {
                            System.out.println("WARNING: CSS Formatting in captions are not supported (stripped)");
                            return this;
                        }

                        @Override
                        public Visitor<RuntimeException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws RuntimeException {
                            return prio.handleVisitor(category, this);
                        }

                        @Override
                        public void visitText(String text) throws RuntimeException {
                            sb.append(text);
                        }
                    });
                    caption.getContent().add(sb.toString());
                    chapter.getPROLOGOrCAPTIONOrVERS().add(caption);
                }
                VERS vers = f.createVERS();
                vers.setVnumber(BigInteger.valueOf(vv.getNumber()));
                // Attach all prologs collected so far to this verse.
                for (DIV prolog : prologs) {
                    vers.getContent().add(prolog);
                }
                prologs.clear();
                chapter.getPROLOGOrCAPTIONOrVERS().add(vers);
                boolean first = true;
                for (Verse v : vv.getVerses()) {
                    // Show the real verse number inline unless it is the first verse and its
                    // number equals the virtual verse number.
                    if (!first || !v.getNumber().equals("" + vv.getNumber())) {
                        STYLE x = f.createSTYLE();
                        x.setCss("font-weight: bold");
                        x.getContent().add("(" + v.getNumber() + ")");
                        vers.getContent().add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, x));
                        vers.getContent().add(" ");
                    }
                    first = false;
                    // Stack of content lists for the verse; index 0 is the current target.
                    final List<List<Object>> targetStack = new ArrayList<List<Object>>();
                    targetStack.add(vers.getContent());
                    v.accept(new FormattedText.Visitor<IOException>() {

                        @Override
                        public void visitVerseSeparator() throws IOException {
                            STYLE x = f.createSTYLE();
                            x.setCss("color:gray");
                            x.getContent().add("/");
                            targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, x));
                        }

                        @Override
                        public void visitText(String text) throws IOException {
                            targetStack.get(0).add(text);
                        }

                        @Override
                        public FormattedText.Visitor<IOException> visitFormattingInstruction(biblemulticonverter.data.FormattedText.FormattingInstructionKind kind) throws IOException {
                            STYLE x = f.createSTYLE();
                            x.setCss(kind.getCss());
                            targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, x));
                            targetStack.add(0, x.getContent());
                            return this;
                        }

                        @Override
                        public Visitor<IOException> visitFootnote() throws IOException {
                            DIV x = f.createDIV();
                            targetStack.get(0).add(x);
                            NOTE n = f.createNOTE();
                            x.setNOTE(n);
                            n.setType("x-studynote");
                            // Separate stack for the footnote content; index 0 is the current target.
                            final List<List<Object>> footnoteStack = new ArrayList<List<Object>>();
                            footnoteStack.add(n.getContent());
                            return new Visitor<IOException>() {

                                @Override
                                public void visitStart() throws IOException {
                                }

                                @Override
                                public void visitVerseSeparator() throws IOException {
                                    STYLE x = f.createSTYLE();
                                    x.setCss("color:gray");
                                    x.getContent().add("/");
                                    footnoteStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, x));
                                }

                                @Override
                                public void visitText(String text) throws IOException {
                                    footnoteStack.get(0).add(text);
                                }

                                @Override
                                public void visitLineBreak(LineBreakKind kind) throws IOException {
                                    BR br = f.createBR();
                                    br.setArt(kind == LineBreakKind.PARAGRAPH ? EnumBreak.X_P : EnumBreak.X_NL);
                                    footnoteStack.get(0).add(" ");
                                    footnoteStack.get(0).add(br);
                                }

                                @Override
                                public Visitor<IOException> visitFormattingInstruction(FormattedText.FormattingInstructionKind kind) throws IOException {
                                    String startTag, endTag;
                                    if (kind.getHtmlTag() != null) {
                                        startTag = "<" + kind.getHtmlTag() + ">";
                                        endTag = "</" + kind.getHtmlTag() + ">";
                                    } else {
                                        startTag = "<span style=\"" + kind.getCss() + "\">";
                                        endTag = "</span>";
                                    }
                                    STYLE s = f.createSTYLE();
                                    s.setCss("-zef-dummy: true");
                                    footnoteStack.get(0).add(startTag);
                                    footnoteStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                                    footnoteStack.get(0).add(endTag);
                                    footnoteStack.add(0, s.getContent());
                                    return this;
                                }

                                @Override
                                public Visitor<IOException> visitFootnote() throws IOException {
                                    throw new RuntimeException("Footnotes in footnotes are not supported");
                                }

                                @Override
                                public Visitor<IOException> visitGrammarInformation(int[] strongs, String[] rmac, int[] sourceIndices) throws IOException {
                                    GRAM gram = f.createGRAM();
                                    if (strongs != null) {
                                        StringBuilder entryBuilder = new StringBuilder();
                                        for (int i = 0; i < strongs.length; i++) {
                                            entryBuilder.append((i > 0 ? " " : "") + strongs[i]);
                                        }
                                        String entry = entryBuilder.toString();
                                        gram.setStr(entry);
                                    }
                                    if (rmac != null) {
                                        StringBuilder entryBuilder = new StringBuilder();
                                        for (int i = 0; i < rmac.length; i++) {
                                            if (i > 0)
                                                entryBuilder.append(' ');
                                            entryBuilder.append(rmac[i]);
                                        }
                                        gram.setRmac(entryBuilder.toString());
                                    }
                                    footnoteStack.get(0).add(new JAXBElement<GRAM>(new QName("gr"), GRAM.class, gram));
                                    footnoteStack.add(0, gram.getContent());
                                    return this;
                                }

                                @Override
                                public FormattedText.Visitor<IOException> visitDictionaryEntry(String dictionary, String entry) throws IOException {
                                    GRAM gram = f.createGRAM();
                                    gram.setStr(entry);
                                    footnoteStack.get(0).add(new JAXBElement<GRAM>(new QName("gr"), GRAM.class, gram));
                                    footnoteStack.add(0, gram.getContent());
                                    return this;
                                }

                                @Override
                                public void visitRawHTML(RawHTMLMode mode, String raw) throws IOException {
                                    if (mode != RawHTMLMode.ONLINE)
                                        footnoteStack.get(0).add(raw);
                                }

                                @Override
                                public Visitor<IOException> visitVariationText(String[] variations) throws IOException {
                                    throw new RuntimeException("Variations not supported");
                                }

                                @Override
                                public FormattedText.Visitor<IOException> visitCrossReference(String bookAbbr, BookID book, int firstChapter, String firstVerse, int lastChapter, String lastVerse) throws IOException {
                                    STYLE s = f.createSTYLE();
                                    s.setCss("-zef-dummy: true");
                                    int bookID = book.getZefID();
                                    String mscope, xmscope;
                                    try {
                                        // Reduce verse identifiers to their leading number: drop letter
                                        // suffixes (a-z, G) and anything after ',' or '/'.
                                        int start = firstVerse.equals("^") ? 1 : Integer.parseInt(firstVerse.replaceAll("[a-zG]|[,/][0-9]*", ""));
                                        int end;
                                        if (firstChapter == lastChapter && !lastVerse.equals("$")) {
                                            // Same suffix set as for the first verse, so that a last verse
                                            // with a 'G' suffix does not force the whole-book fallback.
                                            end = Integer.parseInt(lastVerse.replaceAll("[a-zG]|[,/][0-9]*", ""));
                                        } else {
                                            end = -1;
                                        }
                                        mscope = bookID + "," + firstChapter + "," + start + "," + end;
                                        xmscope = bookID + ";" + firstChapter + ";" + start + "-" + end;
                                    } catch (NumberFormatException ex) {
                                        // Unparseable verse number: link to a generic scope of the book instead.
                                        ex.printStackTrace();
                                        mscope = bookID + ",1,1,999";
                                        xmscope = bookID + ";1;1-999";
                                    }
                                    // For references directly at footnote top level, additionally emit
                                    // an XREF in the enclosing list, right before its last element.
                                    if (footnoteStack.size() == 1) {
                                        List<Object> outerList = targetStack.get(0);
                                        XREF xref = new XREF();
                                        xref.setMscope(xmscope);
                                        outerList.add(outerList.size() - 1, xref);
                                    }
                                    footnoteStack.get(0).add("<a href=\"mybible:content=location&amp;locations=" + mscope + "\">");
                                    footnoteStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                                    footnoteStack.get(0).add("</a>");
                                    footnoteStack.add(0, s.getContent());
                                    return this;
                                }

                                @Override
                                public boolean visitEnd() throws IOException {
                                    footnoteStack.remove(0);
                                    return false;
                                }

                                @Override
                                public int visitElementTypes(String elementTypes) throws IOException {
                                    return 0;
                                }

                                @Override
                                public Visitor<IOException> visitHeadline(int depth) throws IOException {
                                    throw new RuntimeException("Headlines in footnotes not supported");
                                }

                                @Override
                                public Visitor<IOException> visitCSSFormatting(String css) throws IOException {
                                    STYLE s = f.createSTYLE();
                                    s.setCss("-zef-dummy: true");
                                    footnoteStack.get(0).add("<span style=\"" + css + "\">");
                                    footnoteStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                                    footnoteStack.get(0).add("</span>");
                                    // Push at index 0 (the top of the stack). Appending at the end would
                                    // leave the enclosing list as current target, so nested text would be
                                    // written outside the span and visitEnd would pop the wrong list.
                                    footnoteStack.add(0, s.getContent());
                                    return this;
                                }

                                @Override
                                public Visitor<IOException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws IOException {
                                    System.out.println("WARNING: Extra attributes not supported");
                                    Visitor<IOException> result = prio.handleVisitor(category, this);
                                    // Content is kept: duplicate the current target so the matching
                                    // visitEnd pops the duplicate only.
                                    if (result != null)
                                        footnoteStack.add(0, footnoteStack.get(0));
                                    return result;
                                }
                            };
                        }

                        @Override
                        public FormattedText.Visitor<IOException> visitCrossReference(String bookAbbr, BookID book, int firstChapter, String firstVerse, int lastChapter, String lastVerse) throws IOException {
                            throw new RuntimeException("Xref outside of footnotes not supported!");
                        }

                        @Override
                        public void visitLineBreak(LineBreakKind kind) throws IOException {
                            BR br = f.createBR();
                            br.setArt(kind == LineBreakKind.PARAGRAPH ? EnumBreak.X_P : EnumBreak.X_NL);
                            targetStack.get(0).add(" ");
                            targetStack.get(0).add(br);
                        }

                        @Override
                        public Visitor<IOException> visitGrammarInformation(int[] strongs, String[] rmac, int[] sourceIndices) throws IOException {
                            GRAM gram = f.createGRAM();
                            if (strongs != null) {
                                StringBuilder entryBuilder = new StringBuilder();
                                for (int i = 0; i < strongs.length; i++) {
                                    entryBuilder.append((i > 0 ? " " : "") + strongs[i]);
                                }
                                String entry = entryBuilder.toString();
                                gram.setStr(entry);
                            }
                            if (rmac != null) {
                                StringBuilder entryBuilder = new StringBuilder();
                                for (int i = 0; i < rmac.length; i++) {
                                    if (i > 0)
                                        entryBuilder.append(' ');
                                    entryBuilder.append(rmac[i]);
                                }
                                gram.setRmac(entryBuilder.toString());
                            }
                            targetStack.get(0).add(new JAXBElement<GRAM>(new QName("gr"), GRAM.class, gram));
                            targetStack.add(0, gram.getContent());
                            return this;
                        }

                        @Override
                        public FormattedText.Visitor<IOException> visitDictionaryEntry(String dictionary, String entry) throws IOException {
                            GRAM gram = f.createGRAM();
                            gram.setStr(entry);
                            targetStack.get(0).add(new JAXBElement<GRAM>(new QName("gr"), GRAM.class, gram));
                            targetStack.add(0, gram.getContent());
                            return this;
                        }

                        @Override
                        public void visitRawHTML(RawHTMLMode mode, String raw) throws IOException {
                            throw new RuntimeException("Raw HTML is not supported");
                        }

                        @Override
                        public Visitor<IOException> visitVariationText(String[] variations) throws IOException {
                            throw new RuntimeException("Variations not supported");
                        }

                        @Override
                        public boolean visitEnd() throws IOException {
                            targetStack.remove(0);
                            return false;
                        }

                        @Override
                        public int visitElementTypes(String elementTypes) throws IOException {
                            return 0;
                        }

                        @Override
                        public Visitor<IOException> visitHeadline(int depth) throws IOException {
                            throw new RuntimeException("Headline in virtual verse is impossible");
                        }

                        @Override
                        public void visitStart() throws IOException {
                        }

                        @Override
                        public Visitor<IOException> visitCSSFormatting(String css) throws IOException {
                            STYLE x = f.createSTYLE();
                            x.setCss(css);
                            targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, x));
                            targetStack.add(0, x.getContent());
                            return this;
                        }

                        @Override
                        public Visitor<IOException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws IOException {
                            System.out.println("WARNING: Extra attributes not supported");
                            Visitor<IOException> result = prio.handleVisitor(category, this);
                            // Content is kept: duplicate the current target so the matching
                            // visitEnd pops the duplicate only.
                            if (result != null)
                                targetStack.add(0, targetStack.get(0));
                            return result;
                        }
                    });
                    // Every pushed list must have been popped again.
                    if (targetStack.size() != 0)
                        throw new RuntimeException();
                }
            }
        }
        // Do not export books that ended up without any chapter.
        if (book.getCHAPTER().size() == 0) {
            doc.getBIBLEBOOK().remove(book);
        }
    }
    // Marshal into a DOM first so the root attributes and whitespace can be post-processed.
    final Document docc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    JAXBContext.newInstance(ObjectFactory.class.getPackage().getName()).createMarshaller().marshal(doc, docc);
    docc.getDocumentElement().setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
    docc.getDocumentElement().setAttribute("xsi:noNamespaceSchemaLocation", "zef2005.xsd");
    docc.normalize();
    maskWhitespaceNodes(docc.getDocumentElement());
    try (FileOutputStream fos = new FileOutputStream(exportArgs[0])) {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        transformer.transform(new DOMSource(docc), new StreamResult(fos));
    }
}
Also used : DOMSource(javax.xml.transform.dom.DOMSource) VirtualVerse(biblemulticonverter.data.VirtualVerse) ArrayList(java.util.ArrayList) RawHTMLMode(biblemulticonverter.data.FormattedText.RawHTMLMode) Document(org.w3c.dom.Document) ObjectFactory(biblemulticonverter.schema.zef2005.ObjectFactory) BookID(biblemulticonverter.data.BookID) Book(biblemulticonverter.data.Book) MetadataBook(biblemulticonverter.data.MetadataBook) Headline(biblemulticonverter.data.FormattedText.Headline) VERS(biblemulticonverter.schema.zef2005.VERS) List(java.util.List) ArrayList(java.util.ArrayList) MetadataBook(biblemulticonverter.data.MetadataBook) StreamResult(javax.xml.transform.stream.StreamResult) STYLE(biblemulticonverter.schema.zef2005.STYLE) JAXBElement(javax.xml.bind.JAXBElement) FormattedText(biblemulticonverter.data.FormattedText) BIBLEBOOK(biblemulticonverter.schema.zef2005.BIBLEBOOK) LineBreakKind(biblemulticonverter.data.FormattedText.LineBreakKind) XREF(biblemulticonverter.schema.zef2005.XREF) FileOutputStream(java.io.FileOutputStream) BigInteger(java.math.BigInteger) FormattingInstructionKind(biblemulticonverter.data.FormattedText.FormattingInstructionKind) ExtraAttributePriority(biblemulticonverter.data.FormattedText.ExtraAttributePriority) Transformer(javax.xml.transform.Transformer) Visitor(biblemulticonverter.data.FormattedText.Visitor) BR(biblemulticonverter.schema.zef2005.BR) NOTE(biblemulticonverter.schema.zef2005.NOTE) MetadataBookKey(biblemulticonverter.data.MetadataBook.MetadataBookKey) FormattingInstructionKind(biblemulticonverter.data.FormattedText.FormattingInstructionKind) XMLBIBLE(biblemulticonverter.schema.zef2005.XMLBIBLE) QName(javax.xml.namespace.QName) Chapter(biblemulticonverter.data.Chapter) FormattingInstructionKind(biblemulticonverter.data.FormattedText.FormattingInstructionKind) IOException(java.io.IOException) DIV(biblemulticonverter.schema.zef2005.DIV) EnumCaptionType(biblemulticonverter.schema.zef2005.EnumCaptionType) 
CHAPTER(biblemulticonverter.schema.zef2005.CHAPTER) GRAM(biblemulticonverter.schema.zef2005.GRAM) CAPTION(biblemulticonverter.schema.zef2005.CAPTION) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Example 7 with MetadataBook

use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.

the class ZefaniaXMLRoundtrip method parseBible.

/**
 * Converts a Zefania XML document into a {@link Bible}.
 *
 * <p>The document's status, version, revision and INFORMATION entries are
 * stored in a {@link MetadataBook} (added to the result only if it has at
 * least one key). The books are then processed in two passes: the first
 * pass only assigns a unique abbreviation to every book ID, so that cross
 * references can point to books that appear later in the document; the
 * second pass builds the books, chapters and verses.
 *
 * @param doc the document to convert
 * @return the converted bible
 * @throws Exception if the document contains content this parser does not
 *         accept (reported as {@link IOException})
 */
protected Bible parseBible(XMLBIBLE doc) throws Exception {
    Bible result = new Bible(doc.getBiblename());
    MetadataBook metadata = new MetadataBook();
    if (doc.getStatus() != null) {
        metadata.setValue(MetadataBookKey.status, doc.getStatus().value());
    }
    if (doc.getVersion() != null) {
        metadata.setValue(MetadataBookKey.version, doc.getVersion());
    }
    if (doc.getRevision() != null) {
        metadata.setValue(MetadataBookKey.revision, doc.getRevision().toString());
    }
    for (JAXBElement<?> elem : doc.getINFORMATION().getTitleOrCreatorOrDescription()) {
        if (elem.getValue() == null)
            continue;
        String value = normalize(elem.getValue().toString(), true).trim();
        // An empty value is stored as the placeholder "-empty-".
        if (value.length() == 0)
            value = "-empty-";
        metadata.setValue(elem.getName().getLocalPart(), value);
    }
    metadata.finished();
    if (metadata.getKeys().size() > 0)
        result.getBooks().add(metadata.getBook());
    Set<String> abbrs = new HashSet<String>();
    Set<String> shortnames = new HashSet<String>();
    Map<BookID, String> abbrMap = new EnumMap<BookID, String>(BookID.class);
    List<BIBLEBOOK> nl = doc.getBIBLEBOOK();
    // First pass: assign an abbreviation to every book ID up front, since
    // abbrMap is consulted while parsing content and cross references below.
    for (BIBLEBOOK e : nl) {
        String shortname = e.getBsname();
        int number = e.getBnumber().intValue();
        BookID bookID = BookID.fromZefId(number);
        if (shortname == null)
            shortname = "_" + bookID.getOsisID();
        else if (shortname.length() == 0)
            shortname = "_" + bookID.getOsisID() + "[[]]";
        abbrMap.put(bookID, deriveUniqueAbbr(shortname, abbrs));
    }
    // Second pass starts from scratch so that it derives the same
    // abbreviations in the same order as the first pass.
    abbrs.clear();
    for (BIBLEBOOK e : nl) {
        String shortname = e.getBsname();
        String longname = e.getBname();
        int number = e.getBnumber().intValue();
        BookID bookID = BookID.fromZefId(number);
        // A missing name becomes "_" + default name; an empty name
        // additionally gets the suffix "[[]]" so the two cases stay distinct.
        if (shortname == null)
            shortname = "_" + bookID.getOsisID();
        else if (shortname.length() == 0)
            shortname = "_" + bookID.getOsisID() + "[[]]";
        if (longname == null)
            longname = "_" + bookID.getEnglishName();
        else if (longname.length() == 0)
            longname = "_" + bookID.getEnglishName() + "[[]]";
        else
            longname = longname.replaceAll("  ++", " ").trim();
        String abbr = deriveUniqueAbbr(shortname, abbrs);
        // Known inconsistencies in input files: keep the original name inside
        // "[[...]]" and use a corrected name in front of it.
        if (shortname.equals("Gen") && longname.equals("Genesis") && bookID == BookID.BOOK_Exod) {
            System.out.println("WARNING: Book number " + bookID.getZefID() + " has name " + longname);
            shortname = "Exo[[Gen]]";
            longname = "Exodus[[Genesis]]";
        }
        if (shortname.equals("1Chr") && longname.equals("2 Chronicles")) {
            System.out.println("WARNING: Book name 2 Chronicles has short name 1Chr");
            shortname = "2Chr[[1Chr]]";
        }
        if (shortnames.contains(shortname)) {
            System.out.println("WARNING: Duplicate short name " + shortname);
            for (int i = 2; i < 100; i++) {
                if (!shortnames.contains(shortname + i + "[[" + shortname + "]]")) {
                    shortname = shortname + i + "[[" + shortname + "]]";
                    break;
                }
            }
        }
        shortnames.add(shortname);
        Book book = new Book(abbr, bookID, shortname, longname);
        // vref of the captions collected in headlineBuffer, or -1 if none are pending.
        int lastvref = -1;
        List<Headline> headlineBuffer = new ArrayList<Headline>();
        for (CHAPTER e2 : e.getCHAPTER()) {
            int chapterNumber = e2.getCnumber().intValue();
            // Fill gaps with empty chapters so that list index == chapter number - 1.
            while (book.getChapters().size() < chapterNumber) {
                book.getChapters().add(new Chapter());
            }
            Chapter chapter = book.getChapters().get(chapterNumber - 1);
            for (Object e3 : e2.getPROLOGOrCAPTIONOrVERS()) {
                if (e3 instanceof CAPTION) {
                    // Captions are buffered and attached to the next VERS element,
                    // which must carry the same verse number.
                    CAPTION caption = (CAPTION) e3;
                    if (lastvref != -1 && lastvref != caption.getVref().intValue())
                        throw new IOException("Caption vref " + caption.getVref() + " does not match pending caption vref " + lastvref);
                    lastvref = caption.getVref().intValue();
                    int level;
                    if (caption.getType() == null) {
                        level = 9;
                    } else {
                        switch(caption.getType()) {
                            case X_H_1:
                                level = 1;
                                break;
                            case X_H_2:
                                level = 2;
                                break;
                            case X_H_3:
                                level = 3;
                                break;
                            case X_H_4:
                                level = 4;
                                break;
                            case X_H_5:
                                level = 5;
                                break;
                            case X_H_6:
                                level = 6;
                                break;
                            default:
                                throw new IOException("Unsupported caption type: " + caption.getType());
                        }
                    }
                    Headline h = new Headline(level);
                    headlineBuffer.add(h);
                    if (!parseContent(h.getAppendVisitor(), caption.getContent(), abbrMap)) {
                        visitEmptyMarker(h.getAppendVisitor());
                    } else {
                        h.trimWhitespace();
                    }
                    h.finished();
                } else if (e3 instanceof REMARK) {
                    // A remark is appended as a footnote to an already parsed verse.
                    REMARK remark = (REMARK) e3;
                    int vref = remark.getVref().intValue();
                    int idx = chapter.getVerseIndex("" + vref);
                    if (idx == -1)
                        throw new IOException(vref + ":" + remark.getContent());
                    Verse v = chapter.getVerses().get(idx);
                    if (remark.getContent().size() != 1)
                        throw new IOException("Remark for verse " + vref + " must have exactly one content item, found " + remark.getContent().size());
                    String remarkText = normalize((String) remark.getContent().get(0), true).trim();
                    v.getAppendVisitor().visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "remark").visitFootnote().visitText(remarkText);
                } else if (e3 instanceof XREF) {
                    // An outer cross reference is appended as a footnote to an already parsed verse.
                    XREF xref = (XREF) e3;
                    int vref = xref.getVref().intValue();
                    int idx = chapter.getVerseIndex("" + vref);
                    if (idx == -1)
                        throw new IOException(vref + ":" + xref.getMscope());
                    Verse v = chapter.getVerses().get(idx);
                    Visitor<RuntimeException> footnoteVisitor = v.getAppendVisitor().visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "outer-xref").visitFootnote();
                    boolean first = true;
                    // Each scope has the form book;chapter[-endChapter];verse[-endVerse].
                    for (String mscope : xref.getMscope().split(" ")) {
                        Matcher m = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(mscope);
                        if (!m.matches())
                            throw new IOException(mscope);
                        BookID xrefBookID = BookID.fromZefId(Integer.parseInt(m.group(1)));
                        int xrefChapter = Integer.parseInt(m.group(2)), endChapter = xrefChapter;
                        if (m.group(3) != null)
                            endChapter = Integer.parseInt(m.group(3).substring(1));
                        String verse = m.group(4);
                        // Verse number 0 is replaced by the verse id "1//G".
                        if (verse.equals("0"))
                            verse = "1//G";
                        String endVerse = m.group(5);
                        if (endVerse == null)
                            endVerse = verse;
                        else
                            endVerse = endVerse.substring(1);
                        if (endVerse.equals("0"))
                            endVerse = "1//G";
                        String xrefAbbr = abbrMap.get(xrefBookID);
                        // Referenced book is not part of this document: fall back to its OSIS id.
                        if (xrefAbbr == null)
                            xrefAbbr = xrefBookID.getOsisID();
                        if (first)
                            first = false;
                        else
                            footnoteVisitor.visitText(" ");
                        // Reversed verse range within one chapter: swap start and end.
                        if (xrefChapter == endChapter && !verse.equals("1//G") && !endVerse.equals("1//G") && Integer.parseInt(verse) > Integer.parseInt(endVerse)) {
                            String tmp = verse;
                            verse = endVerse;
                            endVerse = tmp;
                        }
                        footnoteVisitor.visitCrossReference(xrefAbbr, xrefBookID, xrefChapter, verse, endChapter, endVerse).visitText(xrefAbbr + " " + xrefChapter + ":" + verse);
                    }
                } else if (e3 instanceof PROLOG) {
                    PROLOG prolog = (PROLOG) e3;
                    if (prolog.getVref().intValue() != 1)
                        throw new IOException("" + prolog.getVref());
                    if (chapter.getProlog() != null)
                        throw new IOException("More than one prolog found");
                    FormattedText prologText = new FormattedText();
                    // A prolog for which parseContent reports no content is dropped.
                    if (parseContent(prologText.getAppendVisitor(), prolog.getContent(), abbrMap)) {
                        prologText.trimWhitespace();
                        prologText.finished();
                        chapter.setProlog(prologText);
                    }
                } else if (e3 instanceof VERS) {
                    VERS vers = (VERS) e3;
                    int vnumber = vers.getVnumber().intValue();
                    if (lastvref != -1) {
                        if (lastvref != vnumber)
                            throw new IOException(lastvref + " != " + vnumber);
                        lastvref = -1;
                    }
                    Verse verse = new Verse("" + vnumber);
                    Visitor<RuntimeException> visitor = verse.getAppendVisitor();
                    boolean contentFound = false;
                    // Emit the buffered captions as headlines at the start of this verse.
                    if (headlineBuffer.size() > 0) {
                        for (Headline h : headlineBuffer) {
                            h.accept(visitor.visitHeadline(h.getDepth()));
                        }
                        headlineBuffer.clear();
                        contentFound = true;
                    }
                    contentFound |= parseContent(visitor, vers.getContent(), abbrMap);
                    if (!contentFound) {
                        visitEmptyMarker(visitor);
                    }
                    verse.trimWhitespace();
                    chapter.getVerses().add(verse);
                } else {
                    throw new IOException(e3.getClass().toString());
                }
            }
            // Verses are finished only here because REMARK/XREF elements
            // append footnotes to verses that were added earlier.
            for (Verse v : chapter.getVerses()) {
                v.finished();
            }
        }
        result.getBooks().add(book);
    }
    return result;
}

/**
 * Derives a book abbreviation from a short name and registers it as used.
 *
 * <p>All characters outside {@code A-Z0-9a-zäöü} are removed. The result is
 * prefixed with {@code X} if it is empty or starts with a lower case
 * character, and padded with {@code x} if it is only one character long.
 * If the abbreviation is already in use, the first free numeric suffix in
 * the range 2..99 is appended; if none is free, the duplicate is returned
 * unchanged.
 *
 * @param shortname short name to derive the abbreviation from
 * @param usedAbbrs abbreviations already taken; the result is added to it
 * @return the derived abbreviation
 */
private static String deriveUniqueAbbr(String shortname, Set<String> usedAbbrs) {
    String abbr = shortname.replaceAll("[^A-Z0-9a-zäöü]++", "");
    if (abbr.length() == 0 || Character.isLowerCase(abbr.charAt(0)))
        abbr = "X" + abbr;
    if (abbr.length() == 1)
        abbr += "x";
    if (usedAbbrs.contains(abbr)) {
        for (int i = 2; i < 100; i++) {
            if (!usedAbbrs.contains(abbr + i)) {
                abbr = abbr + i;
                break;
            }
        }
    }
    usedAbbrs.add(abbr);
    return abbr;
}
Also used : Matcher(java.util.regex.Matcher) Bible(biblemulticonverter.data.Bible) ArrayList(java.util.ArrayList) BookID(biblemulticonverter.data.BookID) Book(biblemulticonverter.data.Book) MetadataBook(biblemulticonverter.data.MetadataBook) Headline(biblemulticonverter.data.FormattedText.Headline) VERS(biblemulticonverter.schema.zef2005.VERS) PROLOG(biblemulticonverter.schema.zef2005.PROLOG) EnumMap(java.util.EnumMap) REMARK(biblemulticonverter.schema.zef2005.REMARK) HashSet(java.util.HashSet) MetadataBook(biblemulticonverter.data.MetadataBook) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FormattedText(biblemulticonverter.data.FormattedText) BIBLEBOOK(biblemulticonverter.schema.zef2005.BIBLEBOOK) XREF(biblemulticonverter.schema.zef2005.XREF) CHAPTER(biblemulticonverter.schema.zef2005.CHAPTER) CAPTION(biblemulticonverter.schema.zef2005.CAPTION) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Example 8 with MetadataBook

use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.

the class ZefaniaXMLRoundtrip method createXMLBible.

/**
 * Builds a Zefania XML document from the given {@link Bible}.
 *
 * <p>Metadata values for status, version and revision are written to the
 * corresponding document attributes; values whose key is listed in
 * {@code INFORMATION_KEYS} become INFORMATION child elements. Books without
 * a positive Zefania book number are skipped with a warning, and chapters
 * without verses are not written.
 *
 * @param bible the bible to convert
 * @return the populated document
 * @throws Exception declared for overriding implementations and helpers
 */
protected XMLBIBLE createXMLBible(Bible bible) throws Exception {
    final ObjectFactory factory = new ObjectFactory();
    final XMLBIBLE xmlBible = factory.createXMLBIBLE();
    xmlBible.setBiblename(bible.getName());
    xmlBible.setType(EnumModtyp.X_BIBLE);
    xmlBible.setINFORMATION(factory.createINFORMATION());

    // Caption type indexed by headline depth; depths 0 and 7..9 have no type.
    final EnumCaptionType[] captionTypeByDepth = { null, EnumCaptionType.X_H_1, EnumCaptionType.X_H_2, EnumCaptionType.X_H_3, EnumCaptionType.X_H_4, EnumCaptionType.X_H_5, EnumCaptionType.X_H_6, null, null, null };

    final MetadataBook meta = bible.getMetadataBook();
    if (meta != null) {
        for (String metaKey : meta.getKeys()) {
            String stored = meta.getValue(metaKey);
            // "-empty-" is the placeholder for an empty value.
            String metaValue = stored.equals("-empty-") ? "" : stored;
            if (metaKey.equals(MetadataBookKey.status.toString())) {
                xmlBible.setStatus(EnumStatus.fromValue(metaValue));
            } else if (metaKey.equals(MetadataBookKey.version.toString())) {
                xmlBible.setVersion(metaValue);
            } else if (metaKey.equals(MetadataBookKey.revision.toString())) {
                xmlBible.setRevision(new BigInteger(metaValue));
            } else if (Arrays.asList(INFORMATION_KEYS).contains(metaKey)) {
                JAXBElement<String> infoElement = new JAXBElement<String>(new QName(metaKey), String.class, metaValue);
                xmlBible.getINFORMATION().getTitleOrCreatorOrDescription().add(infoElement);
            }
        }
    }

    for (Book sourceBook : bible.getBooks()) {
        BookID id = sourceBook.getId();
        if (id.equals(BookID.METADATA)) {
            continue;
        }
        if (id.getZefID() <= 0) {
            System.out.println("WARNING: Unable to export book " + sourceBook.getAbbr());
            continue;
        }

        BIBLEBOOK xmlBook = factory.createBIBLEBOOK();
        xmlBook.setBnumber(BigInteger.valueOf(id.getZefID()));
        // Names equal to "_" + default name are not written.
        String shortName = removeRoundtripMarker(sourceBook.getShortName());
        String longName = removeRoundtripMarker(sourceBook.getLongName());
        if (!shortName.equals("_" + id.getOsisID())) {
            xmlBook.setBsname(shortName);
        }
        if (!longName.equals("_" + id.getEnglishName())) {
            xmlBook.setBname(longName);
        }

        int chapterNumber = 0;
        for (Chapter chapter : sourceBook.getChapters()) {
            chapterNumber++;
            if (chapter.getVerses().isEmpty()) {
                continue;
            }
            CHAPTER xmlChapter = factory.createCHAPTER();
            xmlChapter.setCnumber(BigInteger.valueOf(chapterNumber));
            xmlBook.getCHAPTER().add(xmlChapter);
            List<Object> chapterItems = xmlChapter.getPROLOGOrCAPTIONOrVERS();

            FormattedText prologText = chapter.getProlog();
            if (prologText != null) {
                PROLOG xmlProlog = factory.createPROLOG();
                xmlProlog.setVref(BigInteger.ONE);
                prologText.accept(new CreateContentVisitor(factory, xmlProlog.getContent(), null, 0, null));
                chapterItems.add(xmlProlog);
            }

            for (VirtualVerse virtualVerse : chapter.createVirtualVerses()) {
                // Headlines come first, as captions referring to the verse number.
                for (Headline headline : virtualVerse.getHeadlines()) {
                    CAPTION xmlCaption = factory.createCAPTION();
                    xmlCaption.setVref(BigInteger.valueOf(virtualVerse.getNumber()));
                    headline.accept(new CreateContentVisitor(factory, xmlCaption.getContent(), null, 0, null));
                    xmlCaption.setType(captionTypeByDepth[headline.getDepth()]);
                    chapterItems.add(xmlCaption);
                }

                List<Object> trailingItems = new ArrayList<Object>();
                VERS xmlVerse = factory.createVERS();
                xmlVerse.setVnumber(BigInteger.valueOf(virtualVerse.getNumber()));
                String virtualNumber = String.valueOf(virtualVerse.getNumber());
                for (Verse verse : virtualVerse.getVerses()) {
                    // A verse whose number differs from the virtual verse number
                    // gets its own number prepended in bold.
                    if (!verse.getNumber().equals(virtualNumber)) {
                        STYLE numberStyle = factory.createSTYLE();
                        numberStyle.setCss("font-weight: bold");
                        numberStyle.getContent().add("(" + verse.getNumber() + ")");
                        xmlVerse.getContent().add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, numberStyle));
                        xmlVerse.getContent().add(" ");
                    }
                    verse.accept(new CreateContentVisitor(factory, xmlVerse.getContent(), trailingItems, virtualVerse.getNumber(), null));
                }
                // Items collected by the visitor are written after the verse itself.
                chapterItems.add(xmlVerse);
                chapterItems.addAll(trailingItems);
            }
        }
        xmlBible.getBIBLEBOOK().add(xmlBook);
    }
    return xmlBible;
}
Also used : VirtualVerse(biblemulticonverter.data.VirtualVerse) ArrayList(java.util.ArrayList) ObjectFactory(biblemulticonverter.schema.zef2005.ObjectFactory) BookID(biblemulticonverter.data.BookID) Book(biblemulticonverter.data.Book) MetadataBook(biblemulticonverter.data.MetadataBook) Headline(biblemulticonverter.data.FormattedText.Headline) VERS(biblemulticonverter.schema.zef2005.VERS) PROLOG(biblemulticonverter.schema.zef2005.PROLOG) MetadataBook(biblemulticonverter.data.MetadataBook) XMLBIBLE(biblemulticonverter.schema.zef2005.XMLBIBLE) QName(javax.xml.namespace.QName) Chapter(biblemulticonverter.data.Chapter) STYLE(biblemulticonverter.schema.zef2005.STYLE) JAXBElement(javax.xml.bind.JAXBElement) BIBLEBOOK(biblemulticonverter.schema.zef2005.BIBLEBOOK) EnumCaptionType(biblemulticonverter.schema.zef2005.EnumCaptionType) CHAPTER(biblemulticonverter.schema.zef2005.CHAPTER) BigInteger(java.math.BigInteger) CAPTION(biblemulticonverter.schema.zef2005.CAPTION) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Example 9 with MetadataBook

use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.

the class NeUeParser method doImport.

@Override
public Bible doImport(File inputDirectory) throws Exception {
    Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
    MetadataBook metadata = new MetadataBook();
    metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
    metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
    metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
    metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
    metadata.setValue(MetadataBookKey.language, "GER");
    bible.getBooks().add(metadata.getBook());
    String mainFile = "NeUe.htm";
    if (!new File(inputDirectory, mainFile).exists())
        mainFile = "index.htm";
    try (BufferedReader br = createReader(inputDirectory, mainFile)) {
        String line = br.readLine().trim();
        while (!line.startsWith("<p class=\"u3\">")) {
            if (line.contains("Textstand: ")) {
                line = line.substring(line.indexOf("Textstand: ") + 11);
                line = line.substring(0, line.indexOf('<'));
                metadata.setValue(MetadataBookKey.version, line);
                metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
                metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
                metadata.finished();
            }
            line = br.readLine().trim();
        }
        Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a>&nbsp;&nbsp;(?:</p>)?");
        int bookIndex = 0, jcIndex = 0;
        while (!line.startsWith("<a name=\"vorwort\">")) {
            if (line.equals("<br>")) {
                line = br.readLine().trim();
                if (line.startsWith("&raquo;&raquo;&nbsp;&nbsp;"))
                    line = line.substring("&raquo;&raquo;&nbsp;&nbsp;".length());
            }
            Matcher m = tocPattern.matcher(line);
            if (m.matches()) {
                String url = m.group(1);
                String shortName = replaceEntities(m.group(2));
                if (url.endsWith(".html#bb")) {
                    String filename = url.substring(0, url.length() - 8);
                    BookMetadata bm = METADATA[bookIndex];
                    if (!bm.filename.equals(filename))
                        throw new IOException(filename + "/" + bm.filename);
                    bm.shortname = shortName;
                    bookIndex++;
                } else if (url.startsWith("0")) {
                    if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
                        throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
                    jcIndex++;
                } else {
                    throw new IOException(url);
                }
            } else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p>&nbsp;</p>") && !line.equals("<p><a name=\"bb\">&nbsp;</a></p>")) {
                throw new IOException(line);
            }
            line = br.readLine().trim();
        }
        if (bookIndex != METADATA.length)
            throw new IOException(bookIndex + " != " + METADATA.length);
        if (jcIndex == 0)
            JESUS_CHRONIK = new String[0];
        if (jcIndex != JESUS_CHRONIK.length)
            throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);
        // Vorwort
        Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
        bible.getBooks().add(vorwort);
        Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
        boolean needParagraph = false;
        if (line.endsWith("</a><br>"))
            line = br.readLine().trim();
        while (!line.startsWith("<div align=\"right\">")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            if (line.startsWith("<h2>")) {
                if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
                    throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
            } else if (line.startsWith("<h4>")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<h4 id=")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<div class=\"fn\">")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
            } else if (line.startsWith("<p>")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                if (line.endsWith("<br />"))
                    line += br.readLine().trim();
                parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
            } else if (line.equals("<ul>")) {
                while (!line.equals("</ul>")) {
                    line = br.readLine();
                }
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<p>&nbsp;</p>");
        }
        vorwort.getChapters().get(0).getProlog().finished();
    }
    for (BookMetadata bm : METADATA) {
        if (!new File(inputDirectory, bm.filename + ".html").exists()) {
            System.out.println("*** Skipping " + bm.filename + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
            String line = br.readLine().trim();
            line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "&raquo;&raquo;");
            if (!line.equals("<p><a name=\"bb\">&nbsp;</a></p>") && !line.equals("<p><a id=\"bb\">&nbsp;</a></p>"))
                throw new IOException(line);
            line = skipLines(br);
            if (line.equals("<p>&nbsp;</p>"))
                line = br.readLine().trim();
            Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
            bible.getBooks().add(bk);
            line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p>&nbsp;</p>");
            FormattedText prolog = new FormattedText();
            prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
            line = skipLines(br);
            boolean firstProlog = true;
            while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
                if (firstProlog) {
                    firstProlog = false;
                } else {
                    prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                }
                parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
                line = skipLines(br);
            }
            if (firstProlog)
                throw new IOException(line);
            prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
            parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
            prolog.finished();
            line = skipLines(br);
            if (!line.startsWith("<h"))
                throw new IOException(line);
            char minHeadline = line.charAt(2);
            List<Headline> headlines = new ArrayList<>();
            boolean inParagraph = false;
            Chapter currentChapter = null;
            Verse currentVerse = null;
            List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
            List<String> footnoteVerses = new ArrayList<>();
            while (!line.equals("<hr>")) {
                if (line.startsWith("<p>&nbsp;</p>")) {
                    line = line.substring(13).trim();
                    if (line.length() == 0)
                        line = skipLines(br);
                    continue;
                }
                String restLine = null;
                List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
                while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
                if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
                    line = "<p>" + line.substring(16);
                }
                if (line.matches(".*</p>.+")) {
                    int pos = line.indexOf("</p>");
                    restLine = line.substring(pos + 4).trim();
                    line = line.substring(0, pos + 4);
                }
                if (!inParagraph && line.startsWith("<p>")) {
                    inParagraph = true;
                    line = line.substring(3).trim();
                    if (line.length() == 0) {
                        line = skipLines(br);
                        continue;
                    }
                }
                if (line.indexOf("<span class=\"vers\">", 1) != -1) {
                    int pos = line.indexOf("<span class=\"vers\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                if (line.indexOf("<p class=\"poet\">", 1) != -1) {
                    int pos = line.indexOf("<p class=\"poet\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                while (line.endsWith("&nbsp;")) line = line.substring(0, line.length() - 6);
                if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
                    Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
                    String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
                    if (headline.contains("*"))
                        throw new IOException(headline);
                    hl.getAppendVisitor().visitText(replaceEntities(headline));
                    headlines.add(hl);
                } else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
                    int pos = line.indexOf("</span>");
                    if (pos == -1)
                        throw new IOException(line);
                    String vs = line.substring(19, pos).trim();
                    if (vs.endsWith("&nbsp;")) {
                        vs = cutAffix(vs, "", "&nbsp;");
                    }
                    if (vs.matches("[0-9]+(,[0-9]+)?")) {
                        currentVerse = new Verse(vs);
                    } else {
                        throw new IOException(vs);
                    }
                    line = line.substring(pos + 7);
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    if (line.startsWith("&nbsp;")) {
                        line = line.substring(6);
                    }
                    for (Headline h : headlines) {
                        h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
                    }
                    headlines.clear();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                    currentChapter.getVerses().add(currentVerse);
                } else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
                    int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
                    currentChapter = new Chapter();
                    currentVerse = null;
                    bk.getChapters().add(currentChapter);
                    if (chap != bk.getChapters().size())
                        throw new IOException(chap + "/" + bk.getChapters().size());
                    if (prolog != null) {
                        currentChapter.setProlog(prolog);
                        prolog = null;
                    }
                } else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
                    String content = cutAffix(line, "<div class=\"fn\">", "</div>");
                    if (footnoteVerses.size() == 0)
                        throw new IOException(line);
                    String prefix = footnoteVerses.remove(0) + ":";
                    if (!content.startsWith(prefix)) {
                        throw new IOException(prefix + " / " + content);
                    }
                    parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
                } else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith("&nbsp;") || line.startsWith("<span class=\"u2\">"))) {
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    System.err.println("Next line: " + br.readLine());
                    throw new IOException(line);
                }
                if (!newFootnotes.isEmpty()) {
                    footnotes.addAll(newFootnotes);
                    for (int i = 0; i < newFootnotes.size(); i++) {
                        if (currentVerse.getNumber().contains(",")) {
                            footnoteVerses.add(currentVerse.getNumber());
                        } else {
                            footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
                        }
                    }
                }
                if (restLine != null)
                    line = restLine;
                else
                    line = skipLines(br);
            }
            if (!headlines.isEmpty())
                throw new IOException("" + headlines.size());
            if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
                throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
            for (Chapter ch : bk.getChapters()) {
                for (Verse vv : ch.getVerses()) {
                    vv.trimWhitespace();
                    vv.finished();
                }
            }
        }
    }
    // Anhang
    Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
    bible.getBooks().add(anhang);
    Visitor<RuntimeException> vv = getPrologVisitor(anhang);
    vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
    try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
        String line = br.readLine().trim();
        while (!line.startsWith("<a name=\"at\">")) {
            line = br.readLine().trim();
        }
        while (!line.equals("</body>")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            line = line.replaceAll("> +<", "><");
            line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
            if (line.startsWith("<h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
            } else if (line.startsWith("<h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
            } else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
                String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
                line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
                if (line.contains("<td><br /><br /><a href")) {
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                    line = line.replace("<td><br /><br /><a href", "<td><a href");
                }
                String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
                Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
                BookMetadata m = null;
                for (BookMetadata bm : METADATA) {
                    if (bm.filename.equals(parts[0])) {
                        m = bm;
                        break;
                    }
                }
                bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
                bold.visitText(" " + replaceEntities(title));
                vv.visitLineBreak(LineBreakKind.NEWLINE);
                line = br.readLine().trim();
                while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
                vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
                vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                line = br.readLine().trim();
                if (!line.equals("</tr>"))
                    throw new IOException(line);
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p>", "<p>&nbsp;</p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
        }
    }
    // Hesekiels Tempel
    vv.visitHeadline(1).visitText("Hesekiels Tempel");
    Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
    vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
    vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");
    // Jesus-Chronik
    if (JESUS_CHRONIK.length > 0)
        vv.visitHeadline(1).visitText("Die Jesus-Chronik");
    for (String name : JESUS_CHRONIK) {
        if (!new File(inputDirectory, name + ".html").exists()) {
            System.out.println("*** Skipping " + name + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
            String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
            List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
            List<String> footnotePrefixes = new ArrayList<>();
            while (!line.startsWith("</body>")) {
                line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
                if (line.startsWith("<h2>")) {
                    parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
                } else if (line.startsWith("<div class=\"fn\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
                    for (String fn : fns) {
                        fn = fn.trim();
                        String pfx = footnotePrefixes.remove(0);
                        Visitor<RuntimeException> fnv = footnoteList.remove(0);
                        if (!fn.startsWith(pfx))
                            throw new IOException(pfx + " / " + fn);
                        parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
                    }
                } else if (line.startsWith("<p><div class=\"rot\">")) {
                    String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
                    int pos = line.indexOf("</b><br />");
                    parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
                    String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
                    if (!xref.isEmpty())
                        parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p>")) {
                    parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("&copy;")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<div class=\"e\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    throw new IOException(line);
                }
                line = skipLines(br);
            }
            if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
                throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
        }
    }
    anhang.getChapters().get(0).getProlog().trimWhitespace();
    anhang.getChapters().get(0).getProlog().finished();
    return bible;
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) Matcher(java.util.regex.Matcher) Bible(biblemulticonverter.data.Bible) ArrayList(java.util.ArrayList) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) Headline(biblemulticonverter.data.FormattedText.Headline) MetadataBook(biblemulticonverter.data.MetadataBook) Pattern(java.util.regex.Pattern) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FormattedText(biblemulticonverter.data.FormattedText) Date(java.util.Date) BufferedReader(java.io.BufferedReader) File(java.io.File) SimpleDateFormat(java.text.SimpleDateFormat) Verse(biblemulticonverter.data.Verse)

Example 10 with MetadataBook

use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.

the class OSIS method doImport.

/**
 * Imports an OSIS XML file and returns it as a {@link Bible}.
 *
 * <p>The file is first handed to {@code ValidateXML.validateFileBeforeParsing}
 * together with the schema loaded from {@code /osisCore.2.1.1.xsd}, then parsed
 * into a DOM. The Bible name comes from the work title in the header and falls
 * back to {@code "OSIS Bible"} when that is empty. A metadata book is added
 * only when the header carries a description or a rights statement; in that
 * case the date and the title page description are stored as well if present.
 *
 * <p>Every {@code div} of type {@code book} becomes one {@link Book}. A book
 * div written in milestoned form (it has an {@code sID}) is converted into a
 * container first: the siblings between the start tag and the matching
 * {@code eID} tag are moved inside the start element and the end tag is
 * removed. The book content itself is handled by {@code parseBook}.
 *
 * @param inputFile the OSIS XML file to read
 * @return the imported Bible
 * @throws IllegalStateException if a milestoned book div has no matching end
 *         tag, or if start and end tag cannot be made siblings
 * @throws Exception if validation, parsing or XPath evaluation fails
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    ValidateXML.validateFileBeforeParsing(SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(ObjectFactory.class.getResource("/osisCore.2.1.1.xsd")), inputFile);
    printedWarnings.clear();
    DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    XPath xp = javax.xml.xpath.XPathFactory.newInstance().newXPath();
    Document doc = builder.parse(inputFile);

    // Bible name: work title, or a fixed fallback when the header has none.
    String bibleName = xp.evaluate("/osis/osisText/header/work/title/text()", doc);
    Bible bible = new Bible(bibleName.isEmpty() ? "OSIS Bible" : bibleName);

    // Metadata is only emitted when a description or a rights statement exists.
    String description = xp.evaluate("/osis/osisText/header/work/description/text()", doc);
    String rights = xp.evaluate("/osis/osisText/header/work/rights/text()", doc);
    boolean hasDescription = !description.isEmpty();
    boolean hasRights = !rights.isEmpty();
    if (hasDescription || hasRights) {
        String date = xp.evaluate("/osis/osisText/header/work/date/text()", doc);
        String titlePageDescription = xp.evaluate("/osis/osisText/titlePage/description/text()", doc);
        // Runs of whitespace are collapsed to a single blank before storing.
        final String whitespaceRun = "[\r\n\t ]+";
        MetadataBook metadata = new MetadataBook();
        if (hasDescription) {
            metadata.setValue(MetadataBookKey.description, description.replaceAll(whitespaceRun, " ").trim());
        }
        if (hasRights) {
            metadata.setValue(MetadataBookKey.rights, rights.replaceAll(whitespaceRun, " ").trim());
        }
        if (!date.isEmpty()) {
            metadata.setValue(MetadataBookKey.date, date);
        }
        if (!titlePageDescription.isEmpty()) {
            metadata.setValue("description@titlePage", titlePageDescription.replaceAll(whitespaceRun, " ").trim());
        }
        metadata.finished();
        bible.getBooks().add(metadata.getBook());
    }

    NodeList bookDivs = (NodeList) xp.evaluate("/osis/osisText//div[@type='book']", doc, XPathConstants.NODESET);
    for (int i = 0; i < bookDivs.getLength(); i++) {
        Element bookDiv = (Element) bookDivs.item(i);
        String startId = bookDiv.getAttribute("sID");
        if (!startId.isEmpty()) {
            // Milestoned book: locate the end marker that carries the same id.
            Element endMarker = (Element) xp.evaluate("//div[@eID='" + startId + "']", doc, XPathConstants.NODE);
            if (endMarker == null) {
                throw new IllegalStateException("No milestoned div found with eID " + startId);
            }
            if (!endMarker.getParentNode().isSameNode(bookDiv.getParentNode())) {
                // Collect the end marker and all of its ancestors ...
                List<Node> endAncestors = new ArrayList<>();
                for (Node n = endMarker; n != null; n = n.getParentNode()) {
                    endAncestors.add(n);
                }
                // ... then climb from the start marker until one of them is hit.
                Node shared = bookDiv;
                while (shared != null) {
                    boolean alsoAboveEnd = false;
                    for (Node ancestor : endAncestors) {
                        if (shared.isSameNode(ancestor)) {
                            alsoAboveEnd = true;
                            break;
                        }
                    }
                    if (alsoAboveEnd) {
                        break;
                    }
                    shared = shared.getParentNode();
                }
                if (shared == null) {
                    throw new IllegalStateException("Unable to find common parent of milestoned div start and end tag");
                }
                // NOTE(review): convertToMilestoned is expected to flatten the subtree so
                // that both markers end up under the same parent; this is verified below.
                convertToMilestoned((Element) shared);
                if (!endMarker.getParentNode().isSameNode(bookDiv.getParentNode())) {
                    throw new IllegalStateException("Unable to normalize XML so that milestoned div start and end tags are siblings");
                }
            }
            // appendChild moves the node, so the next sibling is re-read every round.
            Node following = bookDiv.getNextSibling();
            while (following != null && !following.isSameNode(endMarker)) {
                bookDiv.appendChild(following);
                following = bookDiv.getNextSibling();
            }
            endMarker.getParentNode().removeChild(endMarker);
        }

        String osisId = bookDiv.getAttribute("osisID");
        BookID id = BookID.fromOsisId(osisId);
        // Default title is the English book name; a leading main title overrides it.
        String bookTitle = id.getEnglishName();
        Node firstNonText = bookDiv.getFirstChild();
        while (firstNonText instanceof Text) {
            firstNonText = firstNonText.getNextSibling();
        }
        if (firstNonText instanceof Element && "title".equals(firstNonText.getNodeName())) {
            Element titleElement = (Element) firstNonText;
            boolean isMainTitle = "main".equals(titleElement.getAttribute("type"));
            if (isMainTitle && titleElement.getChildNodes().getLength() > 0) {
                bookTitle = titleElement.getTextContent();
            }
        }
        // The same title is passed for both name arguments of the book.
        Book importedBook = new Book(osisId, id, bookTitle, bookTitle);
        bible.getBooks().add(importedBook);
        parseBook(osisId, bookDiv, importedBook);
    }
    return bible;
}
Also used : XPath(javax.xml.xpath.XPath) MetadataBook(biblemulticonverter.data.MetadataBook) Bible(biblemulticonverter.data.Bible) NodeList(org.w3c.dom.NodeList) Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) ArrayList(java.util.ArrayList) Text(org.w3c.dom.Text) FormattedText(biblemulticonverter.data.FormattedText) Document(org.w3c.dom.Document) BookID(biblemulticonverter.data.BookID) DocumentBuilder(javax.xml.parsers.DocumentBuilder) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book)

Aggregations

Book (biblemulticonverter.data.Book) 22, MetadataBook (biblemulticonverter.data.MetadataBook) 22, Chapter (biblemulticonverter.data.Chapter) 19, Verse (biblemulticonverter.data.Verse) 17, BookID (biblemulticonverter.data.BookID) 13, VirtualVerse (biblemulticonverter.data.VirtualVerse) 12, Bible (biblemulticonverter.data.Bible) 10, FormattedText (biblemulticonverter.data.FormattedText) 10, Headline (biblemulticonverter.data.FormattedText.Headline) 10, IOException (java.io.IOException) 9, ArrayList (java.util.ArrayList) 9, FileInputStream (java.io.FileInputStream) 6, Visitor (biblemulticonverter.data.FormattedText.Visitor) 5, BIBLEBOOK (biblemulticonverter.schema.zef2005.BIBLEBOOK) 5, CAPTION (biblemulticonverter.schema.zef2005.CAPTION) 5, CHAPTER (biblemulticonverter.schema.zef2005.CHAPTER) 5, VERS (biblemulticonverter.schema.zef2005.VERS) 5, File (java.io.File) 5, FileOutputStream (java.io.FileOutputStream) 5, EnumMap (java.util.EnumMap) 5