Search in sources :

Example 6 with CHAPTER

use of biblemulticonverter.schema.zef2005.CHAPTER in project BibleMultiConverter by schierlm.

Method parseBible of the class ZefaniaXML.

/**
 * Converts an unmarshalled Zefania XML document into a {@link Bible}.
 *
 * <p>The conversion runs in three stages:
 * <ol>
 * <li>Document attributes and the INFORMATION children are collected into a
 * {@link MetadataBook}, which is added to the result only if it has at least
 * one key.</li>
 * <li>A first pass over all BIBLEBOOK elements derives an abbreviation per
 * book ID, so that cross references can be resolved regardless of the order
 * in which books appear.</li>
 * <li>A second pass creates the books and fills chapters, prologs, headlines,
 * verses, remarks and cross references.</li>
 * </ol>
 *
 * <p>Books whose number is rejected by {@code BookID.fromZefId} are skipped
 * (with a warning on standard output during the second pass). Several
 * BIBLEBOOK elements with the same book ID are merged into one {@link Book}.
 *
 * @param doc the root element of the Zefania XML document
 * @return the converted Bible
 * @throws Exception declared broadly by this method; runtime failures of the
 *         helpers it calls are not caught here
 */
protected Bible parseBible(XMLBIBLE doc) throws Exception {
    Bible result = new Bible(doc.getBiblename());
    MetadataBook metadata = new MetadataBook();
    if (doc.getStatus() != null) {
        metadata.setValue(MetadataBookKey.status, doc.getStatus().value());
    }
    if (doc.getVersion() != null) {
        metadata.setValue(MetadataBookKey.version, doc.getVersion());
    }
    if (doc.getRevision() != null) {
        metadata.setValue(MetadataBookKey.revision, doc.getRevision().toString());
    }
    // Every non-empty INFORMATION child becomes a metadata entry keyed by its
    // element name.
    for (JAXBElement<?> elem : doc.getINFORMATION().getTitleOrCreatorOrDescription()) {
        if (elem.getValue() == null)
            continue;
        String value = normalize(elem.getValue().toString(), true).trim();
        if (value.length() != 0)
            metadata.setValue(elem.getName().getLocalPart(), value);
    }
    metadata.finished();
    if (metadata.getKeys().size() > 0)
        result.getBooks().add(metadata.getBook());
    Set<String> abbrs = new HashSet<String>();
    Set<String> shortnames = new HashSet<String>();
    Set<String> longnames = new HashSet<String>();
    Map<BookID, String> abbrMap = new EnumMap<BookID, String>(BookID.class);
    List<BIBLEBOOK> nl = doc.getBIBLEBOOK();
    // First pass: compute the abbreviation of every book up front, so that
    // cross references to books that appear later in the document can already
    // be resolved while parsing earlier ones.
    for (BIBLEBOOK e : nl) {
        String shortname = e.getBsname();
        int number = e.getBnumber().intValue();
        BookID bookID;
        try {
            bookID = BookID.fromZefId(number);
        } catch (IllegalArgumentException ex) {
            // Unknown book number; the warning is printed in the second pass.
            continue;
        }
        if (shortname == null || shortname.length() == 0)
            shortname = "_" + bookID.getOsisID();
        // Strip everything except ASCII letters, digits and lowercase German
        // umlauts, then make sure the result starts with a non-lowercase
        // character and is at least two characters long.
        String abbr = shortname.replaceAll("[^A-Z0-9a-zäöü]++", "");
        if (abbr.length() == 0 || Character.isLowerCase(abbr.charAt(0)))
            abbr = "X" + abbr;
        if (abbr.length() == 1)
            abbr += "x";
        // Disambiguate duplicates with a numeric suffix (2..99).
        if (abbrs.contains(abbr)) {
            for (int i = 2; i < 100; i++) {
                if (!abbrs.contains(abbr + i)) {
                    abbr = abbr + i;
                    break;
                }
            }
        }
        abbrs.add(abbr);
        // NOTE(review): if the same book ID occurs more than once, this keeps
        // the abbreviation of the LAST occurrence, while the Book created in
        // the second pass uses the abbreviation of the FIRST one - confirm
        // whether that mismatch is intended.
        abbrMap.put(bookID, abbr);
    }
    // Second pass: repeat the same abbreviation derivation (starting from an
    // empty set so the same values are produced) and build the books.
    abbrs.clear();
    EnumMap<BookID, Book> existingBooks = new EnumMap<BookID, Book>(BookID.class);
    for (BIBLEBOOK e : nl) {
        String shortname = e.getBsname();
        String longname = e.getBname();
        int number = e.getBnumber().intValue();
        BookID bookID;
        try {
            bookID = BookID.fromZefId(number);
        } catch (IllegalArgumentException ex) {
            System.out.println("WARNING: Skipping book with unknown id " + number);
            continue;
        }
        if (shortname == null || shortname.length() == 0)
            shortname = "_" + bookID.getOsisID();
        if (longname == null || longname.length() == 0)
            longname = "_" + bookID.getEnglishName();
        else
            longname = longname.replaceAll("  ++", " ").trim();
        String abbr = shortname.replaceAll("[^A-Z0-9a-zäöü]++", "");
        if (abbr.length() == 0 || Character.isLowerCase(abbr.charAt(0)))
            abbr = "X" + abbr;
        if (abbr.length() == 1)
            abbr += "x";
        if (abbrs.contains(abbr)) {
            for (int i = 2; i < 100; i++) {
                if (!abbrs.contains(abbr + i)) {
                    abbr = abbr + i;
                    break;
                }
            }
        }
        abbrs.add(abbr);
        // Work around two specific naming mistakes: Exodus labelled as
        // Genesis, and 2 Chronicles carrying the short name of 1 Chronicles.
        if (shortname.equals("Gen") && longname.equals("Genesis") && bookID == BookID.BOOK_Exod) {
            System.out.println("WARNING: Book number " + bookID.getZefID() + " has name " + longname);
            shortname = "Exo";
            longname = "Exodus";
        }
        if (shortname.equals("1Chr") && longname.equals("2 Chronicles")) {
            System.out.println("WARNING: Book name 2 Chronicles has short name 1Chr");
            shortname = "2Chr";
        }
        if (shortnames.contains(shortname)) {
            System.out.println("WARNING: Duplicate short name " + shortname);
            for (int i = 2; i < 100; i++) {
                if (!shortnames.contains(shortname + i)) {
                    shortname = shortname + i;
                    break;
                }
            }
        }
        shortnames.add(shortname);
        if (longnames.contains(longname)) {
            // Report the long name that actually collided (the original code
            // printed the short name here).
            System.out.println("WARNING: Duplicate long name " + longname);
            for (int i = 2; i < 100; i++) {
                if (!longnames.contains(longname + i)) {
                    longname = longname + i;
                    break;
                }
            }
        }
        longnames.add(longname);
        // Reuse the book if an earlier BIBLEBOOK element had the same ID; its
        // chapters are then merged into the existing book.
        Book book = existingBooks.get(bookID);
        if (book == null) {
            book = new Book(abbr, bookID, shortname, longname);
            existingBooks.put(bookID, book);
            result.getBooks().add(book);
        }
        // Captions are buffered until the next verse, which receives them as
        // headlines. The buffer is per BIBLEBOOK element, so it carries over
        // from one chapter to the next.
        List<Headline> headlineBuffer = new ArrayList<Headline>();
        for (CHAPTER e2 : e.getCHAPTER()) {
            int chapterNumber = e2.getCnumber().intValue();
            // Pad with empty chapters so that index chapterNumber - 1 exists.
            while (book.getChapters().size() < chapterNumber) book.getChapters().add(new Chapter());
            Chapter chapter = book.getChapters().get(chapterNumber - 1);
            // Verses already present before this CHAPTER element; they are
            // excluded from the finished() calls at the end of this iteration.
            int existingVerses = chapter.getVerses().size();
            for (Object e3 : e2.getPROLOGOrCAPTIONOrVERS()) {
                if (e3 instanceof CAPTION) {
                    CAPTION caption = (CAPTION) e3;
                    // Map the caption type to a headline depth; untyped or
                    // unrecognized captions get depth 9.
                    int depth;
                    if (caption.getType() == null) {
                        depth = 9;
                    } else {
                        switch(caption.getType()) {
                            case X_H_1:
                                depth = 1;
                                break;
                            case X_H_2:
                                depth = 2;
                                break;
                            case X_H_3:
                                depth = 3;
                                break;
                            case X_H_4:
                                depth = 4;
                                break;
                            case X_H_5:
                                depth = 5;
                                break;
                            case X_H_6:
                                depth = 6;
                                break;
                            default:
                                depth = 9;
                                break;
                        }
                    }
                    // Consecutive buffered headlines must increase in depth:
                    // a headline that is not deeper than its predecessor is
                    // pushed one level below it (9 stays 9).
                    int lastDepth = headlineBuffer.size() == 0 ? -1 : headlineBuffer.get(headlineBuffer.size() - 1).getDepth();
                    if (depth <= lastDepth)
                        depth = lastDepth == 9 ? 9 : lastDepth + 1;
                    Headline h = new Headline(depth);
                    if (parseContent(h.getAppendVisitor(), caption.getContent(), abbrMap)) {
                        h.trimWhitespace();
                        h.finished();
                        headlineBuffer.add(h);
                    }
                } else if (e3 instanceof REMARK) {
                    // A remark becomes a footnote appended to the verse it
                    // references; it is dropped if that verse has not been
                    // added yet or if the remark does not have exactly one
                    // content item.
                    REMARK remark = (REMARK) e3;
                    int vref = remark.getVref().intValue();
                    int idx = chapter.getVerseIndex("" + vref);
                    if (idx == -1)
                        continue;
                    Verse v = chapter.getVerses().get(idx);
                    if (remark.getContent().size() != 1)
                        continue;
                    String remarkText = normalize((String) remark.getContent().get(0), true).trim();
                    v.getAppendVisitor().visitFootnote().visitText(remarkText);
                } else if (e3 instanceof XREF) {
                    // Chapter-level cross references become a footnote on the
                    // referenced verse; each space-separated mscope entry has
                    // the form book;chapter[-endChapter];verse[-endVerse].
                    XREF xref = (XREF) e3;
                    int vref = xref.getVref().intValue();
                    int idx = chapter.getVerseIndex("" + vref);
                    if (idx == -1)
                        continue;
                    Verse v = chapter.getVerses().get(idx);
                    Visitor<RuntimeException> footnoteVisitor = v.getAppendVisitor().visitFootnote();
                    boolean first = true;
                    for (String mscope : xref.getMscope().split(" ")) {
                        Matcher m = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(mscope);
                        if (!m.matches())
                            continue;
                        BookID xrefBookID = BookID.fromZefId(Integer.parseInt(m.group(1)));
                        int xrefChapter = Integer.parseInt(m.group(2)), endChapter = xrefChapter;
                        if (m.group(3) != null)
                            endChapter = Integer.parseInt(m.group(3).substring(1));
                        String verse = m.group(4);
                        String endVerse = m.group(5);
                        if (endVerse == null)
                            endVerse = verse;
                        else
                            endVerse = endVerse.substring(1);
                        // Skip references to verse 0 and ranges within one
                        // chapter that run backwards.
                        if (verse.equals("0") || endVerse.equals("0"))
                            continue;
                        if (xrefChapter == endChapter && Integer.parseInt(verse) > Integer.parseInt(endVerse))
                            continue;
                        // Fall back to the OSIS ID for books that are not part
                        // of this document.
                        String xrefAbbr = abbrMap.get(xrefBookID);
                        if (xrefAbbr == null)
                            xrefAbbr = xrefBookID.getOsisID();
                        if (first)
                            first = false;
                        else
                            footnoteVisitor.visitText(" ");
                        footnoteVisitor.visitCrossReference(xrefAbbr, xrefBookID, xrefChapter, verse, endChapter, endVerse).visitText(xrefAbbr + " " + xrefChapter + ":" + verse);
                    }
                    // No usable reference was found: the footnote was already
                    // opened, so give it an empty marker instead of leaving it
                    // without content.
                    if (first)
                        visitEmptyMarker(footnoteVisitor);
                } else if (e3 instanceof PROLOG) {
                    // Only the first prolog of a chapter is kept.
                    PROLOG prolog = (PROLOG) e3;
                    if (chapter.getProlog() != null)
                        continue;
                    FormattedText prologText = new FormattedText();
                    if (parseContent(prologText.getAppendVisitor(), prolog.getContent(), abbrMap)) {
                        prologText.trimWhitespace();
                        prologText.finished();
                        chapter.setProlog(prologText);
                    }
                } else if (e3 instanceof VERS) {
                    VERS vers = (VERS) e3;
                    // A missing or zero verse number is replaced by the next
                    // sequential position within the chapter.
                    int vnumber = vers.getVnumber() == null ? 0 : vers.getVnumber().intValue();
                    if (vnumber == 0)
                        vnumber = chapter.getVerses().size() + 1;
                    String verseNumber = vnumber + (vers.getAix() == null ? "" : vers.getAix());
                    // If the verse number is already taken, look for a free
                    // name by appending a..z, then -a..-z, then --a..--z, and
                    // so on. Note that the replacement name is built from the
                    // plain number and drops the aix suffix.
                    String infix = "";
                    while (chapter.getVerseIndex(verseNumber) != -1) {
                        for (char ch = 'a'; ch <= 'z'; ch++) {
                            if (chapter.getVerseIndex(vnumber + infix + ch) == -1) {
                                verseNumber = vnumber + infix + ch;
                                break;
                            }
                        }
                        infix += "-";
                    }
                    Verse verse = new Verse(verseNumber);
                    Visitor<RuntimeException> visitor = verse.getAppendVisitor();
                    boolean contentFound = false;
                    // Flush buffered captions as headlines at the start of
                    // this verse; a verse that has headlines is kept even if
                    // its own content turns out to be empty.
                    if (headlineBuffer.size() > 0) {
                        for (Headline h : headlineBuffer) {
                            h.accept(visitor.visitHeadline(h.getDepth()));
                        }
                        headlineBuffer.clear();
                        contentFound = true;
                    }
                    contentFound |= parseContent(visitor, vers.getContent(), abbrMap);
                    if (contentFound) {
                        verse.trimWhitespace();
                        chapter.getVerses().add(verse);
                    }
                }
            }
            // Call finished() only on the verses added by this CHAPTER
            // element. It is deferred to this point because REMARK and XREF
            // elements may still append footnotes to verses added above.
            for (Verse v : chapter.getVerses()) {
                if (existingVerses > 0) {
                    existingVerses--;
                    continue;
                }
                v.finished();
            }
        }
    }
    return result;
}
Also used : Matcher(java.util.regex.Matcher) Bible(biblemulticonverter.data.Bible) ArrayList(java.util.ArrayList) BookID(biblemulticonverter.data.BookID) Book(biblemulticonverter.data.Book) MetadataBook(biblemulticonverter.data.MetadataBook) Headline(biblemulticonverter.data.FormattedText.Headline) VERS(biblemulticonverter.schema.zef2005.VERS) PROLOG(biblemulticonverter.schema.zef2005.PROLOG) EnumMap(java.util.EnumMap) REMARK(biblemulticonverter.schema.zef2005.REMARK) HashSet(java.util.HashSet) MetadataBook(biblemulticonverter.data.MetadataBook) Chapter(biblemulticonverter.data.Chapter) FormattedText(biblemulticonverter.data.FormattedText) BIBLEBOOK(biblemulticonverter.schema.zef2005.BIBLEBOOK) XREF(biblemulticonverter.schema.zef2005.XREF) CHAPTER(biblemulticonverter.schema.zef2005.CHAPTER) CAPTION(biblemulticonverter.schema.zef2005.CAPTION) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Example 7 with CHAPTER

use of biblemulticonverter.schema.zef2005.CHAPTER in project BibleMultiConverter by schierlm.

Method parseContent of the class ZefaniaXMLRoundtrip.

/**
 * Feeds a mixed content list of a Zefania XML element into a formatted-text
 * visitor, recursing into nested elements.
 *
 * <p>Supported items are plain strings, DIV/NOTE (footnotes), BR (line
 * breaks), XREF (cross references inside a footnote) and JAXB-wrapped STYLE
 * and gr/GRAM elements. Details that have no direct counterpart in the target
 * model (footnote source, newline grouping, tag name, Strong prefix) are
 * recorded as "zefania" extra attributes. Anything else is rejected with an
 * exception rather than silently dropped.
 *
 * @param visitor the visitor that receives the converted content
 * @param contentList the content items to convert
 * @param abbrMap book abbreviations by book ID, used to label cross
 *        references; books missing from the map fall back to their OSIS ID
 * @return {@code true} if the list contained any non-whitespace text or any
 *         element other than a DIV/NOTE without content
 * @throws IOException if the list contains an unsupported item, an
 *         unparseable cross reference scope, an unsupported STYLE, or an
 *         invalid GRAM element
 * @throws RuntimeException if a BR element has a count outside 1..10 or an
 *         unsupported kind
 */
private boolean parseContent(Visitor<RuntimeException> visitor, List<Object> contentList, Map<BookID, String> abbrMap) throws IOException {
    boolean contentFound = false;
    for (Object n : contentList) {
        if (n instanceof String) {
            // Text is always forwarded, but whitespace-only text does not
            // count as content.
            String value = normalize((String) n, false);
            visitor.visitText(value);
            contentFound |= value.trim().length() > 0;
        } else if (n instanceof DIV || n instanceof NOTE) {
            // Both become footnotes; a note wrapped in a DIV is tagged so the
            // wrapper can be told apart from a bare NOTE later on. Notes
            // without content are skipped entirely.
            NOTE note;
            Visitor<RuntimeException> v;
            if (n instanceof DIV) {
                note = ((DIV) n).getNOTE();
                if (note.getContent().size() == 0)
                    continue;
                v = visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "div").visitFootnote();
            } else {
                note = (NOTE) n;
                if (note.getContent().size() == 0)
                    continue;
                v = visitor.visitFootnote();
            }
            boolean subContentFound = parseContent(v, note.getContent(), abbrMap);
            if (!subContentFound)
                visitEmptyMarker(v);
            contentFound = true;
        } else if (n instanceof BR) {
            BR br = (BR) n;
            Visitor<RuntimeException> v = visitor;
            int count = 1;
            // An explicit count is recorded as an extra attribute wrapping
            // the generated line breaks.
            if (br.getCount() != null) {
                count = br.getCount().intValue();
                v = visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "newline-group", br.getCount() + "--" + br.getArt().value());
            }
            if (count < 1 || count > 10)
                throw new RuntimeException("Unsupported BR count: " + count);
            for (int ii = 0; ii < count; ii++) {
                switch(br.getArt()) {
                    case X_NL:
                        v.visitLineBreak(LineBreakKind.NEWLINE);
                        break;
                    case X_P:
                        v.visitLineBreak(LineBreakKind.PARAGRAPH);
                        break;
                    default:
                        throw new RuntimeException(br.getArt().toString());
                }
            }
            contentFound = true;
        } else if (n instanceof XREF) {
            // Inline cross references become a tagged footnote; each
            // space-separated mscope entry has the form
            // book;chapter[-endChapter];verse[-endVerse].
            XREF xref = (XREF) n;
            Visitor<RuntimeException> footnoteVisitor = visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "inner-xref").visitFootnote();
            boolean first = true;
            for (String mscope : xref.getMscope().split(" ")) {
                Matcher m = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(mscope);
                if (!m.matches())
                    throw new IOException(mscope);
                BookID bookID = BookID.fromZefId(Integer.parseInt(m.group(1)));
                int chapter = Integer.parseInt(m.group(2)), endChapter = chapter;
                if (m.group(3) != null)
                    endChapter = Integer.parseInt(m.group(3).substring(1));
                // Verse 0 is rewritten to the special verse name "1//G".
                String verse = m.group(4);
                if (verse.equals("0"))
                    verse = "1//G";
                String endVerse = m.group(5);
                if (endVerse == null)
                    endVerse = verse;
                else
                    endVerse = endVerse.substring(1);
                if (endVerse.equals("0"))
                    endVerse = "1//G";
                String abbr = abbrMap.get(bookID);
                if (abbr == null)
                    abbr = bookID.getOsisID();
                if (first)
                    first = false;
                else
                    footnoteVisitor.visitText(" ");
                // A range within one chapter that runs backwards is swapped
                // rather than rejected.
                if (chapter == endChapter && !verse.equals("1//G") && !endVerse.equals("1//G") && Integer.parseInt(verse) > Integer.parseInt(endVerse)) {
                    String tmp = verse;
                    verse = endVerse;
                    endVerse = tmp;
                }
                footnoteVisitor.visitCrossReference(abbr, bookID, chapter, verse, endChapter, endVerse).visitText(abbr + " " + chapter + ":" + verse);
            }
            contentFound = true;
        } else if (n instanceof JAXBElement<?>) {
            String name = ((JAXBElement<?>) n).getName().toString();
            Object nn = ((JAXBElement<?>) n).getValue();
            if (name.equals("STYLE") && nn instanceof STYLE) {
                String css = ((STYLE) nn).getCss();
                String id = ((STYLE) nn).getId();
                // A STYLE may carry an id or a css value, but not both.
                if (id != null && css != null)
                    throw new IOException(id + "/" + css);
                if (css != null && css.startsWith("display:block;")) {
                    // not really a formatting instruction, but more some
                    // clever way of indentation
                    List<Object> content = ((STYLE) nn).getContent();
                    Visitor<RuntimeException> contentVisitor = visitor.visitCSSFormatting(css);
                    boolean subContentFound = parseContent(contentVisitor, content, abbrMap);
                    if (!subContentFound)
                        visitEmptyMarker(contentVisitor);
                } else {
                    // Map the id or css value to one of the known formatting
                    // kinds; anything unrecognized is an error.
                    FormattingInstructionKind kind;
                    if (id != null && id.equals("cl:divineName")) {
                        kind = FormattingInstructionKind.DIVINE_NAME;
                    } else if (css == null) {
                        throw new IOException(id);
                    } else if (css.contains("italic")) {
                        kind = FormattingInstructionKind.ITALIC;
                    } else if (css.contains("bold")) {
                        kind = FormattingInstructionKind.BOLD;
                    } else if (css.equalsIgnoreCase("color:#FF0000")) {
                        kind = FormattingInstructionKind.WORDS_OF_JESUS;
                    } else if (css.equals("color:blue")) {
                        kind = FormattingInstructionKind.LINK;
                    } else if (css.equals("color:#00CC33;font-size:8pt;vertical-align:super") || css.equals("font-size:small")) {
                        kind = FormattingInstructionKind.SUPERSCRIPT;
                    } else {
                        throw new IOException(css);
                    }
                    List<Object> content = ((STYLE) nn).getContent();
                    Visitor<RuntimeException> contentVisitor = visitor.visitFormattingInstruction(kind);
                    // If the css differs from the kind's own css, nest an
                    // additional CSS formatting carrying the original value.
                    if (css != null && !kind.getCss().equals(css)) {
                        contentVisitor = contentVisitor.visitCSSFormatting(css);
                    }
                    if (content.size() == 0) {
                        visitEmptyMarker(contentVisitor);
                    } else {
                        boolean subContentFound = parseContent(contentVisitor, content, abbrMap);
                        if (!subContentFound)
                            visitEmptyMarker(contentVisitor);
                    }
                }
            } else if ((name.equals("gr") || name.equals("GRAM")) && nn instanceof GRAM) {
                GRAM gram = (GRAM) nn;
                Visitor<RuntimeException> strongVisitor = visitor;
                // Record the tag name when it is not the default "GRAM".
                if (!name.equals("GRAM")) {
                    strongVisitor = strongVisitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "gram-tag", name);
                }
                if (gram.getStr() == null && gram.getRmac() == null)
                    throw new IOException("GRAM element has neither str nor rmac attribute");
                int[] strongs = null;
                if (gram.getStr() != null) {
                    // Normalize the space-separated Strong number list; an
                    // empty value and "?" are replaced by the placeholder
                    // numbers 0 and 99111.
                    String strong = gram.getStr().trim().replaceAll(" ++", " ");
                    if (strong.length() == 0)
                        strong = "0";
                    if (strong.equals("?"))
                        strong = "99111";
                    // A leading G/H prefix is recorded as an extra attribute
                    // and all occurrences of that letter are stripped.
                    if (strong.startsWith("G")) {
                        strongVisitor = strongVisitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "strong-prefix", "G");
                        strong = strong.replace("G", "");
                    } else if (strong.startsWith("H")) {
                        strongVisitor = strongVisitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "strong-prefix", "H");
                        strong = strong.replace("H", "");
                    }
                    if (!strong.matches("[0-9]+( [0-9]+)*"))
                        throw new IOException(strong);
                    String[] tmpStrongs = strong.split(" ");
                    strongs = new int[tmpStrongs.length];
                    for (int i = 0; i < tmpStrongs.length; i++) {
                        strongs[i] = Integer.parseInt(tmpStrongs[i]);
                    }
                }
                String[] rmacs = null;
                if (gram.getRmac() != null) {
                    String rmac = gram.getRmac();
                    rmacs = rmac.split(" ");
                }
                strongVisitor = strongVisitor.visitGrammarInformation(strongs, rmacs, null);
                if (!parseContent(strongVisitor, gram.getContent(), abbrMap)) {
                    visitEmptyMarker(strongVisitor);
                }
            } else {
                // nn may be null (a JAXBElement without a value); report that
                // as an unsupported element instead of failing with a
                // NullPointerException while building the message.
                throw new IOException(name + "/" + (nn == null ? "null" : nn.getClass().toString()));
            }
            contentFound = true;
        } else {
            throw new IOException(n.getClass().toString());
        }
    }
    return contentFound;
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) Matcher(java.util.regex.Matcher) STYLE(biblemulticonverter.schema.zef2005.STYLE) FormattingInstructionKind(biblemulticonverter.data.FormattedText.FormattingInstructionKind) IOException(java.io.IOException) JAXBElement(javax.xml.bind.JAXBElement) DIV(biblemulticonverter.schema.zef2005.DIV) BR(biblemulticonverter.schema.zef2005.BR) XREF(biblemulticonverter.schema.zef2005.XREF) BookID(biblemulticonverter.data.BookID) NOTE(biblemulticonverter.schema.zef2005.NOTE) GRAM(biblemulticonverter.schema.zef2005.GRAM)

Aggregations

BookID (biblemulticonverter.data.BookID)6 Book (biblemulticonverter.data.Book)5 Chapter (biblemulticonverter.data.Chapter)5 Headline (biblemulticonverter.data.FormattedText.Headline)5 MetadataBook (biblemulticonverter.data.MetadataBook)5 Verse (biblemulticonverter.data.Verse)5 VirtualVerse (biblemulticonverter.data.VirtualVerse)5 BIBLEBOOK (biblemulticonverter.schema.zef2005.BIBLEBOOK)5 CAPTION (biblemulticonverter.schema.zef2005.CAPTION)5 CHAPTER (biblemulticonverter.schema.zef2005.CHAPTER)5 STYLE (biblemulticonverter.schema.zef2005.STYLE)5 VERS (biblemulticonverter.schema.zef2005.VERS)5 XREF (biblemulticonverter.schema.zef2005.XREF)5 ArrayList (java.util.ArrayList)5 PROLOG (biblemulticonverter.schema.zef2005.PROLOG)4 Matcher (java.util.regex.Matcher)4 JAXBElement (javax.xml.bind.JAXBElement)4 FormattedText (biblemulticonverter.data.FormattedText)3 FormattingInstructionKind (biblemulticonverter.data.FormattedText.FormattingInstructionKind)3 Visitor (biblemulticonverter.data.FormattedText.Visitor)3