Search in sources :

Example 1 with Bible

use of biblemulticonverter.data.Bible in project BibleMultiConverter by schierlm.

the class StrongConcordance method doExport.

/**
 * Builds a Strong concordance for {@code bible} and appends it to a dictionary.
 *
 * <p>Every verse is visited with a {@code StrongInfoVisitor} (prefix {@code 'G'} for
 * books whose id reports {@code isNT()}, {@code 'H'} otherwise). The collected phrases
 * are grouped per Strong key; keys containing {@code '+'} are split and the phrase is
 * recorded under each part, with the full key appended in brackets. Afterwards the
 * dictionary is imported via {@code Diffable}, every {@code DICTIONARY_ENTRY} book
 * whose abbreviation has occurrences gets an "Occurrences in ..." section appended to
 * the prolog of its first chapter, and the dictionary is exported via {@code Diffable}.
 *
 * <p>Side effect: the chapter list of every book in {@code bible} is cleared while
 * collecting, to save memory for the dictionary.
 *
 * @param bible      the bible whose verses are scanned
 * @param exportArgs exactly two values: {@code [0]} the dictionary file to import,
 *                   {@code [1]} the argument handed to {@code Diffable.doExport}
 * @throws IOException if {@code exportArgs} does not have exactly two elements
 * @throws Exception   anything raised by the dictionary import or export
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    if (exportArgs.length != 2)
        throw new IOException("Two parameters needed!");
    Map<String, List<OccurrenceInfo>> occurrences = new HashMap<String, List<OccurrenceInfo>>();
    int bookIndex = 0;
    for (Book bk : bible.getBooks()) {
        int cnumber = 0;
        for (Chapter ch : bk.getChapters()) {
            cnumber++;
            for (Verse v : ch.getVerses()) {
                Map<String, List<StringBuilder>> strongInfo = new HashMap<>();
                v.accept(new StrongInfoVisitor(strongInfo, bk.getId().isNT() ? 'G' : 'H'));
                for (Map.Entry<String, List<StringBuilder>> e : strongInfo.entrySet()) {
                    // a key like "A+B" is indexed under both A and B, tagged with the full key
                    String[] strongs = e.getKey().split("\\+");
                    String suffix = "";
                    if (strongs.length > 1)
                        suffix = " [" + e.getKey() + "]";
                    for (StringBuilder val : e.getValue()) {
                        OccurrenceInfo info = new OccurrenceInfo(val.toString().trim() + suffix, bookIndex, cnumber, v.getNumber());
                        for (String strong : strongs) {
                            List<OccurrenceInfo> occInfo = occurrences.get(strong);
                            if (occInfo == null) {
                                occInfo = new ArrayList<OccurrenceInfo>();
                                occurrences.put(strong, occInfo);
                            }
                            occInfo.add(info);
                        }
                    }
                }
            }
        }
        // save memory for the dictionary
        bk.getChapters().clear();
        bookIndex++;
    }
    Diffable diffable = new Diffable();
    Bible dict = diffable.doImport(new File(exportArgs[0]));
    for (Book bk : dict.getBooks()) {
        if (bk.getId() != BookID.DICTIONARY_ENTRY)
            continue;
        // the map never holds null values, so a null result means "no occurrences"
        List<OccurrenceInfo> occ = occurrences.remove(bk.getAbbr());
        if (occ == null)
            continue;
        FormattedText old = bk.getChapters().get(0).getProlog();
        FormattedText changed = new FormattedText();
        Visitor<RuntimeException> v = changed.getAppendVisitor();
        old.accept(v);
        v.visitHeadline(1).visitText("Occurrences in " + bible.getName());
        Collections.sort(occ);
        // Walk the sorted list by index; removing from the head of an ArrayList
        // for every element would make this quadratic.
        int start = 0;
        while (start < occ.size()) {
            OccurrenceInfo first = occ.get(start);
            int end = start + 1;
            while (end < occ.size() && occ.get(end).phrase.equals(first.phrase)) {
                end++;
            }
            // read-only view of all occurrences sharing the same phrase
            List<OccurrenceInfo> part = occ.subList(start, end);
            v.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText(first.phrase + " (" + part.size() + "):");
            for (int i = 0; i < part.size(); i++) {
                v.visitText(i == 0 ? " " : ", ");
                int cnt = 1;
                OccurrenceInfo curr = part.get(i);
                // collapse consecutive equal occurrences into one reference with a count
                while (i + 1 < part.size() && part.get(i + 1).equals(curr)) {
                    cnt++;
                    i++;
                }
                Book book = bible.getBooks().get(curr.bookIndex);
                v.visitCrossReference(book.getAbbr(), book.getId(), curr.chapter, curr.verse, curr.chapter, curr.verse).visitText(book.getAbbr() + " " + curr.chapter + ":" + curr.verse);
                if (cnt > 1)
                    v.visitText(" (" + cnt + ")");
            }
            v.visitLineBreak(LineBreakKind.PARAGRAPH);
            start = end;
        }
        changed.finished();
        bk.getChapters().get(0).setProlog(changed);
    }
    // whatever is left was referenced in the bible but has no dictionary entry
    if (!occurrences.isEmpty())
        System.out.println("Missing Strong references in dictionary: " + occurrences.keySet());
    diffable.doExport(dict, new String[] { exportArgs[1] });
}
Also used : HashMap(java.util.HashMap) Bible(biblemulticonverter.data.Bible) ArrayList(java.util.ArrayList) Book(biblemulticonverter.data.Book) ArrayList(java.util.ArrayList) List(java.util.List) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FormattedText(biblemulticonverter.data.FormattedText) HashMap(java.util.HashMap) Map(java.util.Map) File(java.io.File) Verse(biblemulticonverter.data.Verse)

Example 2 with Bible

use of biblemulticonverter.data.Bible in project BibleMultiConverter by schierlm.

the class StrongDictionary method doImport.

/**
 * Builds a "Strong's dictionary" bible by downloading and parsing two XML files:
 * the Greek dictionary (entries named {@code G<number>}) and the Hebrew dictionary
 * (entries named by their {@code id} attribute). Each entry becomes a
 * {@code DICTIONARY_ENTRY} book with a single chapter whose prolog holds the
 * formatted entry.
 *
 * @param inputFile not used by this importer; the data is fetched from fixed URLs
 * @return the assembled dictionary, with a metadata book first
 * @throws IOException if an unexpected element, text node, language code or
 *                     mismatching Strong number is encountered
 * @throws Exception   on download or XML parsing failures
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    Bible result = new Bible("Strong's dictionary");
    MetadataBook mb = new MetadataBook();
    mb.setValue(MetadataBookKey.description, "Strong's dictionary compiled by BibleMultiConverter from public sources.");
    mb.setValue(MetadataBookKey.source, "https://github.com/openscriptures/HebrewLexicon/ and https://github.com/morphgnt/strongs-dictionary-xml/");
    mb.setValue(MetadataBookKey.rights, "Strong's Greek Dictionary is in the public domain. Strong's Hebrew Dictionary is provided as XML files by the Open Scriptures Hebrew Bible Project, which are licensed CC-BY-4.0.");
    mb.finished();
    result.getBooks().add(mb.getBook());
    DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    Document doc;
    try (InputStream in = new URL("https://raw.githubusercontent.com/morphgnt/strongs-dictionary-xml/master/strongsgreek.xml").openStream()) {
        doc = db.parse(in);
    }
    // Greek part: entries are the children of the root's last child
    for (Node entryNode = doc.getDocumentElement().getLastChild().getFirstChild(); entryNode != null; entryNode = entryNode.getNextSibling()) {
        Element entry = (Element) entryNode;
        int number = Integer.parseInt(entry.getAttribute("strongs"));
        System.out.println("G" + number);
        Book bk = new Book("G" + number, BookID.DICTIONARY_ENTRY, "G" + number, "G" + number);
        FormattedText prolog = new FormattedText();
        bk.getChapters().add(new Chapter());
        bk.getChapters().get(0).setProlog(prolog);
        result.getBooks().add(bk);
        Visitor<RuntimeException> v = prolog.getAppendVisitor();
        for (Node childNode = entry.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
            if (childNode instanceof Text) {
                Node next = childNode.getNextSibling();
                // " or " directly before another <greek> element separates alternative spellings;
                // the null check keeps a trailing " or " from causing a NullPointerException
                if (childNode.getTextContent().replaceAll("[ \r\n\t]+", " ").equals(" or ") && next != null && next.getNodeName().equals("greek")) {
                    v.visitFormattingInstruction(FormattingInstructionKind.ITALIC).visitText("-or-");
                    v.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (childNode.getTextContent().trim().length() > 0) {
                    visitAttribute(v, "Remark", childNode.getTextContent());
                }
                continue;
            }
            Element elem = (Element) childNode;
            switch(elem.getNodeName()) {
                case "strongs":
                    // sanity check: nested number must match the entry's attribute
                    int compNumber = Integer.parseInt(elem.getTextContent());
                    if (compNumber != number)
                        throw new IOException(compNumber + " != " + number);
                    break;
                case "greek":
                    v.visitHeadline(1).visitText(elem.getAttribute("unicode"));
                    visitAttribute(v, "Transliteration", elem.getAttribute("translit"));
                    break;
                case "pronunciation":
                    visitAttribute(v, "Pronunciation", elem.getAttribute("strongs"));
                    break;
                case "strongs_derivation":
                    visitAttribute(v, "Strongs Derivation", parseGreekContent(elem));
                    break;
                case "strongs_def":
                    visitAttribute(v, "Strongs Definition", parseGreekContent(elem));
                    break;
                case "kjv_def":
                    visitAttribute(v, "KJV Definition", parseGreekContent(elem));
                    if (elem.getNextSibling() != null && !elem.getNextSibling().getNodeName().equals("see")) {
                        // Wrap everything between <kjv_def> and the first <see> into a synthetic
                        // <more_info> element, which the loop then handles as its own case.
                        Element moreInfo = doc.createElement("more_info");
                        elem.getParentNode().insertBefore(moreInfo, elem.getNextSibling());
                        while (moreInfo.getNextSibling() != null) {
                            if (moreInfo.getNextSibling().getNodeName().equals("see"))
                                break;
                            moreInfo.appendChild(moreInfo.getNextSibling());
                        }
                        if (moreInfo.getTextContent().trim().isEmpty())
                            moreInfo.getParentNode().removeChild(moreInfo);
                    }
                    break;
                case "strongsref":
                    visitAttribute(v, "Reference", "[" + elem.getAttribute("language").substring(0, 1) + Integer.parseInt(elem.getAttribute("strongs")) + "]");
                    // previously fell through into "more_info" and reported the reference a second time
                    break;
                case "more_info":
                    visitAttribute(v, "More Information", parseGreekContent(elem));
                    break;
                case "see":
                    visitAttribute(v, "See Also", "[" + elem.getAttribute("language").substring(0, 1) + Integer.parseInt(elem.getAttribute("strongs")) + "]");
                    break;
                default:
                    throw new IOException(elem.getNodeName());
            }
        }
        prolog.trimWhitespace();
        prolog.finished();
    }
    try (InputStream in = new URL("https://raw.githubusercontent.com/openscriptures/HebrewLexicon/master/HebrewStrong.xml").openStream()) {
        doc = db.parse(in);
    }
    // Hebrew part: entries are direct children of the root; only blank text is tolerated in between
    for (Node entryNode = doc.getDocumentElement().getFirstChild(); entryNode != null; entryNode = entryNode.getNextSibling()) {
        if (entryNode instanceof Text) {
            if (!entryNode.getTextContent().trim().isEmpty()) {
                throw new IOException(entryNode.getTextContent());
            }
            continue;
        }
        Element entry = (Element) entryNode;
        String id = entry.getAttribute("id");
        System.out.println(id);
        Book bk = new Book(id, BookID.DICTIONARY_ENTRY, id, id);
        FormattedText prolog = new FormattedText();
        bk.getChapters().add(new Chapter());
        bk.getChapters().get(0).setProlog(prolog);
        result.getBooks().add(bk);
        Visitor<RuntimeException> v = prolog.getAppendVisitor();
        for (Node childNode = entry.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
            if (childNode instanceof Text) {
                if (!childNode.getTextContent().trim().isEmpty()) {
                    throw new IOException(childNode.getTextContent());
                }
                continue;
            }
            Element elem = (Element) childNode;
            switch(elem.getNodeName()) {
                case "w":
                    v.visitHeadline(1).visitText(elem.getTextContent());
                    visitAttribute(v, "Transliteration", elem.getAttribute("xlit"));
                    visitAttribute(v, "Pronunciation", elem.getAttribute("pron"));
                    if (elem.getAttribute("xml:lang").equals("heb")) {
                        visitAttribute(v, "Language", "Hebrew");
                    } else if (elem.getAttribute("xml:lang").equals("arc")) {
                        visitAttribute(v, "Language", "Aramaic");
                    } else if (elem.getAttribute("xml:lang").equals("x-pn")) {
                        visitAttribute(v, "Language", "Proper Noun");
                    } else {
                        throw new IOException(elem.getAttribute("xml:lang"));
                    }
                    visitAttribute(v, "Part of speech", elem.getAttribute("pos"));
                    break;
                case "source":
                    visitAttribute(v, "Source", parseHebrewContent(elem));
                    break;
                case "meaning":
                    visitAttribute(v, "Meaning", parseHebrewContent(elem));
                    break;
                case "usage":
                    visitAttribute(v, "Usage", parseHebrewContent(elem));
                    break;
                case "note":
                    // skip
                    break;
                default:
                    throw new IOException(elem.getNodeName());
            }
        }
        prolog.trimWhitespace();
        prolog.finished();
    }
    return result;
}
Also used : MetadataBook(biblemulticonverter.data.MetadataBook) InputStream(java.io.InputStream) Bible(biblemulticonverter.data.Bible) Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) Chapter(biblemulticonverter.data.Chapter) Text(org.w3c.dom.Text) FormattedText(biblemulticonverter.data.FormattedText) FormattedText(biblemulticonverter.data.FormattedText) IOException(java.io.IOException) Document(org.w3c.dom.Document) URL(java.net.URL) DocumentBuilder(javax.xml.parsers.DocumentBuilder) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book)

Example 3 with Bible

use of biblemulticonverter.data.Bible in project BibleMultiConverter by schierlm.

the class UnboundBible method doImport.

/**
 * Imports a bible from an "Unbound Bible" text file (UTF-8, tab separated, with a
 * {@code #}-prefixed header).
 *
 * <p>Two system properties change the behaviour: {@code unbound.roundtrip} keeps
 * whitespace and sorting information so that the file can be reproduced, and
 * {@code unbound.parsed} interprets {@code G123}/{@code H123} tokens and tokens
 * matching {@code Utils.RMAC_REGEX} as grammar information for the preceding word.
 *
 * @param inputFile the file to read
 * @return the imported bible, with a metadata book first
 * @throws IOException if the header is truncated or malformed, or a data line is
 *                     inconsistent (unknown book code, bad subverse, bad sorting)
 * @throws Exception   on any other failure while reading or parsing
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    boolean useRoundtrip = Boolean.getBoolean("unbound.roundtrip");
    boolean useParsedFormat = Boolean.getBoolean("unbound.parsed");
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile), StandardCharsets.UTF_8))) {
        String line = readRequiredLine(br);
        if (line.isEmpty())
            // mapped ones have an extra empty line...
            line = readRequiredLine(br);
        if (!line.equals("#THE UNBOUND BIBLE (www.unboundbible.org)"))
            throw new IOException(line);
        line = readRequiredLine(br);
        if (!line.startsWith("#name\t"))
            throw new IOException(line);
        Bible result = new Bible(line.substring(6));
        MetadataBook mb = new MetadataBook();
        result.getBooks().add(mb.getBook());
        line = readRequiredLine(br);
        if (!line.startsWith("#filetype\t"))
            throw new IOException(line);
        UnboundBibleFileType filetype = UnboundBibleFileType.valueOf(line.substring(10).replace('-', '_'));
        if (filetype == UnboundBibleFileType.Unmapped_BCV && useRoundtrip) {
            mb.setValue("filetype@unbound", filetype.toString());
        }
        readMetadata(br, mb, "copyright", MetadataBookKey.rights.toString());
        readMetadata(br, mb, "abbreviation", "abbreviation@unbound");
        readMetadata(br, mb, "language", MetadataBookKey.language.toString());
        readMetadata(br, mb, "note", MetadataBookKey.description.toString());
        mb.finished();
        line = readRequiredLine(br);
        if (!line.equals("#columns\t" + filetype.getColumnHeader()))
            throw new IOException(line);
        Map<BookID, Book> books = new HashMap<>();
        int sorting = -1, lastChapter = 0;
        String[] lastFields = new String[0];
        while ((line = br.readLine()) != null) {
            if (line.startsWith("#"))
                throw new IOException(line);
            if (line.trim().isEmpty())
                continue;
            String[] fields = filetype.parseFields(line, "orig_book_index", "orig_chapter", "orig_verse", "orig_subverse", "order_by", "text");
            // skip exact repetitions of the previous line (only when a sort key is present)
            if (fields[4] != null && Arrays.equals(fields, lastFields))
                continue;
            if (fields[2].isEmpty() && fields[4].equals("0") && fields[5].isEmpty())
                continue;
            UnboundBibleBookInfo bi = BOOK_INFO_BY_CODE.get(fields[0]);
            if (bi == null)
                throw new IOException("Invalid book code: " + fields[0] + " in " + line);
            Book bk = books.get(bi.id);
            if (bk == null) {
                bk = new Book(bi.id.getOsisID(), bi.id, bi.name, bi.name);
                result.getBooks().add(bk);
                books.put(bi.id, bk);
                lastChapter = 0;
            }
            int chapter = Integer.parseInt(fields[1]);
            String verse = "" + Integer.parseInt(fields[2]);
            if (chapter == 0) {
                chapter = 1;
                verse += "//";
            } else if (verse.equals("0")) {
                verse = "1-/";
            }
            String subverse = fields[3];
            if (subverse != null && !subverse.isEmpty()) {
                if (subverse.length() == 1 && subverse.charAt(0) >= 'a' && subverse.charAt(0) <= 'z') {
                    verse += subverse;
                } else if (subverse.length() == 2 && subverse.charAt(0) == subverse.charAt(1) && subverse.charAt(0) >= 'a' && subverse.charAt(0) <= 'z') {
                    verse += "." + subverse.charAt(0);
                } else if (subverse.matches("[.-][0-9]+")) {
                    verse += subverse;
                } else if (subverse.equals("EndA")) {
                    verse += "/a";
                } else if (subverse.equals("EndB")) {
                    verse += "/b";
                } else {
                    throw new IOException(subverse);
                }
            }
            if (chapter < lastChapter) {
                // keep the verse in the current chapter and record its real chapter in the number
                System.out.println("WARNING: Verses reordered across chapters detected");
                verse = chapter + "," + verse;
                chapter = lastChapter;
            }
            lastChapter = chapter;
            int sortingDiff = 0;
            if (fields[4] == null) {
                if (sorting != -1)
                    throw new IOException("Inconsistent sorting: " + line);
            } else {
                int s = Integer.parseInt(fields[4]);
                if (s <= sorting && lastFields[2].equals(fields[2]))
                    throw new IOException("Inconsistent sorting: " + s + " <= last " + sorting + " in " + line);
                // sort keys are expected to advance in steps of 10; remember any deviation
                if (s != (sorting == -1 ? 10 : sorting + 10)) {
                    sortingDiff = s - (sorting == -1 ? 10 : sorting + 10);
                }
                sorting = s;
                if (lastFields.length > 5 && lastFields[5].equals(fields[5]) && lastFields[2].equals(fields[2]))
                    System.out.println("WARNING: Same verse text as previous: " + line);
            }
            lastFields = fields;
            String text = fields[5];
            if (useRoundtrip) {
                // protect whitespace that would otherwise be normalized away
                String last;
                do {
                    last = text;
                    text = text.replace("  ", " \uFEFF ");
                } while (!last.equals(text));
                if (text.endsWith(" "))
                    text += "\uFEFF";
                if (text.startsWith(" "))
                    text = "\uFEFF" + text;
                if (text.length() == 0)
                    text = "\uFEFF-\uFEFF";
            } else {
                text = text.replaceAll("  +", " ").trim();
                if (text.length() == 0) {
                    // drop a book again if its only line so far was empty
                    if (bk.getChapters().size() == 0) {
                        books.remove(bk.getId());
                        result.getBooks().remove(bk);
                    }
                    continue;
                }
            }
            while (bk.getChapters().size() < chapter) bk.getChapters().add(new Chapter());
            if (bk.getChapters().size() != chapter && useRoundtrip)
                throw new RuntimeException("Invalid chapter order: " + bk.getId() + chapter + "/" + verse + " " + text);
            Chapter ch = bk.getChapters().get(chapter - 1);
            // disambiguate a verse number that repeats the previous one
            if (!ch.getVerses().isEmpty() && ch.getVerses().get(ch.getVerses().size() - 1).getNumber().equals(verse))
                verse += "/";
            Verse vv = new Verse(verse);
            Visitor<RuntimeException> vvv = vv.getAppendVisitor();
            if (useParsedFormat) {
                String[] words = text.split(" ");
                int[] strongs = new int[10];
                String[] rmacs = new String[10];
                int strongCount = 0, rmacCount = 0;
                String word = words[0];
                for (int i = 1; i < words.length; i++) {
                    if (words[i].matches("[GH][0-9]+")) {
                        // grow on demand so that a word with many tags cannot overflow the buffer
                        if (strongCount == strongs.length)
                            strongs = Arrays.copyOf(strongs, strongCount * 2);
                        strongs[strongCount++] = Integer.parseInt(words[i].substring(1));
                    } else if (words[i].matches(Utils.RMAC_REGEX)) {
                        if (rmacCount == rmacs.length)
                            rmacs = Arrays.copyOf(rmacs, rmacCount * 2);
                        rmacs[rmacCount++] = words[i];
                    } else {
                        // next plain word: emit the pending word together with its collected tags
                        if (strongCount > 0 || rmacCount > 0) {
                            vvv.visitGrammarInformation(strongCount > 0 ? Arrays.copyOf(strongs, strongCount) : null, rmacCount > 0 ? Arrays.copyOf(rmacs, rmacCount) : null, null).visitText(word);
                            strongCount = rmacCount = 0;
                        } else {
                            vvv.visitText(word);
                        }
                        vvv.visitText(" ");
                        word = words[i];
                    }
                }
                // emit the last pending word
                if (strongCount > 0 || rmacCount > 0) {
                    vvv.visitGrammarInformation(strongCount > 0 ? Arrays.copyOf(strongs, strongCount) : null, rmacCount > 0 ? Arrays.copyOf(rmacs, rmacCount) : null, null).visitText(word);
                } else {
                    vvv.visitText(word);
                }
            } else {
                vvv.visitText(text);
            }
            if (useRoundtrip && sortingDiff != 0) {
                vvv.visitExtraAttribute(ExtraAttributePriority.SKIP, "unbound", "sorting-diff", "" + sortingDiff);
            }
            vv.finished();
            ch.getVerses().add(vv);
        }
        return result;
    }
}

/** Reads one header line, failing with an IOException instead of returning null at end of file. */
private static String readRequiredLine(BufferedReader br) throws IOException {
    String line = br.readLine();
    if (line == null)
        throw new IOException("Unexpected end of file in header");
    return line;
}
Also used : MetadataBook(biblemulticonverter.data.MetadataBook) InputStreamReader(java.io.InputStreamReader) HashMap(java.util.HashMap) Bible(biblemulticonverter.data.Bible) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BookID(biblemulticonverter.data.BookID) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) BufferedReader(java.io.BufferedReader) Verse(biblemulticonverter.data.Verse)

Example 4 with Bible

use of biblemulticonverter.data.Bible in project BibleMultiConverter by schierlm.

the class YCHPalmBible method doImport.

/**
 * Imports a bible from a YCHPalmBible source file.
 *
 * <p>The file is first read as ISO-8859-1 and all whitespace runs are collapsed so
 * that the tag structure can be matched with plain {@code startsWith} checks. The
 * part after the {@code <PARSERINFO ...>} tag is then re-decoded with the charset
 * named by its {@code DECODE} parameter, falling back to {@code ENCODE} and finally
 * to ISO-8859-1.
 *
 * @param inputFile the file to read
 * @return the imported bible; a metadata book is added when the {@code INFO}
 *         parameter is present and differs from the name
 * @throws IOException if the tag structure is not as expected or a book number is unsupported
 * @throws Exception   on any other failure while reading or parsing
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    String content = new String(Files.readAllBytes(inputFile.toPath()), StandardCharsets.ISO_8859_1);
    content = content.replaceAll("[\r\n\t ]+", " ").replace(" <", "<").replaceAll("> ", ">");
    if (!content.startsWith("<PARSERINFO "))
        throw new IOException("Invalid file, does not start with <PARSERINFO>");
    int pos = content.indexOf('>');
    Map<String, String> params = parseParams(content.substring(12, pos));
    // Prefer DECODE, then ENCODE, then the default. The former if/else discarded a
    // present DECODE value and passed null on when neither parameter was given.
    String charset = params.get("DECODE");
    if (charset == null)
        charset = params.get("ENCODE");
    if (charset == null)
        charset = "ISO-8859-1";
    // ISO-8859-1 maps bytes 1:1 to chars, so this recovers the original bytes for re-decoding
    content = new String(content.substring(pos + 1).getBytes(StandardCharsets.ISO_8859_1), charset);
    if (!content.startsWith("<BIBLE ")) {
        throw new IOException("Missing tag <BIBLE>");
    }
    pos = content.indexOf('>');
    params = parseParams(content.substring(7, pos));
    String name = params.get("NAME");
    String info = params.get("INFO");
    if (name == null || name.isEmpty())
        name = "Untitled YCHPalmBible bible";
    Bible bbl = new Bible(name);
    if (info != null && !info.equals(name)) {
        MetadataBook mb = new MetadataBook();
        mb.setValue(MetadataBookKey.description, info);
        bbl.getBooks().add(mb.getBook());
    }
    int offs = pos + 1;
    while (content.startsWith("<BOOK ", offs)) {
        pos = content.indexOf('>', offs);
        params = parseParams(content.substring(offs + 6, pos));
        offs = pos + 1;
        String bname = params.get("NAME");
        int bnumber = Integer.parseInt(params.get("NUMBER"));
        String babbr = params.get("SHORTCUT");
        // the index into PALM_BOOK_NUMBERS is used as the Zefania book id
        BookID bid = null;
        for (int i = 0; i < PALM_BOOK_NUMBERS.length; i++) {
            if (PALM_BOOK_NUMBERS[i] == bnumber)
                bid = BookID.fromZefId(i);
        }
        if (bid == null)
            throw new IOException("Unsupported BOOK NUMBER: " + bnumber);
        Book bk = new Book(babbr, bid, bname, bname);
        while (content.startsWith("<CHAPTER>", offs)) {
            offs += 9;
            Chapter ch = new Chapter();
            int vnum = 1;
            while (content.startsWith("<VERSE>", offs)) {
                pos = content.indexOf("</VERSE>", offs);
                if (pos == -1)
                    throw new IOException("<VERSE> tag not closed: " + babbr + "/" + bname);
                String[] verseContent = parseVerseContent(content.substring(offs + 7, pos));
                offs = pos + 8;
                Verse vv = new Verse("" + vnum);
                // empty verses are skipped below but still consume a verse number
                vnum++;
                if (verseContent.length == 1) {
                    if (verseContent[0].isEmpty())
                        continue;
                    // untagged content is treated as plain verse text
                    verseContent = new String[] { "", "<VERSTEXT>", verseContent[0] };
                }
                if (!verseContent[0].isEmpty())
                    throw new IOException("Untagged text inside verse: " + verseContent[0]);
                // after the leading untagged slot, entries alternate between tag and content
                for (int i = 1; i < verseContent.length; i += 2) {
                    switch(verseContent[i]) {
                        case "<BOOKTEXT>":
                            if (bk.getChapters().size() > 0) {
                                throw new IOException("<BOOKTEXT> not in first chapter");
                            }
                            // recreate the (still empty) book with the text as its last constructor argument
                            bk = new Book(babbr, bid, bname, verseContent[i + 1]);
                            break;
                        case "<CHAPTEXT>":
                            vv.getAppendVisitor().visitHeadline(1).visitText(verseContent[i + 1]);
                            break;
                        case "<DESCTEXT>":
                            vv.getAppendVisitor().visitHeadline(9).visitText(verseContent[i + 1]);
                            break;
                        case "<VERSTEXT>":
                            vv.getAppendVisitor().visitText(verseContent[i + 1]);
                            break;
                        default:
                            throw new RuntimeException("Internal error parsing verse content: " + verseContent[i]);
                    }
                }
                ch.getVerses().add(vv);
            }
            if (!content.startsWith("</CHAPTER>", offs))
                throw new IOException("<CHAPTER> tag not closed: " + babbr + "/" + bname);
            offs += 10;
            bk.getChapters().add(ch);
        }
        if (!content.startsWith("</BOOK>", offs))
            throw new IOException("<BOOK> tag not closed: " + babbr + "/" + bname);
        offs += 7;
        bbl.getBooks().add(bk);
    }
    if (!content.substring(offs).equals("</BIBLE>"))
        throw new IOException("Unknown tag, </BIBLE> expected");
    return bbl;
}
Also used : MetadataBook(biblemulticonverter.data.MetadataBook) BookID(biblemulticonverter.data.BookID) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) Bible(biblemulticonverter.data.Bible) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Example 5 with Bible

use of biblemulticonverter.data.Bible in project BibleMultiConverter by schierlm.

the class ZefaniaXMLRoundtrip method parseBible.

/**
 * Converts a parsed Zefania XML document into a {@link Bible}.
 *
 * <p>The method works in three steps:
 * <ol>
 * <li>Status, version, revision and every non-null INFORMATION element of the
 * document are stored in a {@link MetadataBook}; the metadata book is only
 * added to the result if at least one key was set.</li>
 * <li>A first pass over all books builds the book-to-abbreviation map, so the
 * map is complete before any content is parsed.</li>
 * <li>A second pass creates one {@link Book} per BIBLEBOOK element and fills
 * its chapters from the CAPTION, REMARK, XREF, PROLOG and VERS elements.</li>
 * </ol>
 *
 * @param doc the unmarshalled Zefania XML document
 * @return the converted bible
 * @throws Exception in particular an {@link IOException} when the document
 *         contains an unexpected element, a caption/remark/xref that does not
 *         match a verse, more than one prolog per chapter, or an unparsable
 *         cross reference scope
 */
protected Bible parseBible(XMLBIBLE doc) throws Exception {
    Bible result = new Bible(doc.getBiblename());

    // --- Step 1: metadata -------------------------------------------------
    MetadataBook metadata = new MetadataBook();
    if (doc.getStatus() != null) {
        metadata.setValue(MetadataBookKey.status, doc.getStatus().value());
    }
    if (doc.getVersion() != null) {
        metadata.setValue(MetadataBookKey.version, doc.getVersion());
    }
    if (doc.getRevision() != null) {
        metadata.setValue(MetadataBookKey.revision, doc.getRevision().toString());
    }
    for (JAXBElement<?> elem : doc.getINFORMATION().getTitleOrCreatorOrDescription()) {
        if (elem.getValue() == null)
            continue;
        String value = normalize(elem.getValue().toString(), true).trim();
        // Values that are blank after normalization are stored as a placeholder.
        if (value.length() == 0)
            value = "-empty-";
        metadata.setValue(elem.getName().getLocalPart(), value);
    }
    metadata.finished();
    if (metadata.getKeys().size() > 0)
        result.getBooks().add(metadata.getBook());

    // --- Step 2: collect the abbreviation of every book ---------------------
    Set<String> abbrs = new HashSet<String>();
    Set<String> shortnames = new HashSet<String>();
    Map<BookID, String> abbrMap = new EnumMap<BookID, String>(BookID.class);
    List<BIBLEBOOK> nl = doc.getBIBLEBOOK();
    for (BIBLEBOOK bookElem : nl) {
        String shortname = bookElem.getBsname();
        int number = bookElem.getBnumber().intValue();
        BookID bookID = BookID.fromZefId(number);
        shortname = zefShortNameOrDefault(shortname, bookID);
        abbrMap.put(bookID, zefUniqueAbbreviation(shortname, abbrs));
    }

    // --- Step 3: build the books --------------------------------------------
    // The abbreviations are derived again in document order, so the set of
    // used abbreviations has to start out empty to yield the same results.
    abbrs.clear();
    for (BIBLEBOOK bookElem : nl) {
        String shortname = bookElem.getBsname();
        String longname = bookElem.getBname();
        int number = bookElem.getBnumber().intValue();
        BookID bookID = BookID.fromZefId(number);
        shortname = zefShortNameOrDefault(shortname, bookID);
        if (longname == null)
            longname = "_" + bookID.getEnglishName();
        else if (longname.length() == 0)
            longname = "_" + bookID.getEnglishName() + "[[]]";
        else
            longname = longname.replaceAll("  ++", " ").trim();
        String abbr = zefUniqueAbbreviation(shortname, abbrs);

        // Special cases for known inconsistent book names; the original value
        // is kept inside the [[...]] suffix.
        if (shortname.equals("Gen") && longname.equals("Genesis") && bookID == BookID.BOOK_Exod) {
            System.out.println("WARNING: Book number " + bookID.getZefID() + " has name " + longname);
            shortname = "Exo[[Gen]]";
            longname = "Exodus[[Genesis]]";
        }
        if (shortname.equals("1Chr") && longname.equals("2 Chronicles")) {
            System.out.println("WARNING: Book name 2 Chronicles has short name 1Chr");
            shortname = "2Chr[[1Chr]]";
        }
        if (shortnames.contains(shortname)) {
            System.out.println("WARNING: Duplicate short name " + shortname);
            for (int i = 2; i < 100; i++) {
                if (!shortnames.contains(shortname + i + "[[" + shortname + "]]")) {
                    shortname = shortname + i + "[[" + shortname + "]]";
                    break;
                }
            }
        }
        shortnames.add(shortname);

        Book book = new Book(abbr, bookID, shortname, longname);
        // Verse number the buffered captions refer to, or -1 if none is pending.
        int lastvref = -1;
        // Captions are collected here until the verse they belong to is read.
        // NOTE(review): captions still buffered when the book ends are never
        // attached to a verse - confirm this is intended.
        List<Headline> headlineBuffer = new ArrayList<Headline>();
        for (CHAPTER chapterElem : bookElem.getCHAPTER()) {
            int chapterNumber = chapterElem.getCnumber().intValue();
            // Pad with empty chapters so the chapter can be addressed by number.
            while (book.getChapters().size() < chapterNumber) book.getChapters().add(new Chapter());
            Chapter chapter = book.getChapters().get(chapterNumber - 1);
            for (Object item : chapterElem.getPROLOGOrCAPTIONOrVERS()) {
                if (item instanceof CAPTION) {
                    CAPTION caption = (CAPTION) item;
                    int captionVref = caption.getVref().intValue();
                    // All captions buffered together must refer to the same verse.
                    if (lastvref != -1 && lastvref != captionVref)
                        throw new IOException("Caption for verse " + captionVref + " found while captions for verse " + lastvref + " are still pending");
                    lastvref = captionVref;
                    int level;
                    if (caption.getType() == null) {
                        level = 9;
                    } else {
                        switch(caption.getType()) {
                            case X_H_1:
                                level = 1;
                                break;
                            case X_H_2:
                                level = 2;
                                break;
                            case X_H_3:
                                level = 3;
                                break;
                            case X_H_4:
                                level = 4;
                                break;
                            case X_H_5:
                                level = 5;
                                break;
                            case X_H_6:
                                level = 6;
                                break;
                            default:
                                throw new IOException("Unsupported caption type: " + caption.getType());
                        }
                    }
                    Headline h = new Headline(level);
                    headlineBuffer.add(h);
                    if (!parseContent(h.getAppendVisitor(), caption.getContent(), abbrMap)) {
                        visitEmptyMarker(h.getAppendVisitor());
                    } else {
                        h.trimWhitespace();
                    }
                    h.finished();
                } else if (item instanceof REMARK) {
                    // A remark becomes a footnote appended to an already parsed verse.
                    REMARK remark = (REMARK) item;
                    int vref = remark.getVref().intValue();
                    int idx = chapter.getVerseIndex("" + vref);
                    if (idx == -1)
                        throw new IOException(vref + ":" + remark.getContent());
                    Verse v = chapter.getVerses().get(idx);
                    if (remark.getContent().size() != 1)
                        throw new IOException("Remark for verse " + vref + " must have exactly one content item, found " + remark.getContent().size());
                    String remarkText = normalize((String) remark.getContent().get(0), true).trim();
                    v.getAppendVisitor().visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "remark").visitFootnote().visitText(remarkText);
                } else if (item instanceof XREF) {
                    // An outer xref becomes a footnote with one cross reference
                    // per space separated entry of its mscope attribute.
                    XREF xref = (XREF) item;
                    int vref = xref.getVref().intValue();
                    int idx = chapter.getVerseIndex("" + vref);
                    if (idx == -1)
                        throw new IOException(vref + ":" + xref.getMscope());
                    Verse v = chapter.getVerses().get(idx);
                    Visitor<RuntimeException> footnoteVisitor = v.getAppendVisitor().visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "outer-xref").visitFootnote();
                    boolean first = true;
                    for (String mscope : xref.getMscope().split(" ")) {
                        // Format: book;chapter[-endChapter];verse[-endVerse]
                        Matcher m = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(mscope);
                        if (!m.matches())
                            throw new IOException(mscope);
                        BookID xrefBookID = BookID.fromZefId(Integer.parseInt(m.group(1)));
                        int xrefChapter = Integer.parseInt(m.group(2)), endChapter = xrefChapter;
                        if (m.group(3) != null)
                            endChapter = Integer.parseInt(m.group(3).substring(1));
                        // Verse number 0 is replaced by the verse name "1//G".
                        String verse = m.group(4);
                        if (verse.equals("0"))
                            verse = "1//G";
                        String endVerse = m.group(5);
                        if (endVerse == null)
                            endVerse = verse;
                        else
                            endVerse = endVerse.substring(1);
                        if (endVerse.equals("0"))
                            endVerse = "1//G";
                        // Fall back to the OSIS id for books not present in this bible.
                        String xrefAbbr = abbrMap.get(xrefBookID);
                        if (xrefAbbr == null)
                            xrefAbbr = xrefBookID.getOsisID();
                        if (first)
                            first = false;
                        else
                            footnoteVisitor.visitText(" ");
                        // Within a single chapter, swap a reversed verse range.
                        if (xrefChapter == endChapter && !verse.equals("1//G") && !endVerse.equals("1//G") && Integer.parseInt(verse) > Integer.parseInt(endVerse)) {
                            String tmp = verse;
                            verse = endVerse;
                            endVerse = tmp;
                        }
                        footnoteVisitor.visitCrossReference(xrefAbbr, xrefBookID, xrefChapter, verse, endChapter, endVerse).visitText(xrefAbbr + " " + xrefChapter + ":" + verse);
                    }
                } else if (item instanceof PROLOG) {
                    PROLOG prolog = (PROLOG) item;
                    if (prolog.getVref().intValue() != 1)
                        throw new IOException("" + prolog.getVref());
                    if (chapter.getProlog() != null)
                        throw new IOException("More than one prolog found");
                    FormattedText prologText = new FormattedText();
                    // A prolog without any content is not stored at all.
                    if (parseContent(prologText.getAppendVisitor(), prolog.getContent(), abbrMap)) {
                        prologText.trimWhitespace();
                        prologText.finished();
                        chapter.setProlog(prologText);
                    }
                } else if (item instanceof VERS) {
                    VERS vers = (VERS) item;
                    int vnumber = vers.getVnumber().intValue();
                    // Pending captions must belong to exactly this verse.
                    if (lastvref != -1) {
                        if (lastvref != vnumber)
                            throw new IOException(lastvref + " != " + vnumber);
                        lastvref = -1;
                    }
                    Verse verse = new Verse("" + vnumber);
                    Visitor<RuntimeException> visitor = verse.getAppendVisitor();
                    boolean contentFound = false;
                    // Buffered captions are emitted as headlines at the start of the verse.
                    if (headlineBuffer.size() > 0) {
                        for (Headline h : headlineBuffer) {
                            h.accept(visitor.visitHeadline(h.getDepth()));
                        }
                        headlineBuffer.clear();
                        contentFound = true;
                    }
                    contentFound |= parseContent(visitor, vers.getContent(), abbrMap);
                    if (!contentFound) {
                        visitEmptyMarker(visitor);
                    }
                    verse.trimWhitespace();
                    chapter.getVerses().add(verse);
                } else {
                    throw new IOException(item.getClass().toString());
                }
            }
            // Verses are finished only after the whole chapter element was read,
            // because remarks and xrefs append footnotes to earlier verses.
            for (Verse v : chapter.getVerses()) v.finished();
        }
        result.getBooks().add(book);
    }
    return result;
}

/**
 * Returns the given short name, or a placeholder derived from the OSIS id of
 * the book if the short name is missing ({@code null}) or empty.
 */
private static String zefShortNameOrDefault(String shortname, BookID bookID) {
    if (shortname == null)
        return "_" + bookID.getOsisID();
    if (shortname.length() == 0)
        return "_" + bookID.getOsisID() + "[[]]";
    return shortname;
}

/**
 * Derives a book abbreviation from a short name, makes it distinct from the
 * abbreviations in {@code usedAbbrs} by appending a number (2 to 99) if
 * needed, and records the result in {@code usedAbbrs}.
 */
private static String zefUniqueAbbreviation(String shortname, Set<String> usedAbbrs) {
    String abbr = shortname.replaceAll("[^A-Z0-9a-zäöü]++", "");
    // Ensure the abbreviation does not start with a lowercase letter and has
    // at least two characters.
    if (abbr.length() == 0 || Character.isLowerCase(abbr.charAt(0)))
        abbr = "X" + abbr;
    if (abbr.length() == 1)
        abbr += "x";
    if (usedAbbrs.contains(abbr)) {
        for (int i = 2; i < 100; i++) {
            if (!usedAbbrs.contains(abbr + i)) {
                abbr = abbr + i;
                break;
            }
        }
    }
    usedAbbrs.add(abbr);
    return abbr;
}
Also used : Matcher(java.util.regex.Matcher) Bible(biblemulticonverter.data.Bible) ArrayList(java.util.ArrayList) BookID(biblemulticonverter.data.BookID) Book(biblemulticonverter.data.Book) MetadataBook(biblemulticonverter.data.MetadataBook) Headline(biblemulticonverter.data.FormattedText.Headline) VERS(biblemulticonverter.schema.zef2005.VERS) PROLOG(biblemulticonverter.schema.zef2005.PROLOG) EnumMap(java.util.EnumMap) REMARK(biblemulticonverter.schema.zef2005.REMARK) HashSet(java.util.HashSet) MetadataBook(biblemulticonverter.data.MetadataBook) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FormattedText(biblemulticonverter.data.FormattedText) BIBLEBOOK(biblemulticonverter.schema.zef2005.BIBLEBOOK) XREF(biblemulticonverter.schema.zef2005.XREF) CHAPTER(biblemulticonverter.schema.zef2005.CHAPTER) CAPTION(biblemulticonverter.schema.zef2005.CAPTION) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Aggregations

Bible (biblemulticonverter.data.Bible): 20, Book (biblemulticonverter.data.Book): 18, Chapter (biblemulticonverter.data.Chapter): 18, FormattedText (biblemulticonverter.data.FormattedText): 15, Verse (biblemulticonverter.data.Verse): 15, BookID (biblemulticonverter.data.BookID): 12, MetadataBook (biblemulticonverter.data.MetadataBook): 10, IOException (java.io.IOException): 10, ArrayList (java.util.ArrayList): 9, Headline (biblemulticonverter.data.FormattedText.Headline): 6, VirtualVerse (biblemulticonverter.data.VirtualVerse): 6, File (java.io.File): 5, EnumMap (java.util.EnumMap): 5, HashMap (java.util.HashMap): 5, Matcher (java.util.regex.Matcher): 5, BufferedReader (java.io.BufferedReader): 4, Visitor (biblemulticonverter.data.FormattedText.Visitor): 3, HashSet (java.util.HashSet): 3, BIBLEBOOK (biblemulticonverter.schema.zef2005.BIBLEBOOK): 2, CAPTION (biblemulticonverter.schema.zef2005.CAPTION): 2