Search in sources :

Example 1 with MetadataBook

use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.

the class StrongDictionary method doImport.

/**
 * Builds a "Strong's dictionary" {@link Bible} whose books are dictionary
 * entries, one per Strong number.
 *
 * <p>The Greek entries are downloaded from the morphgnt strongs-dictionary-xml
 * repository and the Hebrew entries from the openscriptures HebrewLexicon
 * repository; both documents are parsed as DOM trees. Each entry becomes a
 * {@link Book} with {@link BookID#DICTIONARY_ENTRY} and a single chapter whose
 * prolog holds the formatted entry text. Progress is printed to standard
 * output (one line per entry).
 *
 * @param inputFile not read by this method; all data comes from the two URLs
 * @return the assembled dictionary, including a metadata book
 * @throws Exception if downloading or parsing fails, or if an entry contains
 *         an unexpected element, text node, language code or a Strong number
 *         that does not match its enclosing entry (reported as
 *         {@link IOException})
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    Bible result = new Bible("Strong's dictionary");
    MetadataBook mb = new MetadataBook();
    mb.setValue(MetadataBookKey.description, "Strong's dictionary compiled by BibleMultiConverter from public sources.");
    mb.setValue(MetadataBookKey.source, "https://github.com/openscriptures/HebrewLexicon/ and https://github.com/morphgnt/strongs-dictionary-xml/");
    mb.setValue(MetadataBookKey.rights, "Strong's Greek Dictionary is in the public domain. Strong's Hebrew Dictionary is provided as XML files by the Open Scriptures Hebrew Bible Project, which are licensed CC-BY-4.0.");
    mb.finished();
    result.getBooks().add(mb.getBook());
    DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    Document doc;

    // ---- Greek dictionary ----
    try (InputStream in = new URL("https://raw.githubusercontent.com/morphgnt/strongs-dictionary-xml/master/strongsgreek.xml").openStream()) {
        doc = db.parse(in);
    }
    // The entries are the children of the last child of the root element; every
    // one of them is cast to Element, so no text nodes are expected in between.
    for (Node entryNode = doc.getDocumentElement().getLastChild().getFirstChild(); entryNode != null; entryNode = entryNode.getNextSibling()) {
        Element entry = (Element) entryNode;
        int number = Integer.parseInt(entry.getAttribute("strongs"));
        System.out.println("G" + number);
        Book bk = new Book("G" + number, BookID.DICTIONARY_ENTRY, "G" + number, "G" + number);
        FormattedText prolog = new FormattedText();
        bk.getChapters().add(new Chapter());
        bk.getChapters().get(0).setProlog(prolog);
        result.getBooks().add(bk);
        Visitor<RuntimeException> v = prolog.getAppendVisitor();
        for (Node childNode = entry.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
            if (childNode instanceof Text) {
                // A bare " or " directly before another <greek> element separates
                // alternative headwords; any other non-blank text is kept as a remark.
                if (childNode.getTextContent().replaceAll("[ \r\n\t]+", " ").equals(" or ") && childNode.getNextSibling().getNodeName().equals("greek")) {
                    v.visitFormattingInstruction(FormattingInstructionKind.ITALIC).visitText("-or-");
                    v.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (childNode.getTextContent().trim().length() > 0) {
                    visitAttribute(v, "Remark", childNode.getTextContent());
                }
                continue;
            }
            Element elem = (Element) childNode;
            switch(elem.getNodeName()) {
                case "strongs":
                    // Sanity check: the nested number must repeat the entry's own number.
                    int compNumber = Integer.parseInt(elem.getTextContent());
                    if (compNumber != number)
                        throw new IOException(compNumber + " != " + number);
                    break;
                case "greek":
                    v.visitHeadline(1).visitText(elem.getAttribute("unicode"));
                    visitAttribute(v, "Transliteration", elem.getAttribute("translit"));
                    break;
                case "pronunciation":
                    visitAttribute(v, "Pronunciation", elem.getAttribute("strongs"));
                    break;
                case "strongs_derivation":
                    visitAttribute(v, "Strongs Derivation", parseGreekContent(elem));
                    break;
                case "strongs_def":
                    visitAttribute(v, "Strongs Definition", parseGreekContent(elem));
                    break;
                case "kjv_def":
                    visitAttribute(v, "KJV Definition", parseGreekContent(elem));
                    // Everything between <kjv_def> and the next <see> is moved into a
                    // synthetic <more_info> element inserted right after this one, so the
                    // enclosing loop meets it as the next sibling and handles it below.
                    if (elem.getNextSibling() != null && !elem.getNextSibling().getNodeName().equals("see")) {
                        Element moreInfo = doc.createElement("more_info");
                        elem.getParentNode().insertBefore(moreInfo, elem.getNextSibling());
                        while (moreInfo.getNextSibling() != null) {
                            if (moreInfo.getNextSibling().getNodeName().equals("see"))
                                break;
                            moreInfo.appendChild(moreInfo.getNextSibling());
                        }
                        // Drop the wrapper again if it only collected whitespace.
                        if (moreInfo.getTextContent().trim().isEmpty())
                            moreInfo.getParentNode().removeChild(moreInfo);
                    }
                    break;
                case "strongsref":
                    visitAttribute(v, "Reference", "[" + elem.getAttribute("language").substring(0, 1) + Integer.parseInt(elem.getAttribute("strongs")) + "]");
                    // Must not fall through: a reference is not "More Information".
                    break;
                case "more_info":
                    visitAttribute(v, "More Information", parseGreekContent(elem));
                    break;
                case "see":
                    visitAttribute(v, "See Also", "[" + elem.getAttribute("language").substring(0, 1) + Integer.parseInt(elem.getAttribute("strongs")) + "]");
                    break;
                default:
                    throw new IOException(elem.getNodeName());
            }
        }
        prolog.trimWhitespace();
        prolog.finished();
    }

    // ---- Hebrew dictionary ----
    try (InputStream in = new URL("https://raw.githubusercontent.com/openscriptures/HebrewLexicon/master/HebrewStrong.xml").openStream()) {
        doc = db.parse(in);
    }
    for (Node entryNode = doc.getDocumentElement().getFirstChild(); entryNode != null; entryNode = entryNode.getNextSibling()) {
        // Only whitespace is tolerated between entries.
        if (entryNode instanceof Text) {
            if (!entryNode.getTextContent().trim().isEmpty()) {
                throw new IOException(entryNode.getTextContent());
            }
            continue;
        }
        Element entry = (Element) entryNode;
        String id = entry.getAttribute("id");
        System.out.println(id);
        Book bk = new Book(id, BookID.DICTIONARY_ENTRY, id, id);
        FormattedText prolog = new FormattedText();
        bk.getChapters().add(new Chapter());
        bk.getChapters().get(0).setProlog(prolog);
        result.getBooks().add(bk);
        Visitor<RuntimeException> v = prolog.getAppendVisitor();
        for (Node childNode = entry.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
            // Only whitespace is tolerated between the parts of an entry.
            if (childNode instanceof Text) {
                if (!childNode.getTextContent().trim().isEmpty()) {
                    throw new IOException(childNode.getTextContent());
                }
                continue;
            }
            Element elem = (Element) childNode;
            switch(elem.getNodeName()) {
                case "w":
                    v.visitHeadline(1).visitText(elem.getTextContent());
                    visitAttribute(v, "Transliteration", elem.getAttribute("xlit"));
                    visitAttribute(v, "Pronunciation", elem.getAttribute("pron"));
                    if (elem.getAttribute("xml:lang").equals("heb")) {
                        visitAttribute(v, "Language", "Hebrew");
                    } else if (elem.getAttribute("xml:lang").equals("arc")) {
                        visitAttribute(v, "Language", "Aramaic");
                    } else if (elem.getAttribute("xml:lang").equals("x-pn")) {
                        visitAttribute(v, "Language", "Proper Noun");
                    } else {
                        throw new IOException(elem.getAttribute("xml:lang"));
                    }
                    visitAttribute(v, "Part of speech", elem.getAttribute("pos"));
                    break;
                case "source":
                    visitAttribute(v, "Source", parseHebrewContent(elem));
                    break;
                case "meaning":
                    visitAttribute(v, "Meaning", parseHebrewContent(elem));
                    break;
                case "usage":
                    visitAttribute(v, "Usage", parseHebrewContent(elem));
                    break;
                case "note":
                    // skip
                    break;
                default:
                    throw new IOException(elem.getNodeName());
            }
        }
        prolog.trimWhitespace();
        prolog.finished();
    }
    return result;
}
Also used : MetadataBook(biblemulticonverter.data.MetadataBook) InputStream(java.io.InputStream) Bible(biblemulticonverter.data.Bible) Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) Chapter(biblemulticonverter.data.Chapter) Text(org.w3c.dom.Text) FormattedText(biblemulticonverter.data.FormattedText) FormattedText(biblemulticonverter.data.FormattedText) IOException(java.io.IOException) Document(org.w3c.dom.Document) URL(java.net.URL) DocumentBuilder(javax.xml.parsers.DocumentBuilder) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book)

Example 2 with MetadataBook

use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.

the class UnboundBible method doExport.

/**
 * Writes the given bible as an Unbound Bible text file (UTF-8, CRLF line
 * endings).
 *
 * <p>{@code exportArgs[0]} is the output file. When exactly two arguments are
 * given, {@code exportArgs[1]} is a tab-separated mapping file (UTF-8, eight
 * fields per line, empty lines and lines starting with {@code #} ignored) and
 * the output file type is forced to {@code Mapped_BCVS}. Otherwise the file
 * type is taken from the {@code filetype@unbound} metadata value, defaulting
 * to {@code Unmapped_BCVS}.
 *
 * <p>The system properties {@code unbound.roundtrip} and
 * {@code unbound.parsed} switch on roundtrip handling and the parsed text
 * format respectively.
 *
 * @param bible the bible to export
 * @param exportArgs output file name, optionally followed by a mapping file name
 * @throws Exception on I/O errors, or as {@link IOException} when a mapping
 *         line is malformed or a verse number cannot be converted
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    MetadataBook mb = bible.getMetadataBook();
    if (mb == null)
        mb = new MetadataBook();
    String fileTypeName = mb.getValue("filetype@unbound");
    UnboundBibleFileType fileType = fileTypeName != null ? UnboundBibleFileType.valueOf(fileTypeName) : UnboundBibleFileType.Unmapped_BCVS;
    // mapping: "<book> <chapter>:<verse> <subverse>" -> list of the first three mapping-file columns
    Map<String, List<String[]>> mapping = new HashMap<>();
    // extraEmptyVerses: preformatted output lines to emit before the verses of a book
    Map<BookID, List<String>> extraEmptyVerses = new EnumMap<>(BookID.class);
    List<Book> allBooks = new ArrayList<>(bible.getBooks());
    if (exportArgs.length == 2) {
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(exportArgs[1]), StandardCharsets.UTF_8))) {
            String line;
            // Books that only occur in "extra empty verse" lines and are not yet placed in allBooks.
            List<Book> nonexistingExtraEmptyVersesBooks = new ArrayList<>();
            while ((line = br.readLine()) != null) {
                if (line.isEmpty() || line.startsWith("#"))
                    continue;
                String[] fields = line.split("\t", -1);
                if (fields.length != 8)
                    throw new IOException(line);
                // Last column is a flag: 0 = regular mapping row, 1 = extra empty verse row.
                int isNull = Integer.parseInt(fields[7]);
                if (isNull == 0) {
                    String key = fields[3] + " " + fields[4] + ":" + fields[5] + " " + fields[6];
                    String[] value = Arrays.copyOf(fields, 3);
                    if (!mapping.containsKey(key))
                        mapping.put(key, new ArrayList<String[]>());
                    mapping.get(key).add(value);
                } else if (isNull == 1) {
                    BookID id = BOOK_INFO_BY_CODE.get(fields[3]).id;
                    if (!extraEmptyVerses.containsKey(id))
                        extraEmptyVerses.put(id, new ArrayList<String>());
                    extraEmptyVerses.get(id).add(fields[0] + "\t" + fields[1] + "\t" + fields[2] + "\t" + fields[3] + "\t" + fields[4] + "\t" + fields[5] + "\t" + fields[6] + "\t0\t");
                    Book existingBook = null;
                    for (Book bk : allBooks) {
                        if (bk.getId() == id)
                            existingBook = bk;
                    }
                    if (existingBook == null) {
                        // Remember a placeholder book (once per run of consecutive rows for the same id).
                        if (nonexistingExtraEmptyVersesBooks.isEmpty() || nonexistingExtraEmptyVersesBooks.get(nonexistingExtraEmptyVersesBooks.size() - 1).getId() != id)
                            nonexistingExtraEmptyVersesBooks.add(new Book(id.getOsisID(), id, id.getOsisID(), id.getOsisID()));
                    } else if (!nonexistingExtraEmptyVersesBooks.isEmpty()) {
                        // Insert the pending placeholder books directly before the next book that does exist.
                        int pos = allBooks.indexOf(existingBook);
                        allBooks.addAll(pos, nonexistingExtraEmptyVersesBooks);
                        nonexistingExtraEmptyVersesBooks.clear();
                    }
                } else {
                    throw new IOException(line);
                }
            }
        }
        fileType = UnboundBibleFileType.Mapped_BCVS;
    }
    boolean useRoundtrip = Boolean.getBoolean("unbound.roundtrip");
    boolean useParsedFormat = Boolean.getBoolean("unbound.parsed");
    try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(exportArgs[0]), StandardCharsets.UTF_8))) {
        // In roundtrip mode, mapped files start with an extra empty line.
        if (exportArgs.length == 2 && useRoundtrip)
            bw.write("\r\n");
        bw.write("#THE UNBOUND BIBLE (www.unboundbible.org)\r\n");
        bw.write("#name\t" + bible.getName() + "\r\n");
        bw.write("#filetype\t" + fileType.name().replace('_', '-') + "\r\n");
        writeMetadata(bw, "copyright", mb.getValue(MetadataBookKey.rights));
        writeMetadata(bw, "abbreviation", mb.getValue("abbreviation@unbound"));
        writeMetadata(bw, "language", mb.getValue(MetadataBookKey.language));
        writeMetadata(bw, "note", mb.getValue(MetadataBookKey.description));
        bw.write("#columns\t" + fileType.getColumnHeader() + "\r\n");
        // Single-element array so the running order_by value can be shared with the verse visitor.
        int[] sorting = { 0 };
        for (Book bk : allBooks) {
            if (bk.getId() == BookID.METADATA)
                continue;
            UnboundBibleBookInfo bi = BOOK_INFO_BY_ID.get(bk.getId());
            if (bi == null) {
                System.out.println("WARNING: Skipping unsupported book: " + bk.getAbbr());
                continue;
            }
            if (extraEmptyVerses.containsKey(bk.getId())) {
                for (String emptyVerse : extraEmptyVerses.get(bk.getId())) {
                    bw.write(emptyVerse + "\r\n");
                }
            }
            for (int cc = 0; cc < bk.getChapters().size(); cc++) {
                Chapter ch = bk.getChapters().get(cc);
                int chapter = cc + 1;
                for (Verse vv : ch.getVerses()) {
                    // vn = numeric verse part, svn = Unbound subverse column, c = chapter to write.
                    String vn = vv.getNumber(), svn = "";
                    int c = chapter;
                    // "<chapter>,<verse>" marks a verse stored in a different chapter than it belongs to.
                    if (vn.matches("[0-9]+,.*")) {
                        int pos = vn.indexOf(',');
                        c = Integer.parseInt(vn.substring(0, pos));
                        vn = vn.substring(pos + 1);
                    }
                    // Translate the internal verse-number suffixes back to Unbound chapter/verse/subverse values.
                    if (vn.equals("1-/") || vn.equals("1-//")) {
                        vn = "0";
                    } else if (c == 1 && vn.endsWith("//")) {
                        c = 0;
                        vn = vn.substring(0, vn.length() - 2);
                    } else if (vn.endsWith("/a")) {
                        vn = vn.substring(0, vn.length() - 2);
                        svn = "EndA";
                    } else if (vn.endsWith("/b")) {
                        vn = vn.substring(0, vn.length() - 2);
                        svn = "EndB";
                    } else if (vn.endsWith("/")) {
                        vn = vn.substring(0, vn.length() - 1);
                    } else if (vn.matches("[0-9]+[.][a-z]")) {
                        // "12.a" -> verse 12, subverse "aa" (the letter doubled)
                        svn = vn.substring(vn.length() - 1) + vn.substring(vn.length() - 1);
                        vn = vn.substring(0, vn.length() - 2);
                    } else if (!vn.matches("[0-9]+")) {
                        // Anything else: leading digits are the verse, the remainder is the subverse.
                        Matcher m = Pattern.compile("([0-9]+)([-,/.a-zG][-0-9,/.a-zG]*)").matcher(vn);
                        if (!m.matches())
                            throw new IOException(vn);
                        vn = m.group(1);
                        svn = m.group(2);
                    }
                    int v = Integer.parseInt(vn);
                    sorting[0] += 10;
                    StringBuilder sb = new StringBuilder();
                    vv.accept(new UnboundBibleVisitor(sb, sorting, useParsedFormat));
                    String text = sb.toString();
                    // Roundtrip mode: strip the U+FEFF markers that protect whitespace and empty verses.
                    if (useRoundtrip && text.contains("\uFEFF")) {
                        if (text.equals("\uFEFF-\uFEFF"))
                            text = "";
                        text = text.replace("\uFEFF ", " ").replace(" \uFEFF", " ");
                    }
                    // One output row per mapping entry returned by lookup for this verse.
                    for (String[] nrsva_fields : lookup(mapping, bi.code, c, v, svn)) {
                        String[] fields = new String[] { nrsva_fields[0], nrsva_fields[1], nrsva_fields[2], bi.code, "" + c, "" + v, svn, "" + sorting[0], text };
                        fileType.writeFields(bw, fields, "nrsva_book_index", "nrsva_chapter", "nrsva_verse", "orig_book_index", "orig_chapter", "orig_verse", "orig_subverse", "order_by", "text");
                    }
                }
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList) BufferedWriter(java.io.BufferedWriter) BookID(biblemulticonverter.data.BookID) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) ArrayList(java.util.ArrayList) List(java.util.List) EnumMap(java.util.EnumMap) MetadataBook(biblemulticonverter.data.MetadataBook) InputStreamReader(java.io.InputStreamReader) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) FileOutputStream(java.io.FileOutputStream) BufferedReader(java.io.BufferedReader) OutputStreamWriter(java.io.OutputStreamWriter) Verse(biblemulticonverter.data.Verse)

Example 3 with MetadataBook

use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.

the class UnboundBible method doImport.

/**
 * Reads an Unbound Bible text file (UTF-8) and converts it to a {@link Bible}.
 *
 * <p>The header must consist of the banner line, {@code #name},
 * {@code #filetype}, the optional metadata read by {@code readMetadata}, and
 * a {@code #columns} line matching the file type. Every following non-blank
 * line is one verse row.
 *
 * <p>The system properties {@code unbound.roundtrip} and
 * {@code unbound.parsed} switch on roundtrip handling (whitespace and sorting
 * information are preserved) and the parsed text format (words followed by
 * Strong numbers / morphology codes) respectively.
 *
 * @param inputFile the Unbound Bible file to read
 * @return the imported bible, including a metadata book
 * @throws Exception on I/O errors, or as {@link IOException} when the header
 *         or a verse row is not in the expected form
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    boolean useRoundtrip = Boolean.getBoolean("unbound.roundtrip");
    boolean useParsedFormat = Boolean.getBoolean("unbound.parsed");
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile), StandardCharsets.UTF_8))) {
        String line = br.readLine();
        if (line.isEmpty())
            // mapped ones have an extra empty line...
            line = br.readLine();
        if (!line.equals("#THE UNBOUND BIBLE (www.unboundbible.org)"))
            throw new IOException(line);
        line = br.readLine();
        if (!line.startsWith("#name\t"))
            throw new IOException(line);
        Bible result = new Bible(line.substring(6));
        MetadataBook mb = new MetadataBook();
        result.getBooks().add(mb.getBook());
        line = br.readLine();
        if (!line.startsWith("#filetype\t"))
            throw new IOException(line);
        UnboundBibleFileType filetype = UnboundBibleFileType.valueOf(line.substring(10).replace('-', '_'));
        // Only the Unmapped_BCV type is recorded in the metadata, and only in roundtrip mode.
        if (filetype == UnboundBibleFileType.Unmapped_BCV && useRoundtrip) {
            mb.setValue("filetype@unbound", filetype.toString());
        }
        readMetadata(br, mb, "copyright", MetadataBookKey.rights.toString());
        readMetadata(br, mb, "abbreviation", "abbreviation@unbound");
        readMetadata(br, mb, "language", MetadataBookKey.language.toString());
        readMetadata(br, mb, "note", MetadataBookKey.description.toString());
        mb.finished();
        line = br.readLine();
        if (!line.equals("#columns\t" + filetype.getColumnHeader()))
            throw new IOException(line);
        Map<BookID, Book> books = new HashMap<>();
        // sorting: last order_by value seen (-1 = none yet); lastChapter: chapter of the previous verse row.
        int sorting = -1, lastChapter = 0;
        String[] lastFields = new String[0];
        while ((line = br.readLine()) != null) {
            if (line.startsWith("#"))
                throw new IOException(line);
            if (line.trim().isEmpty())
                continue;
            // fields: [0] book code, [1] chapter, [2] verse, [3] subverse, [4] order_by, [5] text
            String[] fields = filetype.parseFields(line, "orig_book_index", "orig_chapter", "orig_verse", "orig_subverse", "order_by", "text");
            // Skip a row that exactly repeats the previous one.
            if (fields[4] != null && Arrays.equals(fields, lastFields))
                continue;
            // Skip rows with no verse number, order_by 0 and no text.
            if (fields[2].isEmpty() && fields[4].equals("0") && fields[5].isEmpty())
                continue;
            UnboundBibleBookInfo bi = BOOK_INFO_BY_CODE.get(fields[0]);
            if (bi == null)
                throw new IOException("Invalid book code: " + fields[0] + " in " + line);
            Book bk = books.get(bi.id);
            if (bk == null) {
                bk = new Book(bi.id.getOsisID(), bi.id, bi.name, bi.name);
                result.getBooks().add(bk);
                books.put(bi.id, bk);
                lastChapter = 0;
            }
            int chapter = Integer.parseInt(fields[1]);
            String verse = "" + Integer.parseInt(fields[2]);
            // Chapter 0 and verse 0 are encoded as suffixed verse numbers in chapter 1 / the current chapter.
            if (chapter == 0) {
                chapter = 1;
                verse += "//";
            } else if (verse.equals("0")) {
                verse = "1-/";
            }
            // Fold the subverse column into the verse number.
            String subverse = fields[3];
            if (subverse != null && !subverse.isEmpty()) {
                if (subverse.length() == 1 && subverse.charAt(0) >= 'a' && subverse.charAt(0) <= 'z') {
                    verse += subverse;
                } else if (subverse.length() == 2 && subverse.charAt(0) == subverse.charAt(1) && subverse.charAt(0) >= 'a' && subverse.charAt(0) <= 'z') {
                    // doubled letter, e.g. "aa" -> ".a"
                    verse += "." + subverse.charAt(0);
                } else if (subverse.matches("[.-][0-9]+")) {
                    verse += subverse;
                } else if (subverse.equals("EndA")) {
                    verse += "/a";
                } else if (subverse.equals("EndB")) {
                    verse += "/b";
                } else {
                    throw new IOException(subverse);
                }
            }
            // A row from an earlier chapter is kept in the current chapter, prefixed with its real chapter number.
            if (chapter < lastChapter) {
                System.out.println("WARNING: Verses reordered across chapters detected");
                verse = chapter + "," + verse;
                chapter = lastChapter;
            }
            lastChapter = chapter;
            // sortingDiff: deviation of this row's order_by from the expected "previous + 10".
            int sortingDiff = 0;
            if (fields[4] == null) {
                if (sorting != -1)
                    throw new IOException("Inconsistent sorting: " + line);
            } else {
                int s = Integer.parseInt(fields[4]);
                if (s <= sorting && lastFields[2].equals(fields[2]))
                    throw new IOException("Inconsistent sorting: " + s + " <= last " + sorting + " in " + line);
                if (s != (sorting == -1 ? 10 : sorting + 10)) {
                    sortingDiff = s - (sorting == -1 ? 10 : sorting + 10);
                }
                sorting = s;
                if (lastFields.length > 5 && lastFields[5].equals(fields[5]) && lastFields[2].equals(fields[2]))
                    System.out.println("WARNING: Same verse text as previous: " + line);
            }
            lastFields = fields;
            String text = fields[5];
            if (useRoundtrip) {
                // Protect repeated, leading and trailing spaces (and empty text) with U+FEFF markers.
                String last;
                do {
                    last = text;
                    text = text.replace("  ", " \uFEFF ");
                } while (!last.equals(text));
                if (text.endsWith(" "))
                    text += "\uFEFF";
                if (text.startsWith(" "))
                    text = "\uFEFF" + text;
                if (text.length() == 0)
                    text = "\uFEFF-\uFEFF";
            } else {
                // Normalize whitespace; drop empty verses, and drop the book again if it has no chapters yet.
                text = text.replaceAll("  +", " ").trim();
                if (text.length() == 0) {
                    if (bk.getChapters().size() == 0) {
                        books.remove(bk.getId());
                        result.getBooks().remove(bk);
                    }
                    continue;
                }
            }
            while (bk.getChapters().size() < chapter) bk.getChapters().add(new Chapter());
            if (bk.getChapters().size() != chapter && useRoundtrip)
                throw new RuntimeException("Invalid chapter order: " + bk.getId() + chapter + "/" + verse + " " + text);
            Chapter ch = bk.getChapters().get(chapter - 1);
            // Same number as the previous verse in this chapter: disambiguate with a "/" suffix.
            if (!ch.getVerses().isEmpty() && ch.getVerses().get(ch.getVerses().size() - 1).getNumber().equals(verse))
                verse += "/";
            Verse vv = new Verse(verse);
            Visitor<RuntimeException> vvv = vv.getAppendVisitor();
            if (useParsedFormat) {
                // Tokens after a word that look like Strong numbers or morphology codes are
                // collected and attached to that word as grammar information.
                String[] words = text.split(" ");
                int[] strongs = new int[10];
                String[] rmacs = new String[10];
                int strongCount = 0, rmacCount = 0;
                String word = words[0];
                for (int i = 1; i < words.length; i++) {
                    if (words[i].matches("[GH][0-9]+")) {
                        strongs[strongCount++] = Integer.parseInt(words[i].substring(1));
                    } else if (words[i].matches(Utils.RMAC_REGEX)) {
                        rmacs[rmacCount++] = words[i];
                    } else {
                        // Next ordinary word reached: flush the pending word first.
                        if (strongCount > 0 || rmacCount > 0) {
                            vvv.visitGrammarInformation(strongCount > 0 ? Arrays.copyOf(strongs, strongCount) : null, rmacCount > 0 ? Arrays.copyOf(rmacs, rmacCount) : null, null).visitText(word);
                            strongCount = rmacCount = 0;
                        } else {
                            vvv.visitText(word);
                        }
                        vvv.visitText(" ");
                        word = words[i];
                    }
                }
                // Flush the last word.
                if (strongCount > 0 || rmacCount > 0) {
                    vvv.visitGrammarInformation(strongCount > 0 ? Arrays.copyOf(strongs, strongCount) : null, rmacCount > 0 ? Arrays.copyOf(rmacs, rmacCount) : null, null).visitText(word);
                    strongCount = rmacCount = 0;
                } else {
                    vvv.visitText(word);
                }
            } else {
                vvv.visitText(text);
            }
            // Keep a non-standard order_by step so that an export can reproduce it.
            if (useRoundtrip && sortingDiff != 0) {
                vvv.visitExtraAttribute(ExtraAttributePriority.SKIP, "unbound", "sorting-diff", "" + sortingDiff);
            }
            vv.finished();
            ch.getVerses().add(vv);
        }
        return result;
    }
}
Also used : MetadataBook(biblemulticonverter.data.MetadataBook) InputStreamReader(java.io.InputStreamReader) HashMap(java.util.HashMap) Bible(biblemulticonverter.data.Bible) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BookID(biblemulticonverter.data.BookID) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) BufferedReader(java.io.BufferedReader) Verse(biblemulticonverter.data.Verse)

Example 4 with MetadataBook

use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.

the class YCHPalmBible method doImport.

/**
 * Reads a YCHPalmBible markup file and converts it to a {@link Bible}.
 *
 * <p>The file is first decoded as ISO-8859-1 and whitespace around tags is
 * collapsed. The leading {@code <PARSERINFO>} tag may name the real character
 * set in its {@code DECODE} attribute (or, failing that, {@code ENCODE}); if
 * neither is present ISO-8859-1 is kept. Everything after that tag is then
 * re-decoded with the chosen character set and parsed as
 * {@code <BIBLE>} / {@code <BOOK>} / {@code <CHAPTER>} / {@code <VERSE>}.
 *
 * @param inputFile the file to import
 * @return the imported bible; a metadata book with the description is added
 *         when the {@code INFO} attribute is present and differs from the name
 * @throws Exception on I/O errors, or as {@link IOException} when the markup
 *         does not have the expected structure or uses an unsupported book
 *         number
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    String content = new String(Files.readAllBytes(inputFile.toPath()), StandardCharsets.ISO_8859_1);
    content = content.replaceAll("[\r\n\t ]+", " ").replace(" <", "<").replaceAll("> ", ">");
    if (!content.startsWith("<PARSERINFO "))
        throw new IOException("Invalid file, does not start with <PARSERINFO>");
    int pos = content.indexOf('>');
    Map<String, String> params = parseParams(content.substring(12, pos));
    // Charset precedence: DECODE, then ENCODE, then ISO-8859-1 as the default.
    // (Previously a declared DECODE value was discarded and a file with neither
    // attribute ended up with a null charset name.)
    String charset = params.get("DECODE");
    if (charset == null)
        charset = params.get("ENCODE");
    if (charset == null)
        charset = "ISO-8859-1";
    // ISO-8859-1 maps bytes 1:1 to chars, so this recovers the original bytes
    // before decoding them with the declared charset.
    content = new String(content.substring(pos + 1).getBytes(StandardCharsets.ISO_8859_1), charset);
    if (!content.startsWith("<BIBLE ")) {
        throw new IOException("Missing tag <BIBLE>");
    }
    pos = content.indexOf('>');
    params = parseParams(content.substring(7, pos));
    String name = params.get("NAME");
    String info = params.get("INFO");
    if (name == null || name.isEmpty())
        name = "Untitled YCHPalmBible bible";
    Bible bbl = new Bible(name);
    if (info != null && !info.equals(name)) {
        MetadataBook mb = new MetadataBook();
        mb.setValue(MetadataBookKey.description, info);
        // Finish the metadata book once populated, as the other importers of
        // this project do before handing the bible on.
        mb.finished();
        bbl.getBooks().add(mb.getBook());
    }
    // offs is the parse position inside content; each recognised tag advances it.
    int offs = pos + 1;
    while (content.startsWith("<BOOK ", offs)) {
        pos = content.indexOf('>', offs);
        params = parseParams(content.substring(offs + 6, pos));
        offs = pos + 1;
        String bname = params.get("NAME");
        int bnumber = Integer.parseInt(params.get("NUMBER"));
        String babbr = params.get("SHORTCUT");
        // Map the Palm book number to a BookID via its index in PALM_BOOK_NUMBERS.
        BookID bid = null;
        for (int i = 0; i < PALM_BOOK_NUMBERS.length; i++) {
            if (PALM_BOOK_NUMBERS[i] == bnumber)
                bid = BookID.fromZefId(i);
        }
        if (bid == null)
            throw new IOException("Unsupported BOOK NUMBER: " + bnumber);
        Book bk = new Book(babbr, bid, bname, bname);
        while (content.startsWith("<CHAPTER>", offs)) {
            offs += 9;
            Chapter ch = new Chapter();
            int vnum = 1;
            while (content.startsWith("<VERSE>", offs)) {
                pos = content.indexOf("</VERSE>", offs);
                String[] verseContent = parseVerseContent(content.substring(offs + 7, pos));
                offs = pos + 8;
                Verse vv = new Verse("" + vnum);
                // The counter advances even for verses that are skipped below.
                vnum++;
                if (verseContent.length == 1) {
                    // Untagged verse: empty ones are dropped, others are plain verse text.
                    if (verseContent[0].isEmpty())
                        continue;
                    verseContent = new String[] { "", "<VERSTEXT>", verseContent[0] };
                }
                if (!verseContent[0].isEmpty())
                    throw new IOException("Untagged text inside verse: " + verseContent[0]);
                // verseContent alternates tag (odd index) and its text (even index), starting at 1.
                for (int i = 1; i < verseContent.length; i += 2) {
                    switch(verseContent[i]) {
                        case "<BOOKTEXT>":
                            if (bk.getChapters().size() > 0) {
                                throw new IOException("<BOOKTEXT> not in first chapter");
                            }
                            // Recreate the book with the given text as its long name.
                            bk = new Book(babbr, bid, bname, verseContent[i + 1]);
                            break;
                        case "<CHAPTEXT>":
                            vv.getAppendVisitor().visitHeadline(1).visitText(verseContent[i + 1]);
                            break;
                        case "<DESCTEXT>":
                            vv.getAppendVisitor().visitHeadline(9).visitText(verseContent[i + 1]);
                            break;
                        case "<VERSTEXT>":
                            vv.getAppendVisitor().visitText(verseContent[i + 1]);
                            break;
                        default:
                            throw new RuntimeException("Internal error parsing verse content: " + verseContent[i]);
                    }
                }
                ch.getVerses().add(vv);
            }
            if (!content.startsWith("</CHAPTER>", offs))
                throw new IOException("<CHAPTER> tag not closed: " + babbr + "/" + bname);
            offs += 10;
            bk.getChapters().add(ch);
        }
        if (!content.startsWith("</BOOK>", offs))
            throw new IOException("<BOOK> tag not closed: " + babbr + "/" + bname);
        offs += 7;
        bbl.getBooks().add(bk);
    }
    if (!content.substring(offs).equals("</BIBLE>"))
        throw new IOException("Unknown tag, </BIBLE> expected");
    return bbl;
}
Also used : MetadataBook(biblemulticonverter.data.MetadataBook) BookID(biblemulticonverter.data.BookID) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) Bible(biblemulticonverter.data.Bible) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Example 5 with MetadataBook

use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.

the class ZefDic method createXMLBible.

/**
 * Converts a {@link Bible} into a zefdic1 {@link Dictionary} JAXB object tree.
 *
 * <p>Header handling: dicversion and revision default to "1", refbible to "any" and the type to
 * {@code X_DICTIONARY}. If the bible name matches
 * {@code X_(DICTIONARY|COMMENTARY|STRONG|DAILY)@.*}, the part before the first {@code @} selects
 * the dictionary type and the remainder becomes the refbible; otherwise the bible name is used
 * as the title.
 *
 * <p>Metadata handling: the {@code version} and {@code revision} metadata keys override
 * dicversion/revision, keys contained in {@code INFORMATION_KEYS} are copied into the
 * INFORMATION element, and the placeholder value "-empty-" is written as an empty string.
 * Other keys are ignored.
 *
 * <p>Book handling: the metadata book is skipped silently, books that are not
 * {@code DICTIONARY_ENTRY} are skipped with a warning on stdout, and every remaining book
 * becomes one item whose id is the book's long name. The item content is produced by walking
 * the prolog of the book's first chapter with a visitor.
 *
 * @param bible the bible whose dictionary entries are exported
 * @return the populated dictionary document
 * @throws Exception declared by the signature; within this method an
 *         {@link IllegalStateException} is thrown when the prolog contains variation text
 */
protected Dictionary createXMLBible(Bible bible) throws Exception {
    final ObjectFactory of = new ObjectFactory();
    Dictionary doc = of.createDictionary();
    // Defaults; version/revision may be overridden from the metadata book below,
    // refbible/type from the bible name.
    doc.setDicversion("1");
    doc.setRevision("1");
    doc.setRefbible("any");
    doc.setType(TEnumDicType.X_DICTIONARY);
    String title = null;
    if (bible.getName().matches("X_(DICTIONARY|COMMENTARY|STRONG|DAILY)@.*")) {
        // Name encodes "<type>@<refbible>"; split only at the first '@'.
        // NOTE(review): title stays null in this branch, so the title element added below
        // carries a null value - confirm that this is intended.
        String[] parts = bible.getName().split("@", 2);
        doc.setType(TEnumDicType.valueOf(parts[0]));
        doc.setRefbible(parts[1]);
    } else {
        title = bible.getName();
    }
    doc.setINFORMATION(of.createTINFORMATION());
    // The title element is always the first INFORMATION entry.
    doc.getINFORMATION().getTitleOrCreatorOrDescription().add(new JAXBElement<String>(new QName("title"), String.class, title));
    MetadataBook metadata = bible.getMetadataBook();
    if (metadata != null) {
        for (String key : metadata.getKeys()) {
            String value = metadata.getValue(key);
            // "-empty-" is a placeholder that is exported as an empty string.
            if (value.equals("-empty-"))
                value = "";
            if (key.equals(MetadataBookKey.version.toString())) {
                doc.setDicversion(value);
            } else if (key.equals(MetadataBookKey.revision.toString())) {
                doc.setRevision(value);
            } else if (Arrays.asList(INFORMATION_KEYS).contains(key)) {
                // Whitelisted keys become INFORMATION child elements named after the key.
                doc.getINFORMATION().getTitleOrCreatorOrDescription().add(new JAXBElement<String>(new QName(key), String.class, value));
            }
        }
    }
    for (Book bk : bible.getBooks()) {
        // The metadata book was already handled above.
        if (bk.getId().equals(BookID.METADATA))
            continue;
        // Only dictionary entries can be exported; anything else is reported and skipped.
        if (!bk.getId().equals(BookID.DICTIONARY_ENTRY)) {
            System.out.println("WARNING: Unable to export book " + bk.getAbbr());
            continue;
        }
        final TItem item = of.createTItem();
        if (!bk.getLongName().equals(bk.getShortName())) {
            // Short and long name differ: emit an additional item with the short name as id
            // that consists of a title and a "see" reference to the long name. It is added to
            // the document before the real item.
            TItem itm = of.createTItem();
            itm.setId(bk.getShortName());
            appendTextElement(itm, "title", bk.getLongName());
            TParagraph para2 = of.createTParagraph();
            SeeType see = of.createSeeType();
            see.setContent(bk.getLongName());
            para2.getContent().add(new JAXBElement<SeeType>(new QName("see"), SeeType.class, see));
            itm.getContent().add(new JAXBElement<TParagraph>(new QName("description"), TParagraph.class, para2));
            doc.getItem().add(itm);
        }
        item.setId(bk.getLongName());
        doc.getItem().add(item);
        // Mutable state shared between the visitors below: the paragraph currently being filled.
        class ZefState {

            // Paragraph that top-level content is appended to until it is flushed.
            TParagraph para = of.createTParagraph();

            // When true, the next top-level line break is swallowed instead of starting a new
            // paragraph. Nothing in this method sets it to true.
            boolean eatParagraph = false;

            // Appends the current paragraph to the item as a "description" element (even if
            // the paragraph is empty) and starts a fresh one.
            public void flushPara(TItem item) {
                item.getContent().add(new JAXBElement<TParagraph>(new QName("description"), TParagraph.class, para));
                para = of.createTParagraph();
            }
        }
        final ZefState state = new ZefState();
        // The entry text is the prolog of the book's first chapter.
        FormattedText text = bk.getChapters().get(0).getProlog();
        // Visitor for content below the top level: appends everything to one fixed content
        // list (a paragraph, a formatting element or a style element). Constructs without a
        // counterpart in this export only print a warning to stdout.
        class LevelVisitor implements Visitor<RuntimeException> {

            // Content list that receives the text and elements produced by this visitor.
            final List<Serializable> target;

            // Targets the paragraph that is current at construction time; a later flush does
            // not retarget an already created visitor.
            private LevelVisitor(ZefState state) {
                target = state.para.getContent();
            }

            private LevelVisitor(MyAnyType parent) {
                target = parent.getContent();
            }

            private LevelVisitor(TStyle parent) {
                target = parent.getContent();
            }

            @Override
            public int visitElementTypes(String elementTypes) throws RuntimeException {
                return 0;
            }

            @Override
            public Visitor<RuntimeException> visitHeadline(int depth) throws RuntimeException {
                // Headlines are only handled by the top-level visitor.
                System.out.println("WARNING: Nested headlines are not supported");
                return null;
            }

            @Override
            public void visitStart() throws RuntimeException {
            }

            @Override
            public void visitText(String text) throws RuntimeException {
                // Empty strings are not added to the content list.
                if (text.length() > 0)
                    target.add(text);
            }

            @Override
            public Visitor<RuntimeException> visitFootnote() throws RuntimeException {
                System.out.println("WARNING: footnotes are not supported");
                return null;
            }

            @Override
            public Visitor<RuntimeException> visitCrossReference(String bookAbbr, BookID book, int firstChapter, String firstVerse, int lastChapter, String lastVerse) throws RuntimeException {
                // Only the start of the reference is written as a bib_link; for ranges a
                // warning is printed and the end chapter/verse is dropped.
                if (firstChapter != lastChapter || !firstVerse.equals(lastVerse))
                    System.out.println("WARNING: Cross references to verse ranges are not supported");
                BibLinkType b = of.createBibLinkType();
                b.setBn("" + book.getZefID());
                b.setCn1("" + firstChapter);
                b.setVn1(firstVerse);
                target.add(new JAXBElement<BibLinkType>(new QName("bib_link"), BibLinkType.class, b));
                return null;
            }

            @Override
            public Visitor<RuntimeException> visitFormattingInstruction(FormattingInstructionKind kind) throws RuntimeException {
                // Four kinds map to dedicated elements; every other kind is written as a
                // style element carrying the kind's CSS.
                String tag;
                switch(kind) {
                    case BOLD:
                        tag = "strong";
                        break;
                    case ITALIC:
                        tag = "em";
                        break;
                    case SUPERSCRIPT:
                        tag = "sup";
                        break;
                    case SUBSCRIPT:
                        tag = "sub";
                        break;
                    default:
                        return visitCSSFormatting(kind.getCss());
                }
                MyAnyType mat = of.createMyAnyType();
                target.add(new JAXBElement<MyAnyType>(new QName(tag), MyAnyType.class, mat));
                // Nested content goes into the newly created element.
                return new LevelVisitor(mat);
            }

            @Override
            public Visitor<RuntimeException> visitCSSFormatting(String css) throws RuntimeException {
                TStyle style = of.createTStyle();
                style.setCss(css);
                target.add(of.createTStyleSTYLE(style));
                return new LevelVisitor(style);
            }

            @Override
            public void visitVerseSeparator() throws RuntimeException {
                System.out.println("WARNING: Verse separators are not supported");
            }

            @Override
            public void visitLineBreak(LineBreakKind kind) throws RuntimeException {
                // Line breaks only start new paragraphs at the top level.
                System.out.println("WARNING: Nested line breaks are not supported");
            }

            @Override
            public Visitor<RuntimeException> visitGrammarInformation(int[] strongs, String[] rmac, int[] sourceIndices) throws RuntimeException {
                System.out.println("WARNING: Grammar information is not supported");
                return null;
            }

            @Override
            public Visitor<RuntimeException> visitDictionaryEntry(String dictionary, String entry) throws RuntimeException {
                if (dictionary.equals("reflink")) {
                    // The mscope is the entry without its first character, with every '-'
                    // replaced by ';'.
                    RefLinkType r = of.createRefLinkType();
                    r.setMscope(entry.substring(1).replace('-', ';'));
                    target.add(new JAXBElement<RefLinkType>(new QName("reflink"), RefLinkType.class, r));
                } else {
                    // Any other dictionary becomes a "see" element; the dictionary name "dict"
                    // is written as target "x-self".
                    SeeType see = of.createSeeType();
                    see.setTarget(dictionary.equals("dict") ? "x-self" : dictionary);
                    see.setContent(entry);
                    target.add(new JAXBElement<SeeType>(new QName("see"), SeeType.class, see));
                }
                return null;
            }

            @Override
            public void visitRawHTML(RawHTMLMode mode, String raw) throws RuntimeException {
                System.out.println("WARNING: Raw html output not supported");
            }

            @Override
            public Visitor<RuntimeException> visitVariationText(String[] variations) throws RuntimeException {
                throw new IllegalStateException("Variations not supported");
            }

            @Override
            public Visitor<RuntimeException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws RuntimeException {
                // Let the priority decide how the extra attribute is handled.
                return prio.handleVisitor(category, this);
            }

            @Override
            public boolean visitEnd() throws RuntimeException {
                return false;
            }
        }
        ;
        // Top-level visitor: handles headlines and paragraph breaks itself and delegates
        // inline content to a LevelVisitor bound to the paragraph that is current at the time
        // of the call.
        text.accept(new Visitor<RuntimeException>() {

            @Override
            public int visitElementTypes(String elementTypes) throws RuntimeException {
                return 0;
            }

            @Override
            public Visitor<RuntimeException> visitHeadline(int depth) throws RuntimeException {
                MyAnyType mat = of.createMyAnyType();
                JAXBElement<MyAnyType> elem = new JAXBElement<>(new QName("title"), MyAnyType.class, mat);
                if (depth == 1) {
                    // Depth-1 headlines become direct children of the item; the pending
                    // paragraph is flushed first so that the title follows it.
                    state.flushPara(item);
                    item.getContent().add(elem);
                } else {
                    // Other depths are written as a title element inside the current paragraph.
                    state.para.getContent().add(elem);
                }
                return new LevelVisitor(mat);
            }

            @Override
            public void visitStart() throws RuntimeException {
            }

            @Override
            public void visitText(String text) throws RuntimeException {
                new LevelVisitor(state).visitText(text);
            }

            @Override
            public Visitor<RuntimeException> visitFootnote() throws RuntimeException {
                System.out.println("WARNING: footnotes are not supported");
                return null;
            }

            @Override
            public Visitor<RuntimeException> visitCrossReference(String bookAbbr, BookID book, int firstChapter, String firstVerse, int lastChapter, String lastVerse) throws RuntimeException {
                return new LevelVisitor(state).visitCrossReference(bookAbbr, book, firstChapter, firstVerse, lastChapter, lastVerse);
            }

            @Override
            public Visitor<RuntimeException> visitFormattingInstruction(FormattingInstructionKind kind) throws RuntimeException {
                return new LevelVisitor(state).visitFormattingInstruction(kind);
            }

            @Override
            public Visitor<RuntimeException> visitCSSFormatting(String css) throws RuntimeException {
                return new LevelVisitor(state).visitCSSFormatting(css);
            }

            @Override
            public void visitVerseSeparator() throws RuntimeException {
                System.out.println("WARNING: Verse separators are not supported");
            }

            @Override
            public void visitLineBreak(LineBreakKind kind) throws RuntimeException {
                // Every kind of line break ends the current paragraph, unless a break is to be
                // swallowed.
                if (state.eatParagraph) {
                    state.eatParagraph = false;
                } else {
                    state.flushPara(item);
                    // NOTE(review): flushPara already installs a fresh paragraph; this
                    // assignment replaces it with another empty one.
                    state.para = of.createTParagraph();
                }
            }

            @Override
            public Visitor<RuntimeException> visitGrammarInformation(int[] strongs, String[] rmac, int[] sourceIndices) throws RuntimeException {
                System.out.println("WARNING: Grammar information is not supported");
                return null;
            }

            @Override
            public Visitor<RuntimeException> visitDictionaryEntry(String dictionary, String entry) throws RuntimeException {
                return new LevelVisitor(state).visitDictionaryEntry(dictionary, entry);
            }

            @Override
            public void visitRawHTML(RawHTMLMode mode, String raw) throws RuntimeException {
                System.out.println("WARNING: Raw html output not supported");
            }

            @Override
            public Visitor<RuntimeException> visitVariationText(String[] variations) throws RuntimeException {
                throw new IllegalStateException("Variations not supported");
            }

            @Override
            public Visitor<RuntimeException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws RuntimeException {
                if (prio == ExtraAttributePriority.KEEP_CONTENT && category.equals("zefdic")) {
                    // Extra attributes of category "zefdic" with KEEP_CONTENT priority get no
                    // child visitor here.
                    // NOTE(review): the fragment below looks like the remainder of a removed
                    // statement (e.g. a "zefdic"/"field"/"pronunciation" attribute) - confirm.
                    // "zefdic", "field", "pronunciation");
                    return null;
                } else {
                    return prio.handleVisitor(category, this);
                }
            }

            @Override
            public boolean visitEnd() throws RuntimeException {
                return false;
            }
        });
        // Write out whatever paragraph is still pending after the last visited element.
        state.flushPara(item);
    }
    return doc;
}
Also used : Dictionary(biblemulticonverter.schema.zefdic1.Dictionary) TParagraph(biblemulticonverter.schema.zefdic1.TParagraph) ExtraAttributePriority(biblemulticonverter.data.FormattedText.ExtraAttributePriority) Visitor(biblemulticonverter.data.FormattedText.Visitor) TItem(biblemulticonverter.schema.zefdic1.TItem) RawHTMLMode(biblemulticonverter.data.FormattedText.RawHTMLMode) ObjectFactory(biblemulticonverter.schema.zefdic1.ObjectFactory) BookID(biblemulticonverter.data.BookID) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) List(java.util.List) BibLinkType(biblemulticonverter.schema.zefdic1.BibLinkType) RefLinkType(biblemulticonverter.schema.zefdic1.RefLinkType) MetadataBook(biblemulticonverter.data.MetadataBook) TStyle(biblemulticonverter.schema.zefdic1.TStyle) MyAnyType(biblemulticonverter.schema.zefdic1.MyAnyType) QName(javax.xml.namespace.QName) FormattingInstructionKind(biblemulticonverter.data.FormattedText.FormattingInstructionKind) FormattedText(biblemulticonverter.data.FormattedText) JAXBElement(javax.xml.bind.JAXBElement) LineBreakKind(biblemulticonverter.data.FormattedText.LineBreakKind) SeeType(biblemulticonverter.schema.zefdic1.SeeType)

Aggregations

Book (biblemulticonverter.data.Book)22 MetadataBook (biblemulticonverter.data.MetadataBook)22 Chapter (biblemulticonverter.data.Chapter)19 Verse (biblemulticonverter.data.Verse)17 BookID (biblemulticonverter.data.BookID)13 VirtualVerse (biblemulticonverter.data.VirtualVerse)12 Bible (biblemulticonverter.data.Bible)10 FormattedText (biblemulticonverter.data.FormattedText)10 Headline (biblemulticonverter.data.FormattedText.Headline)10 IOException (java.io.IOException)9 ArrayList (java.util.ArrayList)9 FileInputStream (java.io.FileInputStream)6 Visitor (biblemulticonverter.data.FormattedText.Visitor)5 BIBLEBOOK (biblemulticonverter.schema.zef2005.BIBLEBOOK)5 CAPTION (biblemulticonverter.schema.zef2005.CAPTION)5 CHAPTER (biblemulticonverter.schema.zef2005.CHAPTER)5 VERS (biblemulticonverter.schema.zef2005.VERS)5 File (java.io.File)5 FileOutputStream (java.io.FileOutputStream)5 EnumMap (java.util.EnumMap)5