Search in sources :

Example 6 with FormattedText

use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.

the class StrongDictionary method doImport.

/**
 * Builds a Strong's dictionary {@link Bible} from two XML files that are downloaded at
 * import time: the Greek dictionary (entries named {@code G<number>}) and the Hebrew
 * dictionary (entries named by their {@code id} attribute).
 *
 * Every dictionary entry becomes one {@link Book} of kind {@code DICTIONARY_ENTRY} with a
 * single chapter whose prolog holds the formatted entry text.
 *
 * @param inputFile not read by this method; both inputs are fetched from fixed URLs
 * @return the bible containing a metadata book followed by all dictionary entries
 * @throws IOException if an entry contains an unexpected element, unexpected text, an
 *         unknown language code, or a Strong number that does not match its entry
 * @throws Exception if downloading or parsing either XML document fails
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    Bible result = new Bible("Strong's dictionary");
    MetadataBook mb = new MetadataBook();
    mb.setValue(MetadataBookKey.description, "Strong's dictionary compiled by BibleMultiConverter from public sources.");
    mb.setValue(MetadataBookKey.source, "https://github.com/openscriptures/HebrewLexicon/ and https://github.com/morphgnt/strongs-dictionary-xml/");
    mb.setValue(MetadataBookKey.rights, "Strong's Greek Dictionary is in the public domain. Strong's Hebrew Dictionary is provided as XML files by the Open Scriptures Hebrew Bible Project, which are licensed CC-BY-4.0.");
    mb.finished();
    result.getBooks().add(mb.getBook());
    DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    Document doc;

    // ---- Greek dictionary ----
    try (InputStream in = new URL("https://raw.githubusercontent.com/morphgnt/strongs-dictionary-xml/master/strongsgreek.xml").openStream()) {
        doc = db.parse(in);
    }
    // The entries are the children of the last child of the document element.
    for (Node entryNode = doc.getDocumentElement().getLastChild().getFirstChild(); entryNode != null; entryNode = entryNode.getNextSibling()) {
        Element entry = (Element) entryNode;
        int number = Integer.parseInt(entry.getAttribute("strongs"));
        System.out.println("G" + number);
        Book bk = new Book("G" + number, BookID.DICTIONARY_ENTRY, "G" + number, "G" + number);
        FormattedText prolog = new FormattedText();
        bk.getChapters().add(new Chapter());
        bk.getChapters().get(0).setProlog(prolog);
        result.getBooks().add(bk);
        Visitor<RuntimeException> v = prolog.getAppendVisitor();
        for (Node childNode = entry.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
            if (childNode instanceof Text) {
                Node next = childNode.getNextSibling();
                // An " or " between two <greek> spellings is rendered as an italic separator.
                // The null check keeps a trailing " or " (no following sibling) from crashing;
                // it is then reported as a plain remark instead.
                if (childNode.getTextContent().replaceAll("[ \r\n\t]+", " ").equals(" or ") && next != null && next.getNodeName().equals("greek")) {
                    v.visitFormattingInstruction(FormattingInstructionKind.ITALIC).visitText("-or-");
                    v.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (childNode.getTextContent().trim().length() > 0) {
                    visitAttribute(v, "Remark", childNode.getTextContent());
                }
                continue;
            }
            Element elem = (Element) childNode;
            switch(elem.getNodeName()) {
                case "strongs":
                    // Sanity check: the nested number must agree with the entry attribute.
                    int compNumber = Integer.parseInt(elem.getTextContent());
                    if (compNumber != number)
                        throw new IOException(compNumber + " != " + number);
                    break;
                case "greek":
                    v.visitHeadline(1).visitText(elem.getAttribute("unicode"));
                    visitAttribute(v, "Transliteration", elem.getAttribute("translit"));
                    break;
                case "pronunciation":
                    visitAttribute(v, "Pronunciation", elem.getAttribute("strongs"));
                    break;
                case "strongs_derivation":
                    visitAttribute(v, "Strongs Derivation", parseGreekContent(elem));
                    break;
                case "strongs_def":
                    visitAttribute(v, "Strongs Definition", parseGreekContent(elem));
                    break;
                case "kjv_def":
                    visitAttribute(v, "KJV Definition", parseGreekContent(elem));
                    // Everything between <kjv_def> and the first <see> is moved into a synthetic
                    // <more_info> element, inserted right after the current node. The enclosing
                    // loop reaches it on its next step and handles it in the "more_info" case.
                    if (elem.getNextSibling() != null && !elem.getNextSibling().getNodeName().equals("see")) {
                        Element moreInfo = doc.createElement("more_info");
                        elem.getParentNode().insertBefore(moreInfo, elem.getNextSibling());
                        while (moreInfo.getNextSibling() != null) {
                            if (moreInfo.getNextSibling().getNodeName().equals("see"))
                                break;
                            moreInfo.appendChild(moreInfo.getNextSibling());
                        }
                        // Drop the wrapper again if it only collected whitespace.
                        if (moreInfo.getTextContent().trim().isEmpty())
                            moreInfo.getParentNode().removeChild(moreInfo);
                    }
                    break;
                case "strongsref":
                    visitAttribute(v, "Reference", "[" + elem.getAttribute("language").substring(0, 1) + Integer.parseInt(elem.getAttribute("strongs")) + "]");
                    // Without this break the reference fell through and was emitted a second
                    // time as a "More Information" attribute.
                    break;
                case "more_info":
                    visitAttribute(v, "More Information", parseGreekContent(elem));
                    break;
                case "see":
                    visitAttribute(v, "See Also", "[" + elem.getAttribute("language").substring(0, 1) + Integer.parseInt(elem.getAttribute("strongs")) + "]");
                    break;
                default:
                    throw new IOException(elem.getNodeName());
            }
        }
        prolog.trimWhitespace();
        prolog.finished();
    }

    // ---- Hebrew dictionary ----
    try (InputStream in = new URL("https://raw.githubusercontent.com/openscriptures/HebrewLexicon/master/HebrewStrong.xml").openStream()) {
        doc = db.parse(in);
    }
    for (Node entryNode = doc.getDocumentElement().getFirstChild(); entryNode != null; entryNode = entryNode.getNextSibling()) {
        if (entryNode instanceof Text) {
            // Only whitespace is allowed between entries.
            if (!entryNode.getTextContent().trim().isEmpty()) {
                throw new IOException(entryNode.getTextContent());
            }
            continue;
        }
        Element entry = (Element) entryNode;
        String id = entry.getAttribute("id");
        System.out.println(id);
        Book bk = new Book(id, BookID.DICTIONARY_ENTRY, id, id);
        FormattedText prolog = new FormattedText();
        bk.getChapters().add(new Chapter());
        bk.getChapters().get(0).setProlog(prolog);
        result.getBooks().add(bk);
        Visitor<RuntimeException> v = prolog.getAppendVisitor();
        for (Node childNode = entry.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
            if (childNode instanceof Text) {
                // Only whitespace is allowed between the elements of an entry.
                if (!childNode.getTextContent().trim().isEmpty()) {
                    throw new IOException(childNode.getTextContent());
                }
                continue;
            }
            Element elem = (Element) childNode;
            switch(elem.getNodeName()) {
                case "w":
                    v.visitHeadline(1).visitText(elem.getTextContent());
                    visitAttribute(v, "Transliteration", elem.getAttribute("xlit"));
                    visitAttribute(v, "Pronunciation", elem.getAttribute("pron"));
                    if (elem.getAttribute("xml:lang").equals("heb")) {
                        visitAttribute(v, "Language", "Hebrew");
                    } else if (elem.getAttribute("xml:lang").equals("arc")) {
                        visitAttribute(v, "Language", "Aramaic");
                    } else if (elem.getAttribute("xml:lang").equals("x-pn")) {
                        visitAttribute(v, "Language", "Proper Noun");
                    } else {
                        throw new IOException(elem.getAttribute("xml:lang"));
                    }
                    visitAttribute(v, "Part of speech", elem.getAttribute("pos"));
                    break;
                case "source":
                    visitAttribute(v, "Source", parseHebrewContent(elem));
                    break;
                case "meaning":
                    visitAttribute(v, "Meaning", parseHebrewContent(elem));
                    break;
                case "usage":
                    visitAttribute(v, "Usage", parseHebrewContent(elem));
                    break;
                case "note":
                    // skip
                    break;
                default:
                    throw new IOException(elem.getNodeName());
            }
        }
        prolog.trimWhitespace();
        prolog.finished();
    }
    return result;
}
Also used : MetadataBook(biblemulticonverter.data.MetadataBook) InputStream(java.io.InputStream) Bible(biblemulticonverter.data.Bible) Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) Chapter(biblemulticonverter.data.Chapter) Text(org.w3c.dom.Text) FormattedText(biblemulticonverter.data.FormattedText) FormattedText(biblemulticonverter.data.FormattedText) IOException(java.io.IOException) Document(org.w3c.dom.Document) URL(java.net.URL) DocumentBuilder(javax.xml.parsers.DocumentBuilder) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book)

Example 7 with FormattedText

use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.

the class TheWord method parseLine.

/**
 * Parses theWord markup in {@code line}, starting at {@code pos}, and reports the content
 * to {@code visitor}. Parsing stops at the end of the line or, when {@code endTag} is
 * given, at the first position where {@code endTag} starts.
 *
 * For every tag that needs a matching end tag, the content is first parsed into a
 * throw-away visitor to find out whether the end tag exists. Only if it does is the
 * content parsed a second time into the real visitor. Tags that are unsupported,
 * malformed or unterminated are not interpreted: their leading {@code '<'} is emitted as
 * text and parsing continues with the next character.
 *
 * @param visitor receiver of the parsed content
 * @param line the line to parse
 * @param pos index in {@code line} at which to start
 * @param endTag tag that terminates this nesting level, or {@code null} at top level
 * @return the index after the consumed content (after {@code endTag} if one was given),
 *         or {@code -1} if {@code endTag} was given but not found where parsing stopped
 */
private int parseLine(Visitor<RuntimeException> visitor, String line, int pos, String endTag) {
    // Sink used for the "does the end tag exist?" probing pass; its content is discarded.
    Visitor<RuntimeException> garbageVisitor = new FormattedText().getAppendVisitor();
    while (pos < line.length()) {
        if (line.charAt(pos) != '<') {
            // Plain text up to the next tag (or end of line), with whitespace runs collapsed.
            int endPos = line.indexOf('<', pos);
            if (endPos == -1)
                endPos = line.length();
            visitor.visitText(line.substring(pos, endPos).replaceAll("[\r\n\t ]+", " "));
            pos = endPos;
            continue;
        }
        if (endTag != null && line.startsWith(endTag, pos))
            break;
        if (pos + 2 < line.length() && line.charAt(pos + 2) == '>' && "bius".indexOf(line.charAt(pos + 1)) != -1) {
            String newEndTag = "</" + line.charAt(pos + 1) + ">";
            if (parseLine(garbageVisitor, line, pos + 3, newEndTag) != -1) {
                FormattingInstructionKind kind;
                switch(line.charAt(pos + 1)) {
                    case 'b':
                        kind = FormattingInstructionKind.BOLD;
                        break;
                    case 'i':
                        kind = FormattingInstructionKind.ITALIC;
                        break;
                    case 'u':
                        kind = FormattingInstructionKind.UNDERLINE;
                        break;
                    case 's':
                        kind = FormattingInstructionKind.STRIKE_THROUGH;
                        break;
                    default:
                        throw new RuntimeException("Cannot happen");
                }
                pos = parseLine(visitor.visitFormattingInstruction(kind), line, pos + 3, newEndTag);
                continue;
            }
        } else if (line.startsWith("<sub>", pos) || line.startsWith("<sup>", pos)) {
            String newEndTag = "</" + line.substring(pos + 1, pos + 5);
            if (parseLine(garbageVisitor, line, pos + 5, newEndTag) != -1) {
                FormattingInstructionKind kind = line.charAt(pos + 3) == 'p' ? FormattingInstructionKind.SUPERSCRIPT : FormattingInstructionKind.SUBSCRIPT;
                pos = parseLine(visitor.visitFormattingInstruction(kind), line, pos + 5, newEndTag);
                continue;
            }
        } else if (line.startsWith("<FR>", pos)) {
            if (parseLine(garbageVisitor, line, pos + 4, "<Fr>") != -1) {
                pos = parseLine(visitor.visitFormattingInstruction(FormattingInstructionKind.WORDS_OF_JESUS), line, pos + 4, "<Fr>");
                continue;
            }
        } else if (line.startsWith("<FO>", pos)) {
            if (parseLine(garbageVisitor, line, pos + 4, "<Fo>") != -1) {
                pos = parseLine(visitor.visitFormattingInstruction(FormattingInstructionKind.LINK), line, pos + 4, "<Fo>");
                continue;
            }
        } else if (line.startsWith("<font color=\"gray\">/</font>", pos)) {
            visitor.visitVerseSeparator();
            pos += 27;
            continue;
        } else if (line.startsWith("<CL>", pos)) {
            visitor.visitLineBreak(LineBreakKind.NEWLINE);
            pos += 4;
            continue;
        } else if (line.startsWith("<CM>", pos)) {
            visitor.visitLineBreak(LineBreakKind.PARAGRAPH);
            pos += 4;
            continue;
        } else if (line.startsWith("<CI><PI>", pos)) {
            visitor.visitLineBreak(LineBreakKind.NEWLINE_WITH_INDENT);
            pos += 8;
            continue;
        } else if (line.startsWith("<TS", pos) && pos + 3 < line.length()) {
            // Headline: "<TS>" (depth 1) or "<TS1>".."<TS3>". len == 0 marks a malformed tag.
            char next = line.charAt(pos + 3);
            int depth, len;
            if (next == '>') {
                depth = 1;
                len = 4;
            } else if (pos + 4 < line.length() && line.charAt(pos + 4) == '>' && next >= '1' && next <= '3') {
                depth = next - '0';
                len = 5;
            } else {
                depth = len = 0;
            }
            // A numbered headline may be closed by "<Ts>" or by "<TsN>"; use whichever comes first.
            String end = "<Ts>", altEnd = len == 5 ? "<Ts" + next + ">" : "<Ts>";
            if (line.indexOf(altEnd, pos) != -1 && (line.indexOf(end, pos) == -1 || line.indexOf(altEnd, pos) < line.indexOf(end, pos)))
                end = altEnd;
            if (len > 0 && parseLine(garbageVisitor, line, pos + len, end) != -1) {
                pos = parseLine(visitor.visitHeadline(depth), line, pos + len, end);
                continue;
            }
        } else if (line.startsWith("<RF", pos)) {
            int closePos = line.indexOf('>', pos);
            // closePos == -1 means the tag is never closed. Recursing from closePos + 1 == 0
            // would re-parse the line from its start, hit this tag again and never terminate.
            if (closePos != -1 && parseLine(garbageVisitor, line, closePos + 1, "<Rf>") != -1) {
                pos = parseLine(visitor.visitFootnote(), line, closePos + 1, "<Rf>");
                continue;
            }
        } else if (line.startsWith("<FI>", pos)) {
            if (parseLine(garbageVisitor, line, pos + 4, "<Fi>") != -1) {
                pos = parseLine(visitor.visitFormattingInstruction(FormattingInstructionKind.ITALIC), line, pos + 4, "<Fi>");
                continue;
            }
        } else if (line.startsWith("<S%", pos)) {
            int closePos = line.indexOf('>', pos);
            // Same guard as for <RF: an unterminated tag must not restart parsing at index 0.
            if (closePos != -1 && parseLine(garbageVisitor, line, closePos + 1, "<s%>") != -1) {
                String[] strongs = line.substring(pos + 3, closePos).split("%");
                int[] strongNumbers = new int[strongs.length];
                try {
                    for (int i = 0; i < strongs.length; i++) {
                        strongNumbers[i] = Integer.parseInt(strongs[i]);
                    }
                    pos = parseLine(visitor.visitGrammarInformation(strongNumbers, null, null), line, closePos + 1, "<s%>");
                    continue;
                } catch (NumberFormatException ex) {
                    // malformed Strongs tag: fall through and treat it as an unsupported tag
                }
            }
        } else if (line.startsWith("<XWG", pos) || line.startsWith("<XWH", pos)) {
            int closePos = line.indexOf('>', pos);
            // Without a closing '>' the substring calls below would throw
            // StringIndexOutOfBoundsException; treat the tag as unsupported instead.
            if (closePos != -1) {
                try {
                    int number = Integer.parseInt(line.substring(pos + 4, closePos));
                    visitor.visitGrammarInformation(new int[] { number }, null, null);
                    pos = closePos + 1;
                    continue;
                } catch (NumberFormatException ex) {
                    System.out.println("WARNING: Invalid Strong number in tag " + line.substring(pos, closePos + 1));
                    warningCount++;
                }
            }
        } else if (line.startsWith("<WT", pos)) {
        // TODO parse morph information
        } else if (line.startsWith("<RX", pos)) {
        // TODO parse cross references
        } else if (line.startsWith("<CI>", pos) || line.startsWith("<PF", pos) || line.startsWith("<PI", pos)) {
        // extra formatting not supported by BMC
        } else if (warningCount < 100) {
            System.out.println("WARNING: Skipping unknown tag " + line.substring(pos, Math.min(pos + 20, line.length())));
            warningCount++;
        }
        // the tag is not supported (yet), skip the first character
        visitor.visitText("<");
        pos++;
    }
    if (endTag != null) {
        // Report success only if parsing stopped exactly at the expected end tag.
        if (line.startsWith(endTag, pos))
            pos += endTag.length();
        else
            pos = -1;
    }
    return pos;
}
Also used : FormattingInstructionKind(biblemulticonverter.data.FormattedText.FormattingInstructionKind) FormattedText(biblemulticonverter.data.FormattedText)

Example 8 with FormattedText

use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.

the class ZefDic method createXMLBible.

/**
 * Converts a {@link Bible} into a Zefania dictionary JAXB {@link Dictionary}.
 *
 * The dictionary type and reference bible are taken from the bible name when it has the
 * form {@code X_<TYPE>@<refbible>}; otherwise the name is used as the title. Metadata
 * values are copied into the version/revision attributes or the information block. Every
 * book with id {@code DICTIONARY_ENTRY} becomes one item, built from the prolog of the
 * book's first chapter; other non-metadata books are skipped with a warning.
 *
 * @param bible the bible whose dictionary entries are exported
 * @return the populated dictionary document
 * @throws Exception declared by the signature; the visible body throws only unchecked
 *         exceptions
 */
protected Dictionary createXMLBible(Bible bible) throws Exception {
    final ObjectFactory of = new ObjectFactory();
    Dictionary doc = of.createDictionary();
    // Defaults; version, revision, type and refbible may be overwritten below.
    doc.setDicversion("1");
    doc.setRevision("1");
    doc.setRefbible("any");
    doc.setType(TEnumDicType.X_DICTIONARY);
    String title = null;
    if (bible.getName().matches("X_(DICTIONARY|COMMENTARY|STRONG|DAILY)@.*")) {
        // Name encodes "<type>@<refbible>"; title stays null in this case.
        String[] parts = bible.getName().split("@", 2);
        doc.setType(TEnumDicType.valueOf(parts[0]));
        doc.setRefbible(parts[1]);
    } else {
        title = bible.getName();
    }
    doc.setINFORMATION(of.createTINFORMATION());
    // NOTE(review): the title element is added even when title is null (encoded name case).
    doc.getINFORMATION().getTitleOrCreatorOrDescription().add(new JAXBElement<String>(new QName("title"), String.class, title));
    MetadataBook metadata = bible.getMetadataBook();
    if (metadata != null) {
        for (String key : metadata.getKeys()) {
            String value = metadata.getValue(key);
            // "-empty-" is the placeholder for an empty value.
            if (value.equals("-empty-"))
                value = "";
            if (key.equals(MetadataBookKey.version.toString())) {
                doc.setDicversion(value);
            } else if (key.equals(MetadataBookKey.revision.toString())) {
                doc.setRevision(value);
            } else if (Arrays.asList(INFORMATION_KEYS).contains(key)) {
                // Keys listed in INFORMATION_KEYS become elements of the information block;
                // all other keys are dropped.
                doc.getINFORMATION().getTitleOrCreatorOrDescription().add(new JAXBElement<String>(new QName(key), String.class, value));
            }
        }
    }
    for (Book bk : bible.getBooks()) {
        if (bk.getId().equals(BookID.METADATA))
            continue;
        if (!bk.getId().equals(BookID.DICTIONARY_ENTRY)) {
            System.out.println("WARNING: Unable to export book " + bk.getAbbr());
            continue;
        }
        final TItem item = of.createTItem();
        if (!bk.getLongName().equals(bk.getShortName())) {
            // Extra item keyed by the short name whose description is a single <see>
            // element pointing at the long name.
            TItem itm = of.createTItem();
            itm.setId(bk.getShortName());
            appendTextElement(itm, "title", bk.getLongName());
            TParagraph para2 = of.createTParagraph();
            SeeType see = of.createSeeType();
            see.setContent(bk.getLongName());
            para2.getContent().add(new JAXBElement<SeeType>(new QName("see"), SeeType.class, see));
            itm.getContent().add(new JAXBElement<TParagraph>(new QName("description"), TParagraph.class, para2));
            doc.getItem().add(itm);
        }
        item.setId(bk.getLongName());
        doc.getItem().add(item);
        // Mutable per-item state shared by the visitors below.
        class ZefState {

            // Paragraph that currently receives top-level content.
            TParagraph para = of.createTParagraph();

            // When true, the next line break is swallowed instead of starting a new paragraph.
            // NOTE(review): nothing in this method ever sets this to true.
            boolean eatParagraph = false;

            // Appends the current paragraph to the item as a "description" element and
            // starts a fresh paragraph.
            public void flushPara(TItem item) {
                item.getContent().add(new JAXBElement<TParagraph>(new QName("description"), TParagraph.class, para));
                para = of.createTParagraph();
            }
        }
        final ZefState state = new ZefState();
        // Only the prolog of the first chapter is exported.
        FormattedText text = bk.getChapters().get(0).getProlog();
        // Visitor that appends content to a fixed content list: the current paragraph, or
        // the content of a nested element. Features it cannot express print a warning.
        class LevelVisitor implements Visitor<RuntimeException> {

            // Content list all output of this visitor is appended to.
            final List<Serializable> target;

            // Binds to the paragraph that is current at construction time.
            private LevelVisitor(ZefState state) {
                target = state.para.getContent();
            }

            private LevelVisitor(MyAnyType parent) {
                target = parent.getContent();
            }

            private LevelVisitor(TStyle parent) {
                target = parent.getContent();
            }

            @Override
            public int visitElementTypes(String elementTypes) throws RuntimeException {
                return 0;
            }

            @Override
            public Visitor<RuntimeException> visitHeadline(int depth) throws RuntimeException {
                System.out.println("WARNING: Nested headlines are not supported");
                return null;
            }

            @Override
            public void visitStart() throws RuntimeException {
            }

            @Override
            public void visitText(String text) throws RuntimeException {
                // Empty strings are not added to the content list.
                if (text.length() > 0)
                    target.add(text);
            }

            @Override
            public Visitor<RuntimeException> visitFootnote() throws RuntimeException {
                System.out.println("WARNING: footnotes are not supported");
                return null;
            }

            @Override
            public Visitor<RuntimeException> visitCrossReference(String bookAbbr, BookID book, int firstChapter, String firstVerse, int lastChapter, String lastVerse) throws RuntimeException {
                // Only the start of the reference is written; ranges trigger a warning.
                if (firstChapter != lastChapter || !firstVerse.equals(lastVerse))
                    System.out.println("WARNING: Cross references to verse ranges are not supported");
                BibLinkType b = of.createBibLinkType();
                b.setBn("" + book.getZefID());
                b.setCn1("" + firstChapter);
                b.setVn1(firstVerse);
                target.add(new JAXBElement<BibLinkType>(new QName("bib_link"), BibLinkType.class, b));
                return null;
            }

            @Override
            public Visitor<RuntimeException> visitFormattingInstruction(FormattingInstructionKind kind) throws RuntimeException {
                // Four kinds map to dedicated elements; all others are written as CSS styles.
                String tag;
                switch(kind) {
                    case BOLD:
                        tag = "strong";
                        break;
                    case ITALIC:
                        tag = "em";
                        break;
                    case SUPERSCRIPT:
                        tag = "sup";
                        break;
                    case SUBSCRIPT:
                        tag = "sub";
                        break;
                    default:
                        return visitCSSFormatting(kind.getCss());
                }
                MyAnyType mat = of.createMyAnyType();
                target.add(new JAXBElement<MyAnyType>(new QName(tag), MyAnyType.class, mat));
                // Nested content goes into the newly created element.
                return new LevelVisitor(mat);
            }

            @Override
            public Visitor<RuntimeException> visitCSSFormatting(String css) throws RuntimeException {
                TStyle style = of.createTStyle();
                style.setCss(css);
                target.add(of.createTStyleSTYLE(style));
                return new LevelVisitor(style);
            }

            @Override
            public void visitVerseSeparator() throws RuntimeException {
                System.out.println("WARNING: Verse separators are not supported");
            }

            @Override
            public void visitLineBreak(LineBreakKind kind) throws RuntimeException {
                System.out.println("WARNING: Nested line breaks are not supported");
            }

            @Override
            public Visitor<RuntimeException> visitGrammarInformation(int[] strongs, String[] rmac, int[] sourceIndices) throws RuntimeException {
                System.out.println("WARNING: Grammar information is not supported");
                return null;
            }

            @Override
            public Visitor<RuntimeException> visitDictionaryEntry(String dictionary, String entry) throws RuntimeException {
                if (dictionary.equals("reflink")) {
                    // The entry's first character is dropped and '-' becomes ';' to form the mscope.
                    RefLinkType r = of.createRefLinkType();
                    r.setMscope(entry.substring(1).replace('-', ';'));
                    target.add(new JAXBElement<RefLinkType>(new QName("reflink"), RefLinkType.class, r));
                } else {
                    // The dictionary name "dict" is written as target "x-self".
                    SeeType see = of.createSeeType();
                    see.setTarget(dictionary.equals("dict") ? "x-self" : dictionary);
                    see.setContent(entry);
                    target.add(new JAXBElement<SeeType>(new QName("see"), SeeType.class, see));
                }
                return null;
            }

            @Override
            public void visitRawHTML(RawHTMLMode mode, String raw) throws RuntimeException {
                System.out.println("WARNING: Raw html output not supported");
            }

            @Override
            public Visitor<RuntimeException> visitVariationText(String[] variations) throws RuntimeException {
                throw new IllegalStateException("Variations not supported");
            }

            @Override
            public Visitor<RuntimeException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws RuntimeException {
                return prio.handleVisitor(category, this);
            }

            @Override
            public boolean visitEnd() throws RuntimeException {
                return false;
            }
        }
        ;
        // Top-level visitor: handles headlines and paragraph breaks itself and delegates
        // inline content to a LevelVisitor bound to the current paragraph.
        text.accept(new Visitor<RuntimeException>() {

            @Override
            public int visitElementTypes(String elementTypes) throws RuntimeException {
                return 0;
            }

            @Override
            public Visitor<RuntimeException> visitHeadline(int depth) throws RuntimeException {
                MyAnyType mat = of.createMyAnyType();
                JAXBElement<MyAnyType> elem = new JAXBElement<>(new QName("title"), MyAnyType.class, mat);
                if (depth == 1) {
                    // Depth-1 headlines end the current paragraph and are added to the item itself.
                    state.flushPara(item);
                    item.getContent().add(elem);
                } else {
                    // Deeper headlines stay inside the current paragraph.
                    state.para.getContent().add(elem);
                }
                return new LevelVisitor(mat);
            }

            @Override
            public void visitStart() throws RuntimeException {
            }

            @Override
            public void visitText(String text) throws RuntimeException {
                new LevelVisitor(state).visitText(text);
            }

            @Override
            public Visitor<RuntimeException> visitFootnote() throws RuntimeException {
                System.out.println("WARNING: footnotes are not supported");
                return null;
            }

            @Override
            public Visitor<RuntimeException> visitCrossReference(String bookAbbr, BookID book, int firstChapter, String firstVerse, int lastChapter, String lastVerse) throws RuntimeException {
                return new LevelVisitor(state).visitCrossReference(bookAbbr, book, firstChapter, firstVerse, lastChapter, lastVerse);
            }

            @Override
            public Visitor<RuntimeException> visitFormattingInstruction(FormattingInstructionKind kind) throws RuntimeException {
                return new LevelVisitor(state).visitFormattingInstruction(kind);
            }

            @Override
            public Visitor<RuntimeException> visitCSSFormatting(String css) throws RuntimeException {
                return new LevelVisitor(state).visitCSSFormatting(css);
            }

            @Override
            public void visitVerseSeparator() throws RuntimeException {
                System.out.println("WARNING: Verse separators are not supported");
            }

            @Override
            public void visitLineBreak(LineBreakKind kind) throws RuntimeException {
                // Every kind of line break starts a new paragraph unless one is to be eaten.
                if (state.eatParagraph) {
                    state.eatParagraph = false;
                } else {
                    state.flushPara(item);
                    // NOTE(review): flushPara has already replaced state.para with a new
                    // paragraph, so this assignment replaces an empty paragraph with another.
                    state.para = of.createTParagraph();
                }
            }

            @Override
            public Visitor<RuntimeException> visitGrammarInformation(int[] strongs, String[] rmac, int[] sourceIndices) throws RuntimeException {
                System.out.println("WARNING: Grammar information is not supported");
                return null;
            }

            @Override
            public Visitor<RuntimeException> visitDictionaryEntry(String dictionary, String entry) throws RuntimeException {
                return new LevelVisitor(state).visitDictionaryEntry(dictionary, entry);
            }

            @Override
            public void visitRawHTML(RawHTMLMode mode, String raw) throws RuntimeException {
                System.out.println("WARNING: Raw html output not supported");
            }

            @Override
            public Visitor<RuntimeException> visitVariationText(String[] variations) throws RuntimeException {
                throw new IllegalStateException("Variations not supported");
            }

            @Override
            public Visitor<RuntimeException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws RuntimeException {
                if (prio == ExtraAttributePriority.KEEP_CONTENT && category.equals("zefdic")) {
                    // KEEP_CONTENT attributes of category "zefdic" get a null visitor here
                    // (example of such an attribute: "zefdic", "field", "pronunciation").
                    // NOTE(review): presumably this drops the attribute's content - confirm
                    // against the Visitor contract.
                    return null;
                } else {
                    return prio.handleVisitor(category, this);
                }
            }

            @Override
            public boolean visitEnd() throws RuntimeException {
                return false;
            }
        });
        // Emit whatever is left in the last paragraph of this entry.
        state.flushPara(item);
    }
    return doc;
}
Also used : Dictionary(biblemulticonverter.schema.zefdic1.Dictionary) TParagraph(biblemulticonverter.schema.zefdic1.TParagraph) ExtraAttributePriority(biblemulticonverter.data.FormattedText.ExtraAttributePriority) Visitor(biblemulticonverter.data.FormattedText.Visitor) TItem(biblemulticonverter.schema.zefdic1.TItem) RawHTMLMode(biblemulticonverter.data.FormattedText.RawHTMLMode) ObjectFactory(biblemulticonverter.schema.zefdic1.ObjectFactory) BookID(biblemulticonverter.data.BookID) MetadataBook(biblemulticonverter.data.MetadataBook) Book(biblemulticonverter.data.Book) List(java.util.List) BibLinkType(biblemulticonverter.schema.zefdic1.BibLinkType) RefLinkType(biblemulticonverter.schema.zefdic1.RefLinkType) MetadataBook(biblemulticonverter.data.MetadataBook) TStyle(biblemulticonverter.schema.zefdic1.TStyle) MyAnyType(biblemulticonverter.schema.zefdic1.MyAnyType) QName(javax.xml.namespace.QName) FormattingInstructionKind(biblemulticonverter.data.FormattedText.FormattingInstructionKind) FormattedText(biblemulticonverter.data.FormattedText) JAXBElement(javax.xml.bind.JAXBElement) LineBreakKind(biblemulticonverter.data.FormattedText.LineBreakKind) SeeType(biblemulticonverter.schema.zefdic1.SeeType)

Example 9 with FormattedText

use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.

the class ZefaniaXMLRoundtrip method parseBible.

/**
 * Builds a {@link Bible} from an unmarshalled Zefania XML document.
 *
 * <p>The conversion runs in three steps:
 * <ol>
 * <li>Status, version, revision and every non-null INFORMATION element are copied
 * into a {@link MetadataBook}; it is added to the result only if it holds at least
 * one key.</li>
 * <li>A first pass over all books derives a unique abbreviation for each book and
 * records it per {@link BookID} in a map that is later handed to
 * {@code parseContent} and used for XREF elements (presumably so references to
 * books that appear later in the document can already be resolved).</li>
 * <li>A second pass creates the {@link Book} objects and fills their chapters from
 * the CAPTION, REMARK, XREF, PROLOG and VERS children.</li>
 * </ol>
 *
 * @param doc the unmarshalled Zefania XML document
 * @return the converted bible
 * @throws Exception if conversion fails; an {@link IOException} is thrown for
 *         structures this parser does not accept (mismatching caption/verse
 *         references, unknown caption types, remarks or xrefs that refer to a
 *         verse not yet present, malformed xref scopes, misplaced or duplicate
 *         prologs, unknown child element types)
 */
protected Bible parseBible(XMLBIBLE doc) throws Exception {
    Bible result = new Bible(doc.getBiblename());
    MetadataBook metadata = new MetadataBook();
    if (doc.getStatus() != null) {
        metadata.setValue(MetadataBookKey.status, doc.getStatus().value());
    }
    if (doc.getVersion() != null) {
        metadata.setValue(MetadataBookKey.version, doc.getVersion());
    }
    if (doc.getRevision() != null) {
        metadata.setValue(MetadataBookKey.revision, doc.getRevision().toString());
    }
    for (JAXBElement<?> elem : doc.getINFORMATION().getTitleOrCreatorOrDescription()) {
        if (elem.getValue() == null)
            continue;
        String value = normalize(elem.getValue().toString(), true).trim();
        // Values that are empty after normalization are stored as a placeholder.
        if (value.length() == 0)
            value = "-empty-";
        metadata.setValue(elem.getName().getLocalPart(), value);
    }
    metadata.finished();
    if (metadata.getKeys().size() > 0)
        result.getBooks().add(metadata.getBook());
    Set<String> abbrs = new HashSet<String>();
    Set<String> shortnames = new HashSet<String>();
    Map<BookID, String> abbrMap = new EnumMap<BookID, String>(BookID.class);
    List<BIBLEBOOK> nl = doc.getBIBLEBOOK();

    // First pass: only collect the abbreviation of every book.
    for (BIBLEBOOK e : nl) {
        String shortname = e.getBsname();
        int number = e.getBnumber().intValue();
        BookID bookID = BookID.fromZefId(number);
        if (shortname == null)
            shortname = "_" + bookID.getOsisID();
        else if (shortname.length() == 0)
            shortname = "_" + bookID.getOsisID() + "[[]]";
        abbrMap.put(bookID, uniqueAbbreviation(shortname, abbrs));
    }

    // Second pass: abbreviations are derived again from scratch, in the same
    // order, while the books themselves are built.
    abbrs.clear();
    for (BIBLEBOOK e : nl) {
        String shortname = e.getBsname();
        String longname = e.getBname();
        int number = e.getBnumber().intValue();
        BookID bookID = BookID.fromZefId(number);
        // Missing names are replaced by "_" + a default name; names that are
        // present but empty additionally get a "[[]]" suffix.
        if (shortname == null)
            shortname = "_" + bookID.getOsisID();
        else if (shortname.length() == 0)
            shortname = "_" + bookID.getOsisID() + "[[]]";
        if (longname == null)
            longname = "_" + bookID.getEnglishName();
        else if (longname.length() == 0)
            longname = "_" + bookID.getEnglishName() + "[[]]";
        else
            longname = longname.replaceAll("  ++", " ").trim();
        // The abbreviation is derived before the special-case renames below.
        String abbr = uniqueAbbreviation(shortname, abbrs);
        if (shortname.equals("Gen") && longname.equals("Genesis") && bookID == BookID.BOOK_Exod) {
            System.out.println("WARNING: Book number " + bookID.getZefID() + " has name " + longname);
            shortname = "Exo[[Gen]]";
            longname = "Exodus[[Genesis]]";
        }
        if (shortname.equals("1Chr") && longname.equals("2 Chronicles")) {
            System.out.println("WARNING: Book name 2 Chronicles has short name 1Chr");
            shortname = "2Chr[[1Chr]]";
        }
        if (shortnames.contains(shortname)) {
            System.out.println("WARNING: Duplicate short name " + shortname);
            // Append a counter and keep the original name in "[[...]]".
            for (int i = 2; i < 100; i++) {
                if (!shortnames.contains(shortname + i + "[[" + shortname + "]]")) {
                    shortname = shortname + i + "[[" + shortname + "]]";
                    break;
                }
            }
        }
        shortnames.add(shortname);
        Book book = new Book(abbr, bookID, shortname, longname);
        // Verse number the buffered captions refer to, or -1 if none is pending.
        int lastvref = -1;
        // Captions are collected here until the VERS element they belong to is seen.
        List<Headline> headlineBuffer = new ArrayList<Headline>();
        for (CHAPTER e2 : e.getCHAPTER()) {
            int chapterNumber = e2.getCnumber().intValue();
            // Fill up with empty chapters so that the index matches the chapter number.
            while (book.getChapters().size() < chapterNumber) book.getChapters().add(new Chapter());
            Chapter chapter = book.getChapters().get(chapterNumber - 1);
            for (Object e3 : e2.getPROLOGOrCAPTIONOrVERS()) {
                if (e3 instanceof CAPTION) {
                    CAPTION caption = (CAPTION) e3;
                    // All pending captions have to point at the same verse.
                    if (lastvref != -1 && lastvref != caption.getVref().intValue())
                        throw new IOException("Caption refers to verse " + caption.getVref() + " but a caption for verse " + lastvref + " is still pending");
                    lastvref = caption.getVref().intValue();
                    int level;
                    if (caption.getType() == null) {
                        // Captions without a type get headline level 9.
                        level = 9;
                    } else {
                        switch(caption.getType()) {
                            case X_H_1:
                                level = 1;
                                break;
                            case X_H_2:
                                level = 2;
                                break;
                            case X_H_3:
                                level = 3;
                                break;
                            case X_H_4:
                                level = 4;
                                break;
                            case X_H_5:
                                level = 5;
                                break;
                            case X_H_6:
                                level = 6;
                                break;
                            default:
                                throw new IOException("Unsupported caption type: " + caption.getType());
                        }
                    }
                    Headline h = new Headline(level);
                    headlineBuffer.add(h);
                    if (!parseContent(h.getAppendVisitor(), caption.getContent(), abbrMap)) {
                        // No content was parsed: emit the empty marker instead.
                        visitEmptyMarker(h.getAppendVisitor());
                    } else {
                        h.trimWhitespace();
                    }
                    h.finished();
                } else if (e3 instanceof REMARK) {
                    // A remark becomes a footnote on an already parsed verse.
                    REMARK remark = (REMARK) e3;
                    int vref = remark.getVref().intValue();
                    int idx = chapter.getVerseIndex("" + vref);
                    if (idx == -1)
                        throw new IOException(vref + ":" + remark.getContent());
                    Verse v = chapter.getVerses().get(idx);
                    if (remark.getContent().size() != 1)
                        throw new IOException("Remark for verse " + vref + " must contain exactly one content item, found " + remark.getContent().size());
                    String remarkText = normalize((String) remark.getContent().get(0), true).trim();
                    v.getAppendVisitor().visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "remark").visitFootnote().visitText(remarkText);
                } else if (e3 instanceof XREF) {
                    // An xref becomes a footnote of cross references on an already parsed verse.
                    XREF xref = (XREF) e3;
                    int vref = xref.getVref().intValue();
                    int idx = chapter.getVerseIndex("" + vref);
                    if (idx == -1)
                        throw new IOException(vref + ":" + xref.getMscope());
                    Verse v = chapter.getVerses().get(idx);
                    Visitor<RuntimeException> footnoteVisitor = v.getAppendVisitor().visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "outer-xref").visitFootnote();
                    boolean first = true;
                    // Each space separated scope has the form book;chapter[-chapter];verse[-verse].
                    for (String mscope : xref.getMscope().split(" ")) {
                        Matcher m = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(mscope);
                        if (!m.matches())
                            throw new IOException(mscope);
                        BookID xrefBookID = BookID.fromZefId(Integer.parseInt(m.group(1)));
                        int xrefChapter = Integer.parseInt(m.group(2)), endChapter = xrefChapter;
                        if (m.group(3) != null)
                            endChapter = Integer.parseInt(m.group(3).substring(1));
                        // Verse number 0 is replaced by the verse name "1//G".
                        // NOTE(review): presumably a naming convention of the data model - confirm.
                        String verse = m.group(4);
                        if (verse.equals("0"))
                            verse = "1//G";
                        String endVerse = m.group(5);
                        if (endVerse == null)
                            endVerse = verse;
                        else
                            endVerse = endVerse.substring(1);
                        if (endVerse.equals("0"))
                            endVerse = "1//G";
                        // Books without a known abbreviation fall back to their OSIS ID.
                        String xrefAbbr = abbrMap.get(xrefBookID);
                        if (xrefAbbr == null)
                            xrefAbbr = xrefBookID.getOsisID();
                        if (first)
                            first = false;
                        else
                            footnoteVisitor.visitText(" ");
                        // Within one chapter, a reversed numeric verse range is swapped.
                        if (xrefChapter == endChapter && !verse.equals("1//G") && !endVerse.equals("1//G") && Integer.parseInt(verse) > Integer.parseInt(endVerse)) {
                            String tmp = verse;
                            verse = endVerse;
                            endVerse = tmp;
                        }
                        footnoteVisitor.visitCrossReference(xrefAbbr, xrefBookID, xrefChapter, verse, endChapter, endVerse).visitText(xrefAbbr + " " + xrefChapter + ":" + verse);
                    }
                } else if (e3 instanceof PROLOG) {
                    PROLOG prolog = (PROLOG) e3;
                    // Only a single prolog, referring to verse 1, is accepted per chapter.
                    if (prolog.getVref().intValue() != 1)
                        throw new IOException("" + prolog.getVref());
                    if (chapter.getProlog() != null)
                        throw new IOException("More than one prolog found");
                    FormattedText prologText = new FormattedText();
                    // A prolog for which parseContent reports no content is dropped.
                    if (parseContent(prologText.getAppendVisitor(), prolog.getContent(), abbrMap)) {
                        prologText.trimWhitespace();
                        prologText.finished();
                        chapter.setProlog(prologText);
                    }
                } else if (e3 instanceof VERS) {
                    VERS vers = (VERS) e3;
                    int vnumber = vers.getVnumber().intValue();
                    // Pending captions must refer to exactly this verse.
                    if (lastvref != -1) {
                        if (lastvref != vnumber)
                            throw new IOException(lastvref + " != " + vnumber);
                        lastvref = -1;
                    }
                    Verse verse = new Verse("" + vnumber);
                    Visitor<RuntimeException> visitor = verse.getAppendVisitor();
                    boolean contentFound = false;
                    // Buffered captions are emitted as headlines at the start of the verse.
                    if (headlineBuffer.size() > 0) {
                        for (Headline h : headlineBuffer) {
                            h.accept(visitor.visitHeadline(h.getDepth()));
                        }
                        headlineBuffer.clear();
                        contentFound = true;
                    }
                    contentFound |= parseContent(visitor, vers.getContent(), abbrMap);
                    if (!contentFound) {
                        visitEmptyMarker(visitor);
                    }
                    verse.trimWhitespace();
                    chapter.getVerses().add(verse);
                } else {
                    throw new IOException(e3.getClass().toString());
                }
            }
            // Verses are finished only now because remarks and xrefs append to them after creation.
            for (Verse v : chapter.getVerses()) v.finished();
        }
        result.getBooks().add(book);
    }
    return result;
}

/**
 * Derives a book abbreviation from a short name and registers it in {@code usedAbbrs}.
 *
 * <p>All characters except A-Z, a-z, 0-9 and ä/ö/ü are removed. An "X" is prepended
 * if the result is empty or starts with a lower case letter, and an "x" is appended
 * if it is only one character long. If the abbreviation is already taken, the first
 * free suffix from 2 to 99 is appended; if none is free, the duplicate is returned
 * unchanged.
 *
 * @param shortname the (non-null) short name of the book
 * @param usedAbbrs abbreviations handed out so far; the returned value is added to it
 * @return the derived abbreviation
 */
private static String uniqueAbbreviation(String shortname, Set<String> usedAbbrs) {
    String abbr = shortname.replaceAll("[^A-Z0-9a-zäöü]++", "");
    if (abbr.length() == 0 || Character.isLowerCase(abbr.charAt(0)))
        abbr = "X" + abbr;
    if (abbr.length() == 1)
        abbr += "x";
    if (usedAbbrs.contains(abbr)) {
        for (int i = 2; i < 100; i++) {
            if (!usedAbbrs.contains(abbr + i)) {
                abbr = abbr + i;
                break;
            }
        }
    }
    usedAbbrs.add(abbr);
    return abbr;
}
Also used : Matcher(java.util.regex.Matcher) Bible(biblemulticonverter.data.Bible) ArrayList(java.util.ArrayList) BookID(biblemulticonverter.data.BookID) Book(biblemulticonverter.data.Book) MetadataBook(biblemulticonverter.data.MetadataBook) Headline(biblemulticonverter.data.FormattedText.Headline) VERS(biblemulticonverter.schema.zef2005.VERS) PROLOG(biblemulticonverter.schema.zef2005.PROLOG) EnumMap(java.util.EnumMap) REMARK(biblemulticonverter.schema.zef2005.REMARK) HashSet(java.util.HashSet) Chapter(biblemulticonverter.data.Chapter) IOException(java.io.IOException) FormattedText(biblemulticonverter.data.FormattedText) BIBLEBOOK(biblemulticonverter.schema.zef2005.BIBLEBOOK) XREF(biblemulticonverter.schema.zef2005.XREF) CHAPTER(biblemulticonverter.schema.zef2005.CHAPTER) CAPTION(biblemulticonverter.schema.zef2005.CAPTION) VirtualVerse(biblemulticonverter.data.VirtualVerse) Verse(biblemulticonverter.data.Verse)

Example 10 with FormattedText

use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.

the class OSIS method parseFormattedText.

/**
 * Appends the child nodes of {@code root} to the given formatted text.
 *
 * <p>Text nodes have runs of whitespace collapsed to a single space. A leading
 * space is dropped (with a warning) when the text is the first child or follows a
 * {@code brp}, {@code lb} or {@code title} element; a trailing space is dropped
 * (with a warning) when the text is the last relevant child or is followed by one
 * of these elements. {@code title} elements are turned into headlines (depth 1 for
 * {@code type="chapter"}, depth 2 otherwise); all other elements are delegated to
 * {@code parseStructuredTextElement}.
 *
 * @param verseName name used in the warning about empty headlines
 * @param root element whose children are parsed; it is normalized first
 * @param ft formatted text that receives the parsed content
 */
protected void parseFormattedText(String verseName, Element root, FormattedText ft) {
    // Merge adjacent text nodes so whitespace checks see the complete text.
    root.normalize();
    for (Node node = root.getFirstChild(); node != null; node = node.getNextSibling()) {
        if (node instanceof Text) {
            String text = node.getTextContent().replaceAll("[ \r\n\t]+", " ");
            if (text.startsWith(" ") && (node.getPreviousSibling() == null || Arrays.asList("brp", "lb", "title").contains(node.getPreviousSibling().getNodeName()))) {
                printWarning("WARNING: Whitespace at beginning of verse or after title/newline");
                text = text.substring(1);
            }
            // Childless <w>/<q> elements are skipped when looking for what follows this text.
            Node ns = node.getNextSibling();
            while (ns != null && Arrays.asList("w", "q").contains(ns.getNodeName()) && ns.getFirstChild() == null) ns = ns.getNextSibling();
            if (text.endsWith(" ") && (ns == null || Arrays.asList("brp", "lb", "title").contains(ns.getNodeName()))) {
                // The trailing space precedes the title/newline, hence "before".
                printWarning("WARNING: Whitespace at end of verse or before title/newline");
                text = text.substring(0, text.length() - 1);
            }
            if (text.length() > 0)
                ft.getAppendVisitor().visitText(text);
        } else {
            Element elem = (Element) node;
            if (elem.getNodeName().equals("title")) {
                // Chapter titles become depth 1 headlines, all other titles depth 2.
                Headline hl = new Headline(elem.getAttribute("type").equals("chapter") ? 1 : 2);
                // A title that consists of a single text node is trimmed in place before parsing.
                if (elem.getChildNodes().getLength() == 1 && elem.getFirstChild() instanceof Text) {
                    String text = elem.getFirstChild().getTextContent();
                    if (!text.equals(text.trim())) {
                        printWarning("WARNING: Whitespace at beginning/end of headline: '" + text + "'");
                        elem.getFirstChild().setNodeValue(text.trim());
                    }
                }
                parseStructuredTextChildren(hl.getAppendVisitor(), elem);
                // An empty getElementTypes(1) result is treated as an empty headline, which is dropped.
                if (hl.getElementTypes(1).length() == 0) {
                    printWarning("WARNING: Empty headline in " + verseName);
                } else {
                    hl.accept(ft.getAppendVisitor().visitHeadline(hl.getDepth()));
                }
            } else {
                parseStructuredTextElement(ft.getAppendVisitor(), elem);
            }
        }
    }
}
Also used : Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) Headline(biblemulticonverter.data.FormattedText.Headline) Text(org.w3c.dom.Text) FormattedText(biblemulticonverter.data.FormattedText)

Aggregations

FormattedText (biblemulticonverter.data.FormattedText): 31, Chapter (biblemulticonverter.data.Chapter): 25, Book (biblemulticonverter.data.Book): 24, Verse (biblemulticonverter.data.Verse): 22, Bible (biblemulticonverter.data.Bible): 14, BookID (biblemulticonverter.data.BookID): 10, Headline (biblemulticonverter.data.FormattedText.Headline): 9, ArrayList (java.util.ArrayList): 9, MetadataBook (biblemulticonverter.data.MetadataBook): 8, IOException (java.io.IOException): 8, Visitor (biblemulticonverter.data.FormattedText.Visitor): 7, VirtualVerse (biblemulticonverter.data.VirtualVerse): 7, File (java.io.File): 6, EnumMap (java.util.EnumMap): 5, HashMap (java.util.HashMap): 4, Matcher (java.util.regex.Matcher): 4, Element (org.w3c.dom.Element): 3, Node (org.w3c.dom.Node): 3, ExtraAttributePriority (biblemulticonverter.data.FormattedText.ExtraAttributePriority): 2, FormattingInstructionKind (biblemulticonverter.data.FormattedText.FormattingInstructionKind): 2