Search in sources :

Example 6 with Note

use of biblemulticonverter.schema.usx3.Note in project BibleMultiConverter by schierlm.

The method parseCharContent of the class USX3.

/**
 * Translates a list of USX character-level nodes into Paratext character content and appends
 * the results to {@code container}, recursing into nested char and note elements.
 *
 * <ul>
 *   <li>Optional breaks and figures are skipped with a warning on standard output.</li>
 *   <li>Structured references that cannot be parsed are replaced by their plain text.</li>
 *   <li>Char elements whose style is listed in {@code CHAR_STYLE_UNSUPPORTED} are flattened:
 *       their children are added directly to {@code container}.</li>
 * </ul>
 *
 * @param content the nodes to convert
 * @param container the container that receives the converted content
 * @throws IOException if a node is of a type not handled here
 */
private void parseCharContent(List<Object> content, ParatextBook.ParatextCharacterContentContainer container) throws IOException {
    for (Object node : content) {
        if (node instanceof Optbreak) {
            // is ignored in USFM as well
            System.out.println("WARNING: Skipping optional break");
            continue;
        }
        if (node instanceof Ref) {
            Ref ref = (Ref) node;
            try {
                container.getContent().add(ParatextCharacterContent.Reference.parse(ref.getLoc(), ref.getContent()));
            } catch (IllegalArgumentException e) {
                // Fall back to the reference's visible text when its location cannot be parsed.
                String where = unmarshallerLocationListener.getHumanReadableLocation(node);
                System.out.println("WARNING: Unsupported structured reference format at " + where + " - replaced by plain text: " + ref.getLoc());
                final ParatextCharacterContent.Text fallback = ParatextCharacterContent.Text.from(ref.getContent());
                if (fallback != null) {
                    container.getContent().add(fallback);
                }
            }
            continue;
        }
        if (node instanceof String) {
            final ParatextCharacterContent.Text plain = ParatextCharacterContent.Text.from((String) node);
            if (plain != null) {
                container.getContent().add(plain);
            }
            continue;
        }
        if (node instanceof Figure) {
            System.out.println("WARNING: Skipping figure");
            continue;
        }
        if (node instanceof Char) {
            Char charElement = (Char) node;
            if (CHAR_STYLE_UNSUPPORTED.contains(charElement.getStyle())) {
                // Unsupported style: keep the children, drop the formatting wrapper.
                parseCharContent(charElement.getContent(), container);
                continue;
            }
            ParatextCharacterContent.AutoClosingFormatting formatting = new ParatextCharacterContent.AutoClosingFormatting(CHAR_STYLE_MAP.get(charElement.getStyle()), false);
            String lemma = charElement.getLemma();
            boolean isWordlist = formatting.getKind() == ParatextCharacterContent.AutoClosingFormattingKind.WORDLIST;
            if (isWordlist && lemma != null && !lemma.isEmpty()) {
                formatting.getAttributes().put("lemma", lemma);
            }
            container.getContent().add(formatting);
            parseCharContent(charElement.getContent(), formatting);
            continue;
        }
        if (node instanceof Verse) {
            container.getContent().add(handleVerse((Verse) node));
            continue;
        }
        if (node instanceof Note) {
            Note noteElement = (Note) node;
            ParatextCharacterContent.FootnoteXref footnote = new ParatextCharacterContent.FootnoteXref(NOTE_STYLE_MAP.get(noteElement.getStyle()), noteElement.getCaller());
            container.getContent().add(footnote);
            parseCharContent(noteElement.getContent(), footnote);
            continue;
        }
        throw new IOException("Unsupported character content element: " + node.getClass().getName());
    }
}
Also used : IOException(java.io.IOException) Optbreak(biblemulticonverter.schema.usx3.Optbreak) Figure(biblemulticonverter.schema.usx3.Figure) Ref(biblemulticonverter.schema.usx3.Ref) Char(biblemulticonverter.schema.usx3.Char) Note(biblemulticonverter.schema.usx3.Note) Verse(biblemulticonverter.schema.usx3.Verse)

Example 7 with Note

use of biblemulticonverter.schema.usx3.Note in project BibleMultiConverter by schierlm.

The method parseContent of the class ZefaniaXML.

/**
 * Feeds a list of Zefania XML content nodes to {@code visitor}, recursing into nested notes,
 * styles and grammar elements. This is the lenient importer: malformed cross references, Strong
 * numbers and RMAC values are skipped (mostly with a warning on standard output) instead of
 * aborting the import.
 *
 * <p>Handled node types: {@code String} (text), {@code DIV}/{@code NOTE} (footnotes), {@code BR}
 * (line breaks), {@code XREF} (cross references rendered as a footnote) and {@code JAXBElement}s
 * named {@code STYLE}, {@code gr} or {@code GRAM}. Other {@code JAXBElement}s and any other node
 * type are ignored silently.
 *
 * @param visitor receives the converted content
 * @param contentList the nodes to convert
 * @param abbrMap book abbreviations, used to resolve fscope book names and to label mscope
 *     cross references
 * @return {@code true} if at least one node counted as content (non-blank text, a non-empty
 *     note, a line break, a cross reference or a handled STYLE/GRAM element)
 * @throws IOException declared for the recursive calls; the code in this method does not throw
 *     it directly
 */
private boolean parseContent(Visitor<RuntimeException> visitor, List<Object> contentList, Map<BookID, String> abbrMap) throws IOException {
    boolean contentFound = false;
    for (Object n : contentList) {
        if (n instanceof String) {
            String value = normalize((String) n, false);
            visitor.visitText(value);
            // Whitespace-only text is still emitted but does not count as content.
            contentFound |= value.trim().length() > 0;
        } else if (n instanceof DIV || n instanceof NOTE) {
            NOTE note;
            if (n instanceof DIV) {
                note = ((DIV) n).getNOTE();
            } else {
                note = (NOTE) n;
            }
            if (note.getContent().size() == 0)
                continue;
            Visitor<RuntimeException> v;
            v = visitor.visitFootnote();
            boolean subContentFound = parseContent(v, note.getContent(), abbrMap);
            if (!subContentFound)
                visitEmptyMarker(v);
            contentFound = true;
        } else if (n instanceof BR) {
            BR br = (BR) n;
            Visitor<RuntimeException> v = visitor;
            int count = 1;
            if (br.getCount() != null) {
                count = br.getCount().intValue();
            }
            // Out-of-range repeat counts fall back to a single break.
            if (count < 1 || count > 10)
                count = 1;
            for (int ii = 0; ii < count; ii++) {
                if (br.getArt() == EnumBreak.X_P)
                    v.visitLineBreak(LineBreakKind.PARAGRAPH);
                else
                    v.visitLineBreak(LineBreakKind.NEWLINE);
            }
            contentFound = true;
        } else if (n instanceof XREF) {
            XREF xref = (XREF) n;
            Visitor<RuntimeException> footnoteVisitor = visitor.visitFootnote();
            footnoteVisitor.visitText(FormattedText.XREF_MARKER.trim());
            boolean first = true;
            if (xref.getMscope() == null) {
                if (xref.getFscope() == null) {
                    System.out.println("WARNING: Ignoring XREF with neither fscope nor mscope");
                } else {
                    // NOTE(review): this branch never clears "first", so visitEmptyMarker below
                    // runs even when references were emitted here - confirm whether intended.
                    for (String fscope : xref.getFscope().split("; ")) {
                        Matcher m = Utils.compilePattern("([0-9A-Za-z]+) ([0-9]+), ([0-9]+[a-z]?)").matcher(fscope);
                        if (!m.matches()) {
                            System.out.println("WARNING: Unable to parse XREF fscope " + fscope + ", skipping");
                            continue;
                        }
                        String xBook = m.group(1);
                        int xChapter = Integer.parseInt(m.group(2));
                        String xVerse = m.group(3);
                        // Reverse lookup: find the book whose abbreviation equals the fscope book name.
                        BookID xID = null;
                        for (Map.Entry<BookID, String> abbrEntry : abbrMap.entrySet()) {
                            if (abbrEntry.getValue().equals(xBook)) {
                                xID = abbrEntry.getKey();
                                break;
                            }
                        }
                        if (xID == null) {
                            System.out.println("WARNING: Book not found for XREF fscope " + fscope + ", skipping");
                            continue;
                        }
                        footnoteVisitor.visitText(" ");
                        footnoteVisitor.visitCrossReference(xBook, xID, xChapter, xVerse, xChapter, xVerse).visitText(xBook + " " + xChapter + ":" + xVerse);
                    }
                }
            } else {
                for (String mscope : xref.getMscope().split(" ")) {
                    // mscope entries have the form book;chapter[-endChapter];verse[-endVerse].
                    Matcher m = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(mscope);
                    if (!m.matches())
                        continue;
                    BookID bookID = BookID.fromZefId(Integer.parseInt(m.group(1)));
                    int chapter = Integer.parseInt(m.group(2)), endChapter = chapter;
                    if (m.group(3) != null)
                        endChapter = Integer.parseInt(m.group(3).substring(1));
                    String verse = m.group(4);
                    String endVerse = m.group(5);
                    if (endVerse == null)
                        endVerse = verse;
                    else
                        endVerse = endVerse.substring(1);
                    // Skip references to verse 0 and ranges that run backwards within a chapter.
                    if (verse.equals("0") || endVerse.equals("0"))
                        continue;
                    if (chapter == endChapter && Integer.parseInt(verse) > Integer.parseInt(endVerse))
                        continue;
                    String abbr = abbrMap.get(bookID);
                    if (abbr == null)
                        abbr = bookID.getOsisID();
                    if (first)
                        first = false;
                    else
                        footnoteVisitor.visitText(" ");
                    footnoteVisitor.visitCrossReference(abbr, bookID, chapter, verse, endChapter, endVerse).visitText(abbr + " " + chapter + ":" + verse);
                }
            }
            if (first)
                visitEmptyMarker(footnoteVisitor);
            contentFound = true;
        } else if (n instanceof JAXBElement<?>) {
            String name = ((JAXBElement<?>) n).getName().toString();
            Object nn = ((JAXBElement<?>) n).getValue();
            if (name.equals("STYLE") && nn instanceof STYLE) {
                String css = ((STYLE) nn).getCss();
                String id = ((STYLE) nn).getId();
                // Map well-known ids / CSS fragments to a formatting kind; the first match wins.
                FormattingInstructionKind kind = null;
                if (id != null && id.equals("cl:divineName")) {
                    kind = FormattingInstructionKind.DIVINE_NAME;
                } else if (css == null || css.startsWith("display:block;")) {
                    kind = null;
                } else if (css.contains("italic")) {
                    kind = FormattingInstructionKind.ITALIC;
                } else if (css.contains("bold")) {
                    kind = FormattingInstructionKind.BOLD;
                } else if (css.toLowerCase().contains("color:#ff0000")) {
                    kind = FormattingInstructionKind.WORDS_OF_JESUS;
                } else if (css.contains("color:blue")) {
                    kind = FormattingInstructionKind.LINK;
                } else if (css.contains("vertical-align:super") || css.equals("font-size:small")) {
                    kind = FormattingInstructionKind.SUPERSCRIPT;
                }
                Visitor<RuntimeException> contentVisitor = visitor;
                if (kind != null) {
                    contentVisitor = contentVisitor.visitFormattingInstruction(kind);
                } else if (css != null) {
                    // No known kind: pass the raw CSS through.
                    contentVisitor = contentVisitor.visitCSSFormatting(css);
                }
                List<Object> content = ((STYLE) nn).getContent();
                boolean subContentFound = parseContent(contentVisitor, content, abbrMap);
                if (!subContentFound)
                    visitEmptyMarker(contentVisitor);
            } else if ((name.equals("gr") || name.equals("GRAM")) && nn instanceof GRAM) {
                GRAM gram = (GRAM) nn;
                // If the element's text ends with a space and the following text does not start
                // with one, strip that space here and re-emit it after the grammar element.
                // Note that this rewrites the last entry of gram.getContent() in place.
                boolean addSpace = false;
                int lastIndex = gram.getContent().size() - 1;
                if (lastIndex >= 0 && gram.getContent().get(lastIndex) instanceof String) {
                    String lastString = normalize((String) gram.getContent().get(lastIndex), false);
                    if (lastString.endsWith(" ")) {
                        String afterString = "";
                        int pos = contentList.indexOf(n);
                        if (pos < contentList.size() - 1 && contentList.get(pos + 1) instanceof String) {
                            afterString = normalize((String) contentList.get(pos + 1), false);
                        }
                        if (!afterString.startsWith(" ")) {
                            addSpace = true;
                            gram.getContent().set(lastIndex, lastString.substring(0, lastString.length() - 1));
                        }
                    }
                }
                Visitor<RuntimeException> strongVisitor = visitor;
                int[] strongs = null;
                char[] strongsPrefixes = null;
                if (gram.getStr() != null) {
                    List<String> strongList = new ArrayList<String>(Arrays.asList(gram.getStr().trim().replaceAll(" ++", " ").split(" ")));
                    for (int i = 0; i < strongList.size(); i++) {
                        if (!strongList.get(i).matches("[GH]?[0-9]+")) {
                            System.out.println("WARNING: Skipping invalid Strong number " + strongList.get(i));
                            strongList.remove(i);
                            i--;
                        }
                    }
                    strongs = new int[strongList.size()];
                    // Prefixes are reported only if every number carries one. Collect them in a
                    // local array and track completeness with a flag: nulling the array inside
                    // the loop caused a NullPointerException when an unprefixed number was
                    // followed by a prefixed one (e.g. "123 G456").
                    char[] prefixes = new char[strongList.size()];
                    boolean allPrefixed = !strongList.isEmpty();
                    for (int i = 0; i < strongs.length; i++) {
                        String strong = strongList.get(i);
                        if (strong.matches("[GH][0-9]+")) {
                            prefixes[i] = strong.charAt(0);
                            strongs[i] = Integer.parseInt(strong.substring(1));
                        } else {
                            allPrefixed = false;
                            strongs[i] = Integer.parseInt(strong);
                        }
                    }
                    strongsPrefixes = allPrefixed ? prefixes : null;
                }
                String[] rmac = null;
                if (gram.getRmac() != null && gram.getRmac().length() > 0) {
                    List<String> rmacList = new ArrayList<String>(Arrays.asList(gram.getRmac().toUpperCase().split(" ")));
                    for (int i = 0; i < rmacList.size(); i++) {
                        String rmacValue = rmacList.get(i);
                        // Drop a single trailing dash before validating.
                        if (rmacValue.endsWith("-"))
                            rmacValue = rmacValue.substring(0, rmacValue.length() - 1);
                        rmacList.set(i, rmacValue);
                        if (!rmacValue.matches(Utils.RMAC_REGEX)) {
                            System.out.println("WARNING: Skipping invalid RMAC: " + rmacValue);
                            rmacList.remove(i);
                            i--;
                        }
                    }
                    // Built once after filtering; an empty result is turned into null below.
                    rmac = rmacList.toArray(new String[rmacList.size()]);
                }
                if (strongs != null && strongs.length == 0)
                    strongs = null;
                if (rmac != null && rmac.length == 0)
                    rmac = null;
                if (strongs != null || rmac != null)
                    strongVisitor = strongVisitor.visitGrammarInformation(strongsPrefixes, strongs, rmac, null);
                // Only mark emptiness when a grammar visitor was actually opened.
                if (!parseContent(strongVisitor, gram.getContent(), abbrMap) && strongVisitor != visitor) {
                    visitEmptyMarker(strongVisitor);
                }
                if (addSpace)
                    visitor.visitText(" ");
            } else {
                // Unknown element: ignore it without counting it as content.
                continue;
            }
            contentFound = true;
        }
    }
    return contentFound;
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) Matcher(java.util.regex.Matcher) STYLE(biblemulticonverter.schema.zef2005.STYLE) FormattingInstructionKind(biblemulticonverter.data.FormattedText.FormattingInstructionKind) JAXBElement(javax.xml.bind.JAXBElement) DIV(biblemulticonverter.schema.zef2005.DIV) BR(biblemulticonverter.schema.zef2005.BR) XREF(biblemulticonverter.schema.zef2005.XREF) BookID(biblemulticonverter.data.BookID) NOTE(biblemulticonverter.schema.zef2005.NOTE) GRAM(biblemulticonverter.schema.zef2005.GRAM) List(java.util.List) ArrayList(java.util.ArrayList)

Example 8 with Note

use of biblemulticonverter.schema.usx3.Note in project BibleMultiConverter by schierlm.

The method parseContent of the class ZefaniaXMLRoundtrip.

/**
 * Feeds a list of Zefania XML content nodes to {@code visitor}, recursing into nested notes,
 * styles and grammar elements. Unlike a lenient import, anything unexpected is rejected with an
 * exception, and source details (footnote origin, line-break grouping, grammar tag name) are
 * recorded as {@code "zefania"} extra attributes.
 *
 * @param visitor receives the converted content
 * @param contentList the nodes to convert
 * @param abbrMap book abbreviations used to label cross references; books without an entry are
 *     labelled with their OSIS id
 * @return {@code true} if at least one node counted as content (non-blank text, a non-empty
 *     note, a line break, a cross reference or a STYLE/GRAM element)
 * @throws IOException if a node, element, cross reference scope, CSS value or Strong number is
 *     not in one of the accepted forms
 */
private boolean parseContent(Visitor<RuntimeException> visitor, List<Object> contentList, Map<BookID, String> abbrMap) throws IOException {
    boolean foundContent = false;
    for (Object item : contentList) {
        if (item instanceof String) {
            String text = normalize((String) item, false);
            visitor.visitText(text);
            // Whitespace-only text is emitted but does not count as content.
            if (text.trim().length() > 0) {
                foundContent = true;
            }
            continue;
        }

        if (item instanceof DIV || item instanceof NOTE) {
            boolean wrappedInDiv = item instanceof DIV;
            NOTE footnote = wrappedInDiv ? ((DIV) item).getNOTE() : (NOTE) item;
            if (footnote.getContent().size() == 0) {
                continue;
            }
            Visitor<RuntimeException> footnoteVisitor;
            if (wrappedInDiv) {
                // Record that the footnote came from a DIV wrapper.
                footnoteVisitor = visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "div").visitFootnote();
            } else {
                footnoteVisitor = visitor.visitFootnote();
            }
            if (!parseContent(footnoteVisitor, footnote.getContent(), abbrMap)) {
                visitEmptyMarker(footnoteVisitor);
            }
            foundContent = true;
            continue;
        }

        if (item instanceof BR) {
            BR lineBreak = (BR) item;
            Visitor<RuntimeException> breakVisitor = visitor;
            int repeat = 1;
            if (lineBreak.getCount() != null) {
                repeat = lineBreak.getCount().intValue();
                breakVisitor = visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "newline-group", lineBreak.getCount() + "--" + lineBreak.getArt().value());
            }
            if (repeat < 1 || repeat > 10) {
                throw new RuntimeException();
            }
            for (int emitted = 0; emitted < repeat; emitted++) {
                switch (lineBreak.getArt()) {
                    case X_NL:
                        breakVisitor.visitLineBreak(LineBreakKind.NEWLINE);
                        break;
                    case X_P:
                        breakVisitor.visitLineBreak(LineBreakKind.PARAGRAPH);
                        break;
                    default:
                        throw new RuntimeException(lineBreak.getArt().toString());
                }
            }
            foundContent = true;
            continue;
        }

        if (item instanceof XREF) {
            XREF crossRef = (XREF) item;
            Visitor<RuntimeException> xrefVisitor = visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "inner-xref").visitFootnote();
            boolean needsSeparator = false;
            for (String scope : crossRef.getMscope().split(" ")) {
                // Each scope has the form book;chapter[-endChapter];verse[-endVerse].
                Matcher scopeMatcher = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(scope);
                if (!scopeMatcher.matches()) {
                    throw new IOException(scope);
                }
                BookID book = BookID.fromZefId(Integer.parseInt(scopeMatcher.group(1)));
                int startChapter = Integer.parseInt(scopeMatcher.group(2));
                String chapterSuffix = scopeMatcher.group(3);
                int endChapter = chapterSuffix == null ? startChapter : Integer.parseInt(chapterSuffix.substring(1));

                // Verse "0" is replaced by the placeholder "1//G" on either end of the range.
                String firstVerse = scopeMatcher.group(4);
                if (firstVerse.equals("0")) {
                    firstVerse = "1//G";
                }
                String verseSuffix = scopeMatcher.group(5);
                String lastVerse = verseSuffix == null ? firstVerse : verseSuffix.substring(1);
                if (lastVerse.equals("0")) {
                    lastVerse = "1//G";
                }

                String label = abbrMap.get(book);
                if (label == null) {
                    label = book.getOsisID();
                }
                if (needsSeparator) {
                    xrefVisitor.visitText(" ");
                }
                needsSeparator = true;

                // Within one chapter, put a backwards numeric range into ascending order.
                boolean bothNumeric = !firstVerse.equals("1//G") && !lastVerse.equals("1//G");
                if (startChapter == endChapter && bothNumeric && Integer.parseInt(firstVerse) > Integer.parseInt(lastVerse)) {
                    String swapped = firstVerse;
                    firstVerse = lastVerse;
                    lastVerse = swapped;
                }
                xrefVisitor.visitCrossReference(label, book, startChapter, firstVerse, endChapter, lastVerse).visitText(label + " " + startChapter + ":" + firstVerse);
            }
            foundContent = true;
            continue;
        }

        if (!(item instanceof JAXBElement<?>)) {
            throw new IOException(item.getClass().toString());
        }

        JAXBElement<?> element = (JAXBElement<?>) item;
        String name = element.getName().toString();
        Object payload = element.getValue();
        if (name.equals("STYLE") && payload instanceof STYLE) {
            STYLE style = (STYLE) payload;
            String css = style.getCss();
            String id = style.getId();
            // An element may carry an id or CSS, but not both.
            if (id != null && css != null) {
                throw new IOException(id + "/" + css);
            }
            if (css != null && css.startsWith("display:block;")) {
                // not really a formatting instruction, but more some
                // clever way of indentation
                List<Object> blockContent = style.getContent();
                Visitor<RuntimeException> blockVisitor = visitor.visitCSSFormatting(css);
                if (!parseContent(blockVisitor, blockContent, abbrMap)) {
                    visitEmptyMarker(blockVisitor);
                }
            } else {
                FormattingInstructionKind kind;
                if (id != null && id.equals("cl:divineName")) {
                    kind = FormattingInstructionKind.DIVINE_NAME;
                } else if (css == null) {
                    throw new IOException(id);
                } else if (css.contains("italic")) {
                    kind = FormattingInstructionKind.ITALIC;
                } else if (css.contains("bold")) {
                    kind = FormattingInstructionKind.BOLD;
                } else if (css.equalsIgnoreCase("color:#FF0000")) {
                    kind = FormattingInstructionKind.WORDS_OF_JESUS;
                } else if (css.equals("color:blue")) {
                    kind = FormattingInstructionKind.LINK;
                } else if (css.equals("color:#00CC33;font-size:8pt;vertical-align:super") || css.equals("font-size:small")) {
                    kind = FormattingInstructionKind.SUPERSCRIPT;
                } else {
                    throw new IOException(css);
                }
                List<Object> styledContent = style.getContent();
                Visitor<RuntimeException> styledVisitor = visitor.visitFormattingInstruction(kind);
                // Keep the original CSS when it differs from the kind's own CSS.
                if (css != null && !kind.getCss().equals(css)) {
                    styledVisitor = styledVisitor.visitCSSFormatting(css);
                }
                if (styledContent.size() == 0 || !parseContent(styledVisitor, styledContent, abbrMap)) {
                    visitEmptyMarker(styledVisitor);
                }
            }
        } else if ((name.equals("gr") || name.equals("GRAM")) && payload instanceof GRAM) {
            GRAM gram = (GRAM) payload;
            // Remember a tag name other than "GRAM" as an extra attribute.
            Visitor<RuntimeException> grammarVisitor = name.equals("GRAM")
                    ? visitor
                    : visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "gram-tag", name);
            if (gram.getStr() == null && gram.getRmac() == null) {
                throw new IOException();
            }
            boolean anyRealPrefix = false;
            char[] prefixes = null;
            int[] numbers = null;
            if (gram.getStr() != null) {
                String strongText = gram.getStr().trim().replaceAll(" ++", " ");
                // Empty and "?" values are replaced by the numbers 0 and 99111.
                if (strongText.length() == 0) {
                    strongText = "0";
                }
                if (strongText.equals("?")) {
                    strongText = "99111";
                }
                if (!strongText.matches("[GH]?[0-9]+( [GH]?[0-9]+)*")) {
                    throw new IOException(strongText);
                }
                String[] tokens = strongText.split(" ");
                prefixes = new char[tokens.length];
                numbers = new int[tokens.length];
                for (int idx = 0; idx < tokens.length; idx++) {
                    String token = tokens[idx];
                    if (token.matches("[GH][0-9]+")) {
                        prefixes[idx] = token.charAt(0);
                        numbers[idx] = Integer.parseInt(token.substring(1));
                        anyRealPrefix = true;
                    } else {
                        // 'X' fills the slot of a number that has no prefix of its own.
                        prefixes[idx] = 'X';
                        numbers[idx] = Integer.parseInt(token);
                    }
                }
            }
            String rmacText = gram.getRmac();
            String[] rmacs = rmacText == null ? null : rmacText.split(" ");
            // Prefixes are passed on only if at least one number really had one.
            grammarVisitor = grammarVisitor.visitGrammarInformation(anyRealPrefix ? prefixes : null, numbers, rmacs, null);
            if (!parseContent(grammarVisitor, gram.getContent(), abbrMap)) {
                visitEmptyMarker(grammarVisitor);
            }
        } else {
            throw new IOException(name + "/" + payload.getClass().toString());
        }
        foundContent = true;
    }
    return foundContent;
}
Also used : Visitor(biblemulticonverter.data.FormattedText.Visitor) Matcher(java.util.regex.Matcher) STYLE(biblemulticonverter.schema.zef2005.STYLE) FormattingInstructionKind(biblemulticonverter.data.FormattedText.FormattingInstructionKind) IOException(java.io.IOException) JAXBElement(javax.xml.bind.JAXBElement) DIV(biblemulticonverter.schema.zef2005.DIV) BR(biblemulticonverter.schema.zef2005.BR) XREF(biblemulticonverter.schema.zef2005.XREF) BookID(biblemulticonverter.data.BookID) NOTE(biblemulticonverter.schema.zef2005.NOTE) GRAM(biblemulticonverter.schema.zef2005.GRAM)

Aggregations

IOException (java.io.IOException)4 BookID (biblemulticonverter.data.BookID)3 FormattingInstructionKind (biblemulticonverter.data.FormattedText.FormattingInstructionKind)3 Visitor (biblemulticonverter.data.FormattedText.Visitor)3 BR (biblemulticonverter.schema.zef2005.BR)3 DIV (biblemulticonverter.schema.zef2005.DIV)3 GRAM (biblemulticonverter.schema.zef2005.GRAM)3 NOTE (biblemulticonverter.schema.zef2005.NOTE)3 Note (com.google.containeranalysis.v1alpha1.Note)3 Note (biblemulticonverter.schema.usx3.Note)2 Verse (biblemulticonverter.schema.usx3.Verse)2 STYLE (biblemulticonverter.schema.zef2005.STYLE)2 XREF (biblemulticonverter.schema.zef2005.XREF)2 Matcher (java.util.regex.Matcher)2 JAXBElement (javax.xml.bind.JAXBElement)2 Test (org.junit.Test)2 Book (biblemulticonverter.data.Book)1 Chapter (biblemulticonverter.data.Chapter)1 FormattedText (biblemulticonverter.data.FormattedText)1 ExtraAttributePriority (biblemulticonverter.data.FormattedText.ExtraAttributePriority)1