Search in sources :

Example 1 with Verse

use of biblemulticonverter.schema.usx3.Verse in project BibleMultiConverter by schierlm.

In class USX3, method doImportBook:

/**
 * Imports a single USX 3 file and converts it into a {@link ParatextBook}.
 *
 * <p>The file is first handed to {@code ValidateXML.validateFileBeforeParsing} together with
 * {@code getSchema()}, then unmarshalled via JAXB. Afterwards every book-level element
 * (paragraph, table, chapter milestone, note, sidebar) is translated in document order.
 *
 * @param inputFile the file to import; files whose name does not end in {@code .usx}
 *                  (compared case-insensitively) are ignored
 * @return the imported book, or {@code null} if the file is not a {@code .usx} file or its
 *         book code is not a known Paratext ID
 * @throws Exception if validation or unmarshalling fails; unsupported or invalid elements are
 *                   reported as {@link IOException} (or {@link RuntimeException} for
 *                   non-text content inside a book header attribute)
 */
@Override
protected ParatextBook doImportBook(File inputFile) throws Exception {
    // Locale.ROOT keeps case conversion independent of the default locale
    // (e.g. the Turkish dotted/dotless i would otherwise break matching).
    if (!inputFile.getName().toLowerCase(java.util.Locale.ROOT).endsWith(".usx")) {
        return null;
    }
    ValidateXML.validateFileBeforeParsing(getSchema(), inputFile);
    JAXBContext ctx = JAXBContext.newInstance(ObjectFactory.class.getPackage().getName());
    XMLInputFactory xif = XMLInputFactory.newFactory();
    Usx doc;
    // XMLStreamReader.close() does not close the underlying input source, so the
    // FileInputStream is managed on its own; both are released even if parsing fails.
    try (FileInputStream in = new FileInputStream(inputFile)) {
        XMLStreamReader xsr = xif.createXMLStreamReader(in);
        try {
            Unmarshaller u = ctx.createUnmarshaller();
            u.setListener(unmarshallerLocationListener);
            unmarshallerLocationListener.setXMLStreamReader(inputFile.getName(), xsr);
            doc = (Usx) u.unmarshal(xsr);
        } finally {
            xsr.close();
        }
    }
    ParatextBook.ParatextID id = ParatextBook.ParatextID.fromIdentifier(doc.getBook().getCode().toUpperCase(java.util.Locale.ROOT));
    if (id == null) {
        System.out.println("WARNING: Skipping book with unknown ID: " + doc.getBook().getCode());
        return null;
    }
    ParatextBook result = new ParatextBook(id, doc.getBook().getContent());
    // Character content container that is currently "open"; page breaks and book-level notes
    // are appended to it. It is reset to null whenever a structural element ends the run.
    ParatextCharacterContent charContent = null;
    for (Object o : doc.getParaOrTableOrChapter()) {
        if (o instanceof Para) {
            Para para = (Para) o;
            if (BOOK_HEADER_ATTRIBUTE_TAGS.contains(para.getStyle().value())) {
                // Header paragraphs become book attributes; only plain text is allowed in them.
                StringBuilder value = new StringBuilder();
                for (Object oo : para.getContent()) {
                    if (oo instanceof String) {
                        // Collapse every whitespace run into a single space.
                        value.append(((String) oo).replaceAll("[ \r\n\t]+", " "));
                    } else {
                        throw new RuntimeException("Unsupported content in attribute: " + oo.getClass());
                    }
                }
                result.getAttributes().put(para.getStyle().value(), value.toString());
                charContent = null;
            } else if (para.getStyle() == ParaStyle.PB) {
                // A page break is stored as character content, reusing the open container if any.
                if (charContent == null) {
                    charContent = new ParatextCharacterContent();
                    result.getContent().add(charContent);
                }
                charContent.getContent().add(new ParatextCharacterContent.AutoClosingFormatting(ParatextCharacterContent.AutoClosingFormattingKind.PAGE_BREAK, false));
            } else if (PARA_STYLE_UNSUPPORTED.contains(para.getStyle())) {
                // skip
                charContent = null;
            } else {
                result.getContent().add(new ParatextBook.ParagraphStart(PARA_STYLE_MAP.get(para.getStyle())));
                charContent = null;
                if (!para.getContent().isEmpty()) {
                    charContent = new ParatextCharacterContent();
                    result.getContent().add(charContent);
                    parseCharContent(para.getContent(), charContent);
                }
            }
        } else if (o instanceof Table) {
            Table table = (Table) o;
            for (Row row : table.getRow()) {
                result.getContent().add(new ParatextBook.ParagraphStart(ParatextBook.ParagraphKind.TABLE_ROW));
                for (Object oo : row.getVerseOrCell()) {
                    if (oo instanceof Verse) {
                        Verse verse = (Verse) oo;
                        ParatextCharacterContent.ParatextCharacterContentPart verseStartOrEnd = handleVerse(verse);
                        charContent = new ParatextCharacterContent();
                        result.getContent().add(charContent);
                        charContent.getContent().add(verseStartOrEnd);
                    } else if (oo instanceof Cell) {
                        Cell cell = (Cell) oo;
                        result.getContent().add(new ParatextBook.TableCellStart(cell.getStyle().value()));
                        charContent = new ParatextCharacterContent();
                        result.getContent().add(charContent);
                        parseCharContent(cell.getContent(), charContent);
                    } else {
                        // Report the offending row element itself, not the enclosing table.
                        throw new IOException("Unsupported table row element: " + oo.getClass().getName());
                    }
                }
            }
            charContent = null;
        } else if (o instanceof Chapter) {
            Chapter chapter = (Chapter) o;
            if (chapter.getSid() != null) {
                // Assume start chapter
                result.getContent().add(new ParatextBook.ChapterStart(new ChapterIdentifier(result.getId(), chapter.getNumber().intValue())));
            } else if (chapter.getEid() != null) {
                // Assume end chapter
                ChapterIdentifier location = ChapterIdentifier.fromLocationString(chapter.getEid());
                if (location == null) {
                    throw new IOException("Invalid chapter eid found: " + chapter.getEid());
                }
                result.getContent().add(new ParatextBook.ChapterEnd(location));
            } else {
                throw new IOException("Invalid chapter found, both sid and eid are undefined: " + chapter);
            }
            charContent = null;
        } else if (o instanceof Note) {
            // Book-level notes are attached to the open character content, creating one if needed.
            if (charContent == null) {
                charContent = new ParatextCharacterContent();
                result.getContent().add(charContent);
            }
            Note note = (Note) o;
            ParatextCharacterContent.FootnoteXref nx = new ParatextCharacterContent.FootnoteXref(NOTE_STYLE_MAP.get(note.getStyle()), note.getCaller());
            charContent.getContent().add(nx);
            parseCharContent(note.getContent(), nx);
        } else if (o instanceof Sidebar) {
            System.out.println("WARNING: Skipping sidebar (study bible content)");
            charContent = null;
        } else {
            throw new IOException("Unsupported book level element: " + o.getClass().getName());
        }
    }
    return result;
}
Also used : XMLStreamReader(javax.xml.stream.XMLStreamReader) JAXBContext(javax.xml.bind.JAXBContext) Unmarshaller(javax.xml.bind.Unmarshaller) Cell(biblemulticonverter.schema.usx3.Cell) Table(biblemulticonverter.schema.usx3.Table) Para(biblemulticonverter.schema.usx3.Para) Chapter(biblemulticonverter.schema.usx3.Chapter) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Note(biblemulticonverter.schema.usx3.Note) Usx(biblemulticonverter.schema.usx3.Usx) Row(biblemulticonverter.schema.usx3.Row) ChapterIdentifier(biblemulticonverter.format.paratext.model.ChapterIdentifier) XMLInputFactory(javax.xml.stream.XMLInputFactory) Verse(biblemulticonverter.schema.usx3.Verse) Sidebar(biblemulticonverter.schema.usx3.Sidebar)

Example 2 with Verse

use of biblemulticonverter.schema.usx3.Verse in project BibleMultiConverter by schierlm.

In class USX3, method parseCharContent:

/**
 * Converts a list of unmarshalled USX character-level elements and appends the resulting
 * parts to the given container, in order.
 *
 * <p>Optional breaks and figures are skipped with a warning. Character styles listed in
 * {@code CHAR_STYLE_UNSUPPORTED} are flattened: their children are added directly to
 * {@code container}. All other character styles and notes become nested containers that are
 * filled recursively.
 *
 * @param content   the elements to convert
 * @param container the container that receives the converted parts
 * @throws IOException if an element of an unsupported type is encountered
 */
private void parseCharContent(List<Object> content, ParatextBook.ParatextCharacterContentContainer container) throws IOException {
    for (Object item : content) {
        if (item instanceof Optbreak) {
            // is ignored in USFM as well
            System.out.println("WARNING: Skipping optional break");
            continue;
        }
        if (item instanceof Ref) {
            Ref ref = (Ref) item;
            try {
                container.getContent().add(ParatextCharacterContent.Reference.parse(ref.getLoc(), ref.getContent()));
            } catch (IllegalArgumentException e) {
                // The location could not be handled as a structured reference:
                // warn and keep the visible text of the reference instead.
                String where = unmarshallerLocationListener.getHumanReadableLocation(item);
                System.out.println("WARNING: Unsupported structured reference format at " + where + " - replaced by plain text: " + ref.getLoc());
                final ParatextCharacterContent.Text fallback = ParatextCharacterContent.Text.from(ref.getContent());
                if (fallback != null) {
                    container.getContent().add(fallback);
                }
            }
            continue;
        }
        if (item instanceof String) {
            final ParatextCharacterContent.Text plain = ParatextCharacterContent.Text.from((String) item);
            if (plain != null) {
                container.getContent().add(plain);
            }
            continue;
        }
        if (item instanceof Figure) {
            System.out.println("WARNING: Skipping figure");
            continue;
        }
        if (item instanceof Char) {
            Char styled = (Char) item;
            if (!CHAR_STYLE_UNSUPPORTED.contains(styled.getStyle())) {
                ParatextCharacterContent.AutoClosingFormatting formatting = new ParatextCharacterContent.AutoClosingFormatting(CHAR_STYLE_MAP.get(styled.getStyle()), false);
                String lemma = styled.getLemma();
                // Only word list entries carry the lemma attribute over.
                if (formatting.getKind() == ParatextCharacterContent.AutoClosingFormattingKind.WORDLIST) {
                    if (lemma != null && !lemma.isEmpty()) {
                        formatting.getAttributes().put("lemma", lemma);
                    }
                }
                container.getContent().add(formatting);
                parseCharContent(styled.getContent(), formatting);
            } else {
                // Unsupported style: drop the wrapper but keep its children.
                parseCharContent(styled.getContent(), container);
            }
            continue;
        }
        if (item instanceof Verse) {
            container.getContent().add(handleVerse((Verse) item));
            continue;
        }
        if (item instanceof Note) {
            Note footnote = (Note) item;
            ParatextCharacterContent.FootnoteXref xref = new ParatextCharacterContent.FootnoteXref(NOTE_STYLE_MAP.get(footnote.getStyle()), footnote.getCaller());
            container.getContent().add(xref);
            parseCharContent(footnote.getContent(), xref);
            continue;
        }
        throw new IOException("Unsupported character content element: " + item.getClass().getName());
    }
}
Also used : IOException(java.io.IOException) Optbreak(biblemulticonverter.schema.usx3.Optbreak) Figure(biblemulticonverter.schema.usx3.Figure) Ref(biblemulticonverter.schema.usx3.Ref) Char(biblemulticonverter.schema.usx3.Char) Note(biblemulticonverter.schema.usx3.Note) Verse(biblemulticonverter.schema.usx3.Verse)

Aggregations

Note (biblemulticonverter.schema.usx3.Note)2 Verse (biblemulticonverter.schema.usx3.Verse)2 IOException (java.io.IOException)2 ChapterIdentifier (biblemulticonverter.format.paratext.model.ChapterIdentifier)1 Cell (biblemulticonverter.schema.usx3.Cell)1 Chapter (biblemulticonverter.schema.usx3.Chapter)1 Char (biblemulticonverter.schema.usx3.Char)1 Figure (biblemulticonverter.schema.usx3.Figure)1 Optbreak (biblemulticonverter.schema.usx3.Optbreak)1 Para (biblemulticonverter.schema.usx3.Para)1 Ref (biblemulticonverter.schema.usx3.Ref)1 Row (biblemulticonverter.schema.usx3.Row)1 Sidebar (biblemulticonverter.schema.usx3.Sidebar)1 Table (biblemulticonverter.schema.usx3.Table)1 Usx (biblemulticonverter.schema.usx3.Usx)1 FileInputStream (java.io.FileInputStream)1 JAXBContext (javax.xml.bind.JAXBContext)1 Unmarshaller (javax.xml.bind.Unmarshaller)1 XMLInputFactory (javax.xml.stream.XMLInputFactory)1 XMLStreamReader (javax.xml.stream.XMLStreamReader)1