Use of biblemulticonverter.schema.zef2005.CHAPTER in project BibleMultiConverter by schierlm.
Class ZefaniaXML, method parseBible.
/**
 * Converts a parsed Zefania XML document into a {@link Bible}.
 *
 * <p>The conversion runs in three stages:
 * <ol>
 * <li>Status, version, revision and every non-empty INFORMATION entry are
 * copied into a {@link MetadataBook}; its book is added to the result only
 * if at least one key was set.</li>
 * <li>A first pass over all BIBLEBOOK elements derives a unique abbreviation
 * per known book ID and stores it in a map, so that content parsed in the
 * next stage can look up abbreviations of books that appear later in the
 * document.</li>
 * <li>A second pass creates the books (making abbreviation, short name and
 * long name unique), then walks every CHAPTER and its CAPTION, REMARK, XREF,
 * PROLOG and VERS children.</li>
 * </ol>
 *
 * <p>BIBLEBOOK elements whose number is rejected by
 * {@code BookID.fromZefId} are skipped (with a warning in the second pass).
 * If the same book ID occurs more than once, later occurrences are merged
 * into the first book created for that ID.
 *
 * @param doc the unmarshalled Zefania XML document
 * @return the converted bible
 * @throws Exception declared by the signature; the visible code does not
 *         throw checked exceptions itself, so this presumably covers the
 *         helpers it calls (e.g. {@code parseContent})
 */
protected Bible parseBible(XMLBIBLE doc) throws Exception {
    Bible result = new Bible(doc.getBiblename());

    // Stage 1: document-level metadata.
    MetadataBook metadata = new MetadataBook();
    if (doc.getStatus() != null) {
        metadata.setValue(MetadataBookKey.status, doc.getStatus().value());
    }
    if (doc.getVersion() != null) {
        metadata.setValue(MetadataBookKey.version, doc.getVersion());
    }
    if (doc.getRevision() != null) {
        metadata.setValue(MetadataBookKey.revision, doc.getRevision().toString());
    }
    for (JAXBElement<?> elem : doc.getINFORMATION().getTitleOrCreatorOrDescription()) {
        if (elem.getValue() == null)
            continue;
        String value = normalize(elem.getValue().toString(), true).trim();
        if (value.length() != 0)
            metadata.setValue(elem.getName().getLocalPart(), value);
    }
    metadata.finished();
    if (metadata.getKeys().size() > 0)
        result.getBooks().add(metadata.getBook());

    Set<String> abbrs = new HashSet<String>();
    Set<String> shortnames = new HashSet<String>();
    Set<String> longnames = new HashSet<String>();
    Map<BookID, String> abbrMap = new EnumMap<BookID, String>(BookID.class);
    List<BIBLEBOOK> nl = doc.getBIBLEBOOK();

    // Stage 2: pre-compute the abbreviation of every known book so that
    // content parsing below can resolve references to books that have not
    // been converted yet.
    for (BIBLEBOOK e : nl) {
        String shortname = e.getBsname();
        int number = e.getBnumber().intValue();
        BookID bookID;
        try {
            bookID = BookID.fromZefId(number);
        } catch (IllegalArgumentException ex) {
            // Unknown book number; the warning is printed in the second pass.
            continue;
        }
        if (shortname == null || shortname.length() == 0)
            shortname = "_" + bookID.getOsisID();
        // Strip everything except the listed characters; then make sure the
        // abbreviation does not start with a lowercase letter and has at
        // least two characters.
        String abbr = shortname.replaceAll("[^A-Z0-9a-zäöü]++", "");
        if (abbr.length() == 0 || Character.isLowerCase(abbr.charAt(0)))
            abbr = "X" + abbr;
        if (abbr.length() == 1)
            abbr += "x";
        // Disambiguate duplicates with the first free numeric suffix 2..99.
        if (abbrs.contains(abbr)) {
            for (int i = 2; i < 100; i++) {
                if (!abbrs.contains(abbr + i)) {
                    abbr = abbr + i;
                    break;
                }
            }
        }
        abbrs.add(abbr);
        abbrMap.put(bookID, abbr);
    }

    // The second pass repeats the same abbreviation derivation, so the set
    // of used abbreviations has to start out empty again.
    abbrs.clear();

    // Stage 3: build books, chapters and verses.
    EnumMap<BookID, Book> existingBooks = new EnumMap<BookID, Book>(BookID.class);
    for (BIBLEBOOK e : nl) {
        String shortname = e.getBsname();
        String longname = e.getBname();
        int number = e.getBnumber().intValue();
        BookID bookID;
        try {
            bookID = BookID.fromZefId(number);
        } catch (IllegalArgumentException ex) {
            System.out.println("WARNING: Skipping book with unknown id " + number);
            continue;
        }
        if (shortname == null || shortname.length() == 0)
            shortname = "_" + bookID.getOsisID();
        if (longname == null || longname.length() == 0)
            longname = "_" + bookID.getEnglishName();
        else
            longname = longname.replaceAll(" ++", " ").trim();

        // Same derivation as in the first pass (must stay in sync with it).
        String abbr = shortname.replaceAll("[^A-Z0-9a-zäöü]++", "");
        if (abbr.length() == 0 || Character.isLowerCase(abbr.charAt(0)))
            abbr = "X" + abbr;
        if (abbr.length() == 1)
            abbr += "x";
        if (abbrs.contains(abbr)) {
            for (int i = 2; i < 100; i++) {
                if (!abbrs.contains(abbr + i)) {
                    abbr = abbr + i;
                    break;
                }
            }
        }
        abbrs.add(abbr);

        // Repair two specific naming mistakes: Exodus labelled as Genesis,
        // and 2 Chronicles carrying the short name of 1 Chronicles.
        if (shortname.equals("Gen") && longname.equals("Genesis") && bookID == BookID.BOOK_Exod) {
            System.out.println("WARNING: Book number " + bookID.getZefID() + " has name " + longname);
            shortname = "Exo";
            longname = "Exodus";
        }
        if (shortname.equals("1Chr") && longname.equals("2 Chronicles")) {
            System.out.println("WARNING: Book name 2 Chronicles has short name 1Chr");
            shortname = "2Chr";
        }

        // Make short and long names unique with a numeric suffix 2..99.
        if (shortnames.contains(shortname)) {
            System.out.println("WARNING: Duplicate short name " + shortname);
            for (int i = 2; i < 100; i++) {
                if (!shortnames.contains(shortname + i)) {
                    shortname = shortname + i;
                    break;
                }
            }
        }
        shortnames.add(shortname);
        if (longnames.contains(longname)) {
            // Report the long name that actually collided (the original
            // message printed the short name here).
            System.out.println("WARNING: Duplicate long name " + longname);
            for (int i = 2; i < 100; i++) {
                if (!longnames.contains(longname + i)) {
                    longname = longname + i;
                    break;
                }
            }
        }
        longnames.add(longname);

        // Reuse the book if this book ID was already seen, so that chapters
        // of a repeated BIBLEBOOK are merged into the first one.
        Book book = existingBooks.get(bookID);
        if (book == null) {
            book = new Book(abbr, bookID, shortname, longname);
            existingBooks.put(bookID, book);
            result.getBooks().add(book);
        }

        // Captions are buffered until the next verse, which then receives
        // them as headlines. The buffer is shared across the chapters of
        // this BIBLEBOOK element.
        List<Headline> headlineBuffer = new ArrayList<Headline>();
        for (CHAPTER e2 : e.getCHAPTER()) {
            int chapterNumber = e2.getCnumber().intValue();
            // Pad with empty chapters so that index (chapterNumber - 1) exists.
            while (book.getChapters().size() < chapterNumber) book.getChapters().add(new Chapter());
            Chapter chapter = book.getChapters().get(chapterNumber - 1);
            // Verses already present before this CHAPTER element was
            // processed; they are excluded from the finished() call below.
            int existingVerses = chapter.getVerses().size();
            for (Object e3 : e2.getPROLOGOrCAPTIONOrVERS()) {
                if (e3 instanceof CAPTION) {
                    CAPTION caption = (CAPTION) e3;
                    // Caption types X_H_1..X_H_6 map to depths 1..6;
                    // anything else (including no type) maps to depth 9.
                    int depth;
                    if (caption.getType() == null) {
                        depth = 9;
                    } else {
                        switch(caption.getType()) {
                        case X_H_1:
                            depth = 1;
                            break;
                        case X_H_2:
                            depth = 2;
                            break;
                        case X_H_3:
                            depth = 3;
                            break;
                        case X_H_4:
                            depth = 4;
                            break;
                        case X_H_5:
                            depth = 5;
                            break;
                        case X_H_6:
                            depth = 6;
                            break;
                        default:
                            depth = 9;
                            break;
                        }
                    }
                    // Within the buffer a headline must be deeper than its
                    // predecessor, except that depth 9 may repeat.
                    int lastDepth = headlineBuffer.size() == 0 ? -1 : headlineBuffer.get(headlineBuffer.size() - 1).getDepth();
                    if (depth <= lastDepth)
                        depth = lastDepth == 9 ? 9 : lastDepth + 1;
                    Headline h = new Headline(depth);
                    if (parseContent(h.getAppendVisitor(), caption.getContent(), abbrMap)) {
                        h.trimWhitespace();
                        h.finished();
                        headlineBuffer.add(h);
                    }
                } else if (e3 instanceof REMARK) {
                    // A remark becomes a footnote on the verse it refers to;
                    // it is dropped if that verse does not exist or if the
                    // remark does not consist of exactly one content item.
                    REMARK remark = (REMARK) e3;
                    int vref = remark.getVref().intValue();
                    int idx = chapter.getVerseIndex("" + vref);
                    if (idx == -1)
                        continue;
                    Verse v = chapter.getVerses().get(idx);
                    if (remark.getContent().size() != 1)
                        continue;
                    String remarkText = normalize((String) remark.getContent().get(0), true).trim();
                    v.getAppendVisitor().visitFootnote().visitText(remarkText);
                } else if (e3 instanceof XREF) {
                    // A chapter-level XREF becomes a footnote with one cross
                    // reference per usable mscope entry on the referenced verse.
                    XREF xref = (XREF) e3;
                    int vref = xref.getVref().intValue();
                    int idx = chapter.getVerseIndex("" + vref);
                    if (idx == -1)
                        continue;
                    Verse v = chapter.getVerses().get(idx);
                    Visitor<RuntimeException> footnoteVisitor = v.getAppendVisitor().visitFootnote();
                    boolean first = true;
                    for (String mscope : xref.getMscope().split(" ")) {
                        // Format: book;chapter[-endChapter];verse[-endVerse]
                        Matcher m = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(mscope);
                        if (!m.matches())
                            continue;
                        // NOTE(review): unlike the book loops above, this call is
                        // not guarded; an unknown book number presumably
                        // propagates an IllegalArgumentException - confirm that
                        // this is intended.
                        BookID xrefBookID = BookID.fromZefId(Integer.parseInt(m.group(1)));
                        int xrefChapter = Integer.parseInt(m.group(2)), endChapter = xrefChapter;
                        if (m.group(3) != null)
                            endChapter = Integer.parseInt(m.group(3).substring(1));
                        String verse = m.group(4);
                        String endVerse = m.group(5);
                        if (endVerse == null)
                            endVerse = verse;
                        else
                            endVerse = endVerse.substring(1);
                        // Skip references to verse 0 and reversed ranges.
                        if (verse.equals("0") || endVerse.equals("0"))
                            continue;
                        if (xrefChapter == endChapter && Integer.parseInt(verse) > Integer.parseInt(endVerse))
                            continue;
                        // Fall back to the OSIS ID for books not in this bible.
                        String xrefAbbr = abbrMap.get(xrefBookID);
                        if (xrefAbbr == null)
                            xrefAbbr = xrefBookID.getOsisID();
                        // Separate consecutive references with a space.
                        if (first)
                            first = false;
                        else
                            footnoteVisitor.visitText(" ");
                        footnoteVisitor.visitCrossReference(xrefAbbr, xrefBookID, xrefChapter, verse, endChapter, endVerse).visitText(xrefAbbr + " " + xrefChapter + ":" + verse);
                    }
                    // No usable reference was emitted into the footnote.
                    if (first)
                        visitEmptyMarker(footnoteVisitor);
                } else if (e3 instanceof PROLOG) {
                    // Only the first prolog with content is kept per chapter.
                    PROLOG prolog = (PROLOG) e3;
                    if (chapter.getProlog() != null)
                        continue;
                    FormattedText prologText = new FormattedText();
                    if (parseContent(prologText.getAppendVisitor(), prolog.getContent(), abbrMap)) {
                        prologText.trimWhitespace();
                        prologText.finished();
                        chapter.setProlog(prologText);
                    }
                } else if (e3 instanceof VERS) {
                    VERS vers = (VERS) e3;
                    // A missing or zero verse number means "next verse".
                    int vnumber = vers.getVnumber() == null ? 0 : vers.getVnumber().intValue();
                    if (vnumber == 0)
                        vnumber = chapter.getVerses().size() + 1;
                    String verseNumber = vnumber + (vers.getAix() == null ? "" : vers.getAix());
                    // If the verse number is already taken, use the first free
                    // letter suffix a-z; once all are taken, retry with one
                    // more '-' between number and letter. The candidates are
                    // built from vnumber only, without the aix suffix.
                    String infix = "";
                    while (chapter.getVerseIndex(verseNumber) != -1) {
                        for (char ch = 'a'; ch <= 'z'; ch++) {
                            if (chapter.getVerseIndex(vnumber + infix + ch) == -1) {
                                verseNumber = vnumber + infix + ch;
                                break;
                            }
                        }
                        infix += "-";
                    }
                    Verse verse = new Verse(verseNumber);
                    Visitor<RuntimeException> visitor = verse.getAppendVisitor();
                    boolean contentFound = false;
                    // Flush buffered captions as headlines of this verse; a
                    // verse that only carries headlines still counts as content.
                    if (headlineBuffer.size() > 0) {
                        for (Headline h : headlineBuffer) {
                            h.accept(visitor.visitHeadline(h.getDepth()));
                        }
                        headlineBuffer.clear();
                        contentFound = true;
                    }
                    contentFound |= parseContent(visitor, vers.getContent(), abbrMap);
                    if (contentFound) {
                        verse.trimWhitespace();
                        chapter.getVerses().add(verse);
                    }
                }
            }
            // Finish only the verses added by this CHAPTER element, i.e. skip
            // the first existingVerses entries.
            for (Verse v : chapter.getVerses()) {
                if (existingVerses > 0) {
                    existingVerses--;
                    continue;
                }
                v.finished();
            }
        }
    }
    return result;
}
Use of biblemulticonverter.schema.zef2005.CHAPTER in project BibleMultiConverter by schierlm.
Class ZefaniaXMLRoundtrip, method parseContent.
/**
 * Feeds a mixed Zefania content list (text, notes, line breaks, cross
 * references, styles and grammar tags) into the given visitor.
 *
 * <p>Handled node types:
 * <ul>
 * <li>{@code String}: normalized and emitted as text.</li>
 * <li>{@code DIV} / {@code NOTE}: emitted as a footnote (a DIV additionally
 * tagged with a {@code footnote-source} extra attribute); notes without
 * content are ignored.</li>
 * <li>{@code BR}: emitted as 1 to 10 line breaks of kind NEWLINE or
 * PARAGRAPH.</li>
 * <li>{@code XREF}: emitted as a footnote holding one cross reference per
 * mscope entry.</li>
 * <li>{@code JAXBElement} wrapping {@code STYLE} or {@code GRAM}: emitted as
 * formatting or grammar information around the recursively parsed
 * content.</li>
 * </ul>
 * Nested content that turns out to be empty is filled via
 * {@code visitEmptyMarker}.
 *
 * @param visitor     receiver of the parsed content
 * @param contentList the content nodes to parse
 * @param abbrMap     book abbreviations used for cross references; books
 *                    missing from the map fall back to their OSIS ID
 * @return {@code true} if any non-blank text or any element other than an
 *         empty note was encountered
 * @throws IOException      if the content cannot be represented: malformed
 *                          mscope, STYLE with both id and css, unsupported
 *                          css, GRAM without str and rmac, malformed Strong
 *                          numbers, or an unknown node type
 * @throws RuntimeException if a BR has a count outside 1..10 or an
 *                          unsupported kind
 */
private boolean parseContent(Visitor<RuntimeException> visitor, List<Object> contentList, Map<BookID, String> abbrMap) throws IOException {
    boolean contentFound = false;
    for (Object n : contentList) {
        if (n instanceof String) {
            String value = normalize((String) n, false);
            visitor.visitText(value);
            // Whitespace-only text is emitted but does not count as content.
            contentFound |= value.trim().length() > 0;
        } else if (n instanceof DIV || n instanceof NOTE) {
            NOTE note;
            Visitor<RuntimeException> v;
            if (n instanceof DIV) {
                note = ((DIV) n).getNOTE();
                if (note.getContent().size() == 0)
                    continue;
                // Record that this footnote came from a DIV wrapper.
                v = visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "div").visitFootnote();
            } else {
                note = (NOTE) n;
                if (note.getContent().size() == 0)
                    continue;
                v = visitor.visitFootnote();
            }
            boolean subContentFound = parseContent(v, note.getContent(), abbrMap);
            if (!subContentFound)
                visitEmptyMarker(v);
            contentFound = true;
        } else if (n instanceof BR) {
            BR br = (BR) n;
            Visitor<RuntimeException> v = visitor;
            int count = 1;
            if (br.getCount() != null) {
                count = br.getCount().intValue();
                // An explicit count is recorded as an extra attribute whose
                // value is "<count>--<art>".
                v = visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "newline-group", br.getCount() + "--" + br.getArt().value());
            }
            if (count < 1 || count > 10)
                throw new RuntimeException("Unsupported BR count: " + count);
            for (int ii = 0; ii < count; ii++) {
                switch(br.getArt()) {
                case X_NL:
                    v.visitLineBreak(LineBreakKind.NEWLINE);
                    break;
                case X_P:
                    v.visitLineBreak(LineBreakKind.PARAGRAPH);
                    break;
                default:
                    throw new RuntimeException(br.getArt().toString());
                }
            }
            contentFound = true;
        } else if (n instanceof XREF) {
            XREF xref = (XREF) n;
            Visitor<RuntimeException> footnoteVisitor = visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "inner-xref").visitFootnote();
            boolean first = true;
            for (String mscope : xref.getMscope().split(" ")) {
                // Format: book;chapter[-endChapter];verse[-endVerse]
                Matcher m = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(mscope);
                if (!m.matches())
                    throw new IOException(mscope);
                BookID bookID = BookID.fromZefId(Integer.parseInt(m.group(1)));
                int chapter = Integer.parseInt(m.group(2)), endChapter = chapter;
                if (m.group(3) != null)
                    endChapter = Integer.parseInt(m.group(3).substring(1));
                // Verse 0 is replaced by the placeholder "1//G".
                // NOTE(review): presumably a verse-number form the target model
                // accepts in place of 0 - confirm.
                String verse = m.group(4);
                if (verse.equals("0"))
                    verse = "1//G";
                String endVerse = m.group(5);
                if (endVerse == null)
                    endVerse = verse;
                else
                    endVerse = endVerse.substring(1);
                if (endVerse.equals("0"))
                    endVerse = "1//G";
                // Fall back to the OSIS ID for books without a known abbreviation.
                String abbr = abbrMap.get(bookID);
                if (abbr == null)
                    abbr = bookID.getOsisID();
                // Separate consecutive references with a space.
                if (first)
                    first = false;
                else
                    footnoteVisitor.visitText(" ");
                // Reversed verse ranges within one chapter are swapped
                // (placeholders are not numeric and are left alone).
                if (chapter == endChapter && !verse.equals("1//G") && !endVerse.equals("1//G") && Integer.parseInt(verse) > Integer.parseInt(endVerse)) {
                    String tmp = verse;
                    verse = endVerse;
                    endVerse = tmp;
                }
                footnoteVisitor.visitCrossReference(abbr, bookID, chapter, verse, endChapter, endVerse).visitText(abbr + " " + chapter + ":" + verse);
            }
            contentFound = true;
        } else if (n instanceof JAXBElement<?>) {
            String name = ((JAXBElement<?>) n).getName().toString();
            Object nn = ((JAXBElement<?>) n).getValue();
            if (name.equals("STYLE") && nn instanceof STYLE) {
                String css = ((STYLE) nn).getCss();
                String id = ((STYLE) nn).getId();
                // A STYLE may carry an id or css, but not both.
                if (id != null && css != null)
                    throw new IOException(id + "/" + css);
                if (css != null && css.startsWith("display:block;")) {
                    // not really a formatting instruction, but more some
                    // clever way of indentation
                    List<Object> content = ((STYLE) nn).getContent();
                    Visitor<RuntimeException> contentVisitor = visitor.visitCSSFormatting(css);
                    boolean subContentFound = parseContent(contentVisitor, content, abbrMap);
                    if (!subContentFound)
                        visitEmptyMarker(contentVisitor);
                } else {
                    // Map the id or css to a formatting instruction kind;
                    // anything unrecognized is rejected.
                    FormattingInstructionKind kind;
                    if (id != null && id.equals("cl:divineName")) {
                        kind = FormattingInstructionKind.DIVINE_NAME;
                    } else if (css == null) {
                        throw new IOException(id);
                    } else if (css.contains("italic")) {
                        kind = FormattingInstructionKind.ITALIC;
                    } else if (css.contains("bold")) {
                        kind = FormattingInstructionKind.BOLD;
                    } else if (css.equalsIgnoreCase("color:#FF0000")) {
                        kind = FormattingInstructionKind.WORDS_OF_JESUS;
                    } else if (css.equals("color:blue")) {
                        kind = FormattingInstructionKind.LINK;
                    } else if (css.equals("color:#00CC33;font-size:8pt;vertical-align:super") || css.equals("font-size:small")) {
                        kind = FormattingInstructionKind.SUPERSCRIPT;
                    } else {
                        throw new IOException(css);
                    }
                    List<Object> content = ((STYLE) nn).getContent();
                    Visitor<RuntimeException> contentVisitor = visitor.visitFormattingInstruction(kind);
                    // Keep the literal css as well when it differs from the
                    // css associated with the chosen kind.
                    if (css != null && !kind.getCss().equals(css)) {
                        contentVisitor = contentVisitor.visitCSSFormatting(css);
                    }
                    if (content.size() == 0) {
                        visitEmptyMarker(contentVisitor);
                    } else {
                        boolean subContentFound = parseContent(contentVisitor, content, abbrMap);
                        if (!subContentFound)
                            visitEmptyMarker(contentVisitor);
                    }
                }
            } else if ((name.equals("gr") || name.equals("GRAM")) && nn instanceof GRAM) {
                GRAM gram = (GRAM) nn;
                Visitor<RuntimeException> strongVisitor = visitor;
                // Record a tag name other than GRAM as an extra attribute.
                if (!name.equals("GRAM")) {
                    strongVisitor = strongVisitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "gram-tag", name);
                }
                if (gram.getStr() == null && gram.getRmac() == null)
                    throw new IOException("GRAM element has neither str nor rmac");
                int[] strongs = null;
                if (gram.getStr() != null) {
                    // Collapse runs of spaces; an empty value becomes "0" and
                    // "?" becomes 99111.
                    // NOTE(review): 99111 looks like a sentinel for an unknown
                    // Strong number - confirm.
                    String strong = gram.getStr().trim().replaceAll(" ++", " ");
                    if (strong.length() == 0)
                        strong = "0";
                    if (strong.equals("?"))
                        strong = "99111";
                    // A leading G/H prefix is recorded as an extra attribute and
                    // every occurrence of that letter is removed from the value.
                    if (strong.startsWith("G")) {
                        strongVisitor = strongVisitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "strong-prefix", "G");
                        strong = strong.replace("G", "");
                    } else if (strong.startsWith("H")) {
                        strongVisitor = strongVisitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "strong-prefix", "H");
                        strong = strong.replace("H", "");
                    }
                    // What remains must be space-separated decimal numbers.
                    if (!strong.matches("[0-9]+( [0-9]+)*"))
                        throw new IOException(strong);
                    String[] tmpStrongs = strong.split(" ");
                    strongs = new int[tmpStrongs.length];
                    for (int i = 0; i < tmpStrongs.length; i++) {
                        strongs[i] = Integer.parseInt(tmpStrongs[i]);
                    }
                }
                String[] rmacs = null;
                if (gram.getRmac() != null) {
                    String rmac = gram.getRmac();
                    rmacs = rmac.split(" ");
                }
                strongVisitor = strongVisitor.visitGrammarInformation(strongs, rmacs, null);
                if (!parseContent(strongVisitor, gram.getContent(), abbrMap)) {
                    visitEmptyMarker(strongVisitor);
                }
            } else {
                // Guard against a nil element value so that the intended
                // IOException is raised instead of a NullPointerException.
                throw new IOException(name + "/" + (nn == null ? "null" : nn.getClass().toString()));
            }
            contentFound = true;
        } else {
            throw new IOException(n.getClass().toString());
        }
    }
    return contentFound;
}
Aggregations