use of biblemulticonverter.data.FormattedText.FormattingInstructionKind in project BibleMultiConverter by schierlm.
the class ZefaniaXML method parseContent.
/**
 * Walks a Zefania mixed-content list and replays it on the given visitor.
 *
 * <p>Plain strings are normalized and emitted as text; DIV/NOTE become
 * footnotes, BR becomes one or more line breaks, XREF becomes a footnote
 * holding cross references, and STYLE / gr / GRAM JAXB elements are mapped
 * to formatting or grammar information and parsed recursively. JAXB elements
 * of any other kind, and objects of any other class, are silently skipped.
 *
 * @param visitor     visitor receiving the parsed content
 * @param contentList mixed content (strings and JAXB objects) to parse
 * @param abbrMap     book abbreviations, used to resolve and label cross references
 * @return {@code true} if non-whitespace text or at least one supported element was visited
 * @throws IOException declared for callers; this body itself does not throw it directly
 */
private boolean parseContent(Visitor<RuntimeException> visitor, List<Object> contentList, Map<BookID, String> abbrMap) throws IOException {
    boolean contentFound = false;
    // Running position of n inside contentList; avoids a linear indexOf() lookup per GRAM element.
    int pos = -1;
    for (Object n : contentList) {
        pos++;
        if (n instanceof String) {
            String value = normalize((String) n, false);
            visitor.visitText(value);
            contentFound |= value.trim().length() > 0;
        } else if (n instanceof DIV || n instanceof NOTE) {
            NOTE note = (n instanceof DIV) ? ((DIV) n).getNOTE() : (NOTE) n;
            // A DIV without a NOTE is treated like an empty note instead of failing with a NullPointerException.
            if (note == null || note.getContent().size() == 0)
                continue;
            Visitor<RuntimeException> v = visitor.visitFootnote();
            if (!parseContent(v, note.getContent(), abbrMap))
                visitEmptyMarker(v);
            contentFound = true;
        } else if (n instanceof BR) {
            BR br = (BR) n;
            int count = 1;
            if (br.getCount() != null) {
                count = br.getCount().intValue();
            }
            // Counts outside 1..10 fall back to a single break.
            if (count < 1 || count > 10)
                count = 1;
            // Anything other than X_P (including a missing art) is emitted as a plain newline.
            LineBreakKind breakKind = br.getArt() == EnumBreak.X_P ? LineBreakKind.PARAGRAPH : LineBreakKind.NEWLINE;
            for (int ii = 0; ii < count; ii++) {
                visitor.visitLineBreak(breakKind);
            }
            contentFound = true;
        } else if (n instanceof XREF) {
            XREF xref = (XREF) n;
            Visitor<RuntimeException> footnoteVisitor = visitor.visitFootnote();
            footnoteVisitor.visitText(FormattedText.XREF_MARKER.trim());
            // Stays true until the first cross reference has been emitted (in either scope format),
            // so that the empty marker is only added when the footnote holds no reference at all.
            boolean first = true;
            if (xref.getMscope() != null) {
                // mscope: space separated "book;chapter[-endChapter];verse[-endVerse]" entries
                for (String mscope : xref.getMscope().split(" ")) {
                    Matcher m = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(mscope);
                    if (!m.matches())
                        continue;
                    BookID bookID = BookID.fromZefId(Integer.parseInt(m.group(1)));
                    int chapter = Integer.parseInt(m.group(2)), endChapter = chapter;
                    if (m.group(3) != null)
                        endChapter = Integer.parseInt(m.group(3).substring(1));
                    String verse = m.group(4);
                    String endVerse = m.group(5);
                    if (endVerse == null)
                        endVerse = verse;
                    else
                        endVerse = endVerse.substring(1);
                    // Entries with verse 0 or a reversed verse range inside one chapter are dropped.
                    if (verse.equals("0") || endVerse.equals("0"))
                        continue;
                    if (chapter == endChapter && Integer.parseInt(verse) > Integer.parseInt(endVerse))
                        continue;
                    String abbr = abbrMap.get(bookID);
                    if (abbr == null)
                        abbr = bookID.getOsisID();
                    if (first)
                        first = false;
                    else
                        footnoteVisitor.visitText(" ");
                    footnoteVisitor.visitCrossReference(abbr, bookID, chapter, verse, endChapter, endVerse).visitText(abbr + " " + chapter + ":" + verse);
                }
            } else if (xref.getFscope() != null) {
                // fscope: "; " separated "Abbr chapter, verse" entries, resolved through abbrMap values
                for (String fscope : xref.getFscope().split("; ")) {
                    Matcher m = Utils.compilePattern("([0-9A-Za-z]+) ([0-9]+), ([0-9]+[a-z]?)").matcher(fscope);
                    if (!m.matches()) {
                        System.out.println("WARNING: Unable to parse XREF fscope " + fscope + ", skipping");
                        continue;
                    }
                    String xBook = m.group(1);
                    int xChapter = Integer.parseInt(m.group(2));
                    String xVerse = m.group(3);
                    BookID xID = null;
                    for (Map.Entry<BookID, String> abbrEntry : abbrMap.entrySet()) {
                        if (abbrEntry.getValue().equals(xBook)) {
                            xID = abbrEntry.getKey();
                            break;
                        }
                    }
                    if (xID == null) {
                        System.out.println("WARNING: Book not found for XREF fscope " + fscope + ", skipping");
                        continue;
                    }
                    // Unlike the mscope branch, every fscope reference is preceded by a space.
                    footnoteVisitor.visitText(" ");
                    footnoteVisitor.visitCrossReference(xBook, xID, xChapter, xVerse, xChapter, xVerse).visitText(xBook + " " + xChapter + ":" + xVerse);
                    first = false;
                }
            } else {
                System.out.println("WARNING: Ignoring XREF with neither fscope nor mscope");
            }
            if (first)
                visitEmptyMarker(footnoteVisitor);
            contentFound = true;
        } else if (n instanceof JAXBElement<?>) {
            String name = ((JAXBElement<?>) n).getName().toString();
            Object nn = ((JAXBElement<?>) n).getValue();
            if (name.equals("STYLE") && nn instanceof STYLE) {
                STYLE style = (STYLE) nn;
                String css = style.getCss();
                String id = style.getId();
                // Map well-known id/css values to a formatting kind; the order of the checks matters
                // (e.g. css containing both "italic" and "bold" is treated as italic).
                FormattingInstructionKind kind = null;
                if (id != null && id.equals("cl:divineName")) {
                    kind = FormattingInstructionKind.DIVINE_NAME;
                } else if (css == null || css.startsWith("display:block;")) {
                    kind = null;
                } else if (css.contains("italic")) {
                    kind = FormattingInstructionKind.ITALIC;
                } else if (css.contains("bold")) {
                    kind = FormattingInstructionKind.BOLD;
                } else if (css.toLowerCase().contains("color:#ff0000")) {
                    kind = FormattingInstructionKind.WORDS_OF_JESUS;
                } else if (css.contains("color:blue")) {
                    kind = FormattingInstructionKind.LINK;
                } else if (css.contains("vertical-align:super") || css.equals("font-size:small")) {
                    kind = FormattingInstructionKind.SUPERSCRIPT;
                }
                // Recognized kinds replace the css entirely; unrecognized css is passed through verbatim.
                // With neither, the content is emitted directly on the enclosing visitor.
                Visitor<RuntimeException> contentVisitor = visitor;
                if (kind != null) {
                    contentVisitor = contentVisitor.visitFormattingInstruction(kind);
                } else if (css != null) {
                    contentVisitor = contentVisitor.visitCSSFormatting(css);
                }
                if (!parseContent(contentVisitor, style.getContent(), abbrMap))
                    visitEmptyMarker(contentVisitor);
            } else if ((name.equals("gr") || name.equals("GRAM")) && nn instanceof GRAM) {
                GRAM gram = (GRAM) nn;
                List<Object> gramContent = gram.getContent();
                // If the tagged text ends with a space and the following sibling text does not start
                // with one, move that space out of the grammar-tagged span (this edits the GRAM content).
                boolean addSpace = false;
                int lastIndex = gramContent.size() - 1;
                if (lastIndex >= 0 && gramContent.get(lastIndex) instanceof String) {
                    String lastString = normalize((String) gramContent.get(lastIndex), false);
                    if (lastString.endsWith(" ")) {
                        String afterString = "";
                        if (pos < contentList.size() - 1 && contentList.get(pos + 1) instanceof String) {
                            afterString = normalize((String) contentList.get(pos + 1), false);
                        }
                        if (!afterString.startsWith(" ")) {
                            addSpace = true;
                            gramContent.set(lastIndex, lastString.substring(0, lastString.length() - 1));
                        }
                    }
                }
                // Strong numbers: strip G/H letters, keep purely numeric tokens, warn about the rest.
                int[] strongs = null;
                if (gram.getStr() != null) {
                    String[] strongParts = gram.getStr().trim().replaceAll(" ++", " ").replace("G", "").replace("H", "").split(" ");
                    int[] parsed = new int[strongParts.length];
                    int parsedCount = 0;
                    for (String part : strongParts) {
                        if (part.matches("[0-9]+")) {
                            parsed[parsedCount++] = Integer.parseInt(part);
                        } else {
                            System.out.println("WARNING: Skipping invalid Strong number " + part);
                        }
                    }
                    // No valid number left is handled like a missing str value.
                    if (parsedCount > 0)
                        strongs = Arrays.copyOf(parsed, parsedCount);
                }
                // RMAC codes: upper-cased, one trailing '-' removed, validated against Utils.RMAC_REGEX.
                String[] rmac = null;
                if (gram.getRmac() != null && gram.getRmac().length() > 0) {
                    List<String> rmacList = new ArrayList<String>();
                    for (String rmacValue : gram.getRmac().toUpperCase().split(" ")) {
                        if (rmacValue.endsWith("-"))
                            rmacValue = rmacValue.substring(0, rmacValue.length() - 1);
                        if (rmacValue.matches(Utils.RMAC_REGEX)) {
                            rmacList.add(rmacValue);
                        } else {
                            System.out.println("WARNING: Skipping invalid RMAC: " + rmacValue);
                        }
                    }
                    if (!rmacList.isEmpty())
                        rmac = rmacList.toArray(new String[rmacList.size()]);
                }
                // NOTE(review): grammar information is only emitted when Strong numbers are present,
                // so RMAC values without Strong numbers are dropped here - confirm this is intended.
                Visitor<RuntimeException> strongVisitor = visitor;
                if (strongs != null)
                    strongVisitor = strongVisitor.visitGrammarInformation(strongs, rmac, null);
                if (!parseContent(strongVisitor, gramContent, abbrMap) && strongVisitor != visitor) {
                    visitEmptyMarker(strongVisitor);
                }
                if (addSpace)
                    visitor.visitText(" ");
            } else {
                // Unsupported JAXB elements are skipped without counting as content.
                continue;
            }
            contentFound = true;
        }
    }
    return contentFound;
}
use of biblemulticonverter.data.FormattedText.FormattingInstructionKind in project BibleMultiConverter by schierlm.
the class HaggaiXML method parseContent.
/**
 * Walks a Haggai mixed-content list and replays it on the given visitor.
 *
 * <p>Plain strings are normalized and emitted as text. JAXB elements named
 * STYLE, gr/GRAM, NOTE and BR are mapped to formatting instructions, grammar
 * information, footnotes and newlines respectively; nested content is parsed
 * recursively. Anything else is rejected.
 *
 * @param visitor     visitor receiving the parsed content
 * @param contentList mixed content (strings and JAXB elements) to parse
 * @param abbrMap     book abbreviations; only passed on to recursive calls here
 * @return {@code true} if non-whitespace text or at least one element was visited
 * @throws IOException if a STYLE has a missing or unsupported style value, or if an
 *                     unsupported element name or content class is encountered
 */
private boolean parseContent(Visitor<RuntimeException> visitor, List<? extends Object> contentList, Map<BookID, String> abbrMap) throws IOException {
    boolean contentFound = false;
    for (Object n : contentList) {
        if (n instanceof String) {
            String value = normalize((String) n, false);
            visitor.visitText(value);
            contentFound |= value.trim().length() > 0;
        } else if (n instanceof JAXBElement<?>) {
            String name = ((JAXBElement<?>) n).getName().toString();
            Object nn = ((JAXBElement<?>) n).getValue();
            if (name.equals("STYLE") && nn instanceof STYLE) {
                TStyleFix fs = ((STYLE) nn).getFs();
                // switch on a null enum would fail with a NullPointerException; report it as a parse error instead.
                if (fs == null)
                    throw new IOException("STYLE element without style value");
                FormattingInstructionKind kind;
                switch (fs) {
                case BOLD:
                    kind = FormattingInstructionKind.BOLD;
                    break;
                case DIVINE_NAME:
                    kind = FormattingInstructionKind.DIVINE_NAME;
                    break;
                case EMPHASIS:
                case ITALIC:
                    kind = FormattingInstructionKind.ITALIC;
                    break;
                case LINE_THROUGH:
                    kind = FormattingInstructionKind.STRIKE_THROUGH;
                    break;
                case SUB:
                    kind = FormattingInstructionKind.SUBSCRIPT;
                    break;
                case SUPER:
                    kind = FormattingInstructionKind.SUPERSCRIPT;
                    break;
                case UNDERLINE:
                    kind = FormattingInstructionKind.UNDERLINE;
                    break;
                // Styles without a matching formatting kind are rejected.
                case UPPERCASE:
                case ACROSTIC:
                case ILLUMINATED:
                case LOWERCASE:
                case NORMAL:
                case OVERLINE:
                case SMALL_CAPS:
                default:
                    throw new IOException(fs.toString());
                }
                Visitor<RuntimeException> contentVisitor = visitor.visitFormattingInstruction(kind);
                List<Serializable> content = ((STYLE) nn).getContent();
                if (!parseContent(contentVisitor, content, abbrMap))
                    visitEmptyMarker(contentVisitor);
            } else if ((name.equals("gr") || name.equals("GRAM")) && nn instanceof GRAM) {
                GRAM gram = (GRAM) nn;
                // Strong numbers: strip G/H letters, keep purely numeric tokens, warn about the rest.
                int[] strongs = null;
                if (gram.getStr() != null) {
                    String[] strongParts = gram.getStr().trim().replaceAll(" ++", " ").replace("G", "").replace("H", "").split(" ");
                    int[] parsed = new int[strongParts.length];
                    int parsedCount = 0;
                    for (String part : strongParts) {
                        if (part.matches("[0-9]+")) {
                            parsed[parsedCount++] = Integer.parseInt(part);
                        } else {
                            System.out.println("WARNING: Skipping invalid Strong number " + part);
                        }
                    }
                    // No valid number left is handled like a missing str value.
                    if (parsedCount > 0)
                        strongs = Arrays.copyOf(parsed, parsedCount);
                }
                // RMAC codes: upper-cased, one trailing '-' removed, validated against Utils.RMAC_REGEX.
                String[] rmac = null;
                if (gram.getRmac() != null && gram.getRmac().length() > 0) {
                    List<String> rmacList = new ArrayList<String>();
                    for (String rmacValue : gram.getRmac().toUpperCase().split(" ")) {
                        if (rmacValue.endsWith("-"))
                            rmacValue = rmacValue.substring(0, rmacValue.length() - 1);
                        if (rmacValue.matches(Utils.RMAC_REGEX)) {
                            rmacList.add(rmacValue);
                        } else {
                            System.out.println("WARNING: Skipping invalid RMAC: " + rmacValue);
                        }
                    }
                    if (!rmacList.isEmpty())
                        rmac = rmacList.toArray(new String[rmacList.size()]);
                }
                // Without any usable grammar data the content is emitted directly on the enclosing visitor,
                // and in that case no empty marker is added for empty content.
                Visitor<RuntimeException> strongVisitor = visitor;
                if (strongs != null || rmac != null)
                    strongVisitor = strongVisitor.visitGrammarInformation(strongs, rmac, null);
                if (!parseContent(strongVisitor, gram.getContent(), abbrMap) && strongVisitor != visitor) {
                    visitEmptyMarker(strongVisitor);
                }
            } else if (name.equals("NOTE") && nn instanceof NOTE) {
                NOTE note = (NOTE) nn;
                // Empty notes are dropped and do not count as content.
                if (note.getContent().size() == 0)
                    continue;
                Visitor<RuntimeException> v = visitor.visitFootnote();
                if (!parseContent(v, note.getContent(), abbrMap))
                    visitEmptyMarker(v);
            } else if (name.equals("BR")) {
                visitor.visitLineBreak(LineBreakKind.NEWLINE);
            } else {
                throw new IOException(name);
            }
            // Every supported element that was not skipped counts as content.
            contentFound = true;
        } else {
            throw new IOException(n.getClass().toString());
        }
    }
    return contentFound;
}
use of biblemulticonverter.data.FormattedText.FormattingInstructionKind in project BibleMultiConverter by schierlm.
the class ZefaniaXMLRoundtrip method parseContent.
/**
 * Strict (roundtrip) variant of the Zefania content parser: replays a mixed
 * content list on the given visitor and records Zefania-specific details as
 * extra attributes in the "zefania" category.
 *
 * <p>Unlike a lenient import, unexpected input is not skipped: unsupported
 * elements, styles, Strong values and unparsable mscope entries raise an
 * {@link IOException}; invalid line-break data raises a {@link RuntimeException}.
 *
 * @param visitor     visitor receiving the parsed content
 * @param contentList mixed content (strings and JAXB objects) to parse
 * @param abbrMap     book abbreviations used to label cross references
 * @return {@code true} if non-whitespace text or at least one element was visited
 * @throws IOException if the content contains something this parser does not support
 */
private boolean parseContent(Visitor<RuntimeException> visitor, List<Object> contentList, Map<BookID, String> abbrMap) throws IOException {
    boolean contentFound = false;
    for (Object item : contentList) {
        // --- plain text ---
        if (item instanceof String) {
            String text = normalize((String) item, false);
            visitor.visitText(text);
            if (text.trim().length() > 0) {
                contentFound = true;
            }
            continue;
        }

        // --- footnotes (bare NOTE, or NOTE wrapped in a DIV) ---
        if (item instanceof DIV || item instanceof NOTE) {
            boolean wrappedInDiv = item instanceof DIV;
            NOTE note = wrappedInDiv ? ((DIV) item).getNOTE() : (NOTE) item;
            if (note.getContent().size() == 0) {
                continue;
            }
            // The DIV wrapper is remembered as an extra attribute around the footnote.
            Visitor<RuntimeException> footnote = wrappedInDiv
                    ? visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "div").visitFootnote()
                    : visitor.visitFootnote();
            if (!parseContent(footnote, note.getContent(), abbrMap)) {
                visitEmptyMarker(footnote);
            }
            contentFound = true;
            continue;
        }

        // --- line breaks ---
        if (item instanceof BR) {
            BR br = (BR) item;
            int repeat = 1;
            Visitor<RuntimeException> breakTarget = visitor;
            if (br.getCount() != null) {
                repeat = br.getCount().intValue();
                breakTarget = visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "newline-group", br.getCount() + "--" + br.getArt().value());
            }
            if (repeat < 1 || repeat > 10) {
                throw new RuntimeException();
            }
            // repeat is at least 1 here, so resolving the kind once up front is
            // equivalent to resolving it on every iteration.
            LineBreakKind breakKind;
            switch (br.getArt()) {
            case X_NL:
                breakKind = LineBreakKind.NEWLINE;
                break;
            case X_P:
                breakKind = LineBreakKind.PARAGRAPH;
                break;
            default:
                throw new RuntimeException(br.getArt().toString());
            }
            for (int done = 0; done < repeat; done++) {
                breakTarget.visitLineBreak(breakKind);
            }
            contentFound = true;
            continue;
        }

        // --- cross references (mscope only) ---
        if (item instanceof XREF) {
            XREF xref = (XREF) item;
            Visitor<RuntimeException> xrefFootnote = visitor.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "inner-xref").visitFootnote();
            boolean needSeparator = false;
            for (String scope : xref.getMscope().split(" ")) {
                Matcher scopeMatcher = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(scope);
                if (!scopeMatcher.matches()) {
                    throw new IOException(scope);
                }
                BookID book = BookID.fromZefId(Integer.parseInt(scopeMatcher.group(1)));
                int fromChapter = Integer.parseInt(scopeMatcher.group(2));
                String chapterRange = scopeMatcher.group(3);
                int toChapter = chapterRange == null ? fromChapter : Integer.parseInt(chapterRange.substring(1));
                // Verse 0 is replaced by the placeholder "1//G" on both ends of the range.
                String fromVerse = scopeMatcher.group(4);
                if (fromVerse.equals("0")) {
                    fromVerse = "1//G";
                }
                String verseRange = scopeMatcher.group(5);
                String toVerse = verseRange == null ? fromVerse : verseRange.substring(1);
                if (toVerse.equals("0")) {
                    toVerse = "1//G";
                }
                String label = abbrMap.get(book);
                if (label == null) {
                    label = book.getOsisID();
                }
                // References are separated by a single space; none before the first one.
                if (needSeparator) {
                    xrefFootnote.visitText(" ");
                } else {
                    needSeparator = true;
                }
                // A reversed numeric verse range within one chapter is put in ascending order.
                boolean numericRange = !fromVerse.equals("1//G") && !toVerse.equals("1//G");
                if (fromChapter == toChapter && numericRange && Integer.parseInt(fromVerse) > Integer.parseInt(toVerse)) {
                    String swap = fromVerse;
                    fromVerse = toVerse;
                    toVerse = swap;
                }
                xrefFootnote.visitCrossReference(label, book, fromChapter, fromVerse, toChapter, toVerse).visitText(label + " " + fromChapter + ":" + fromVerse);
            }
            contentFound = true;
            continue;
        }

        if (!(item instanceof JAXBElement<?>)) {
            throw new IOException(item.getClass().toString());
        }

        // --- JAXB wrapped elements: STYLE and gr/GRAM ---
        String name = ((JAXBElement<?>) item).getName().toString();
        Object nn = ((JAXBElement<?>) item).getValue();
        if (name.equals("STYLE") && nn instanceof STYLE) {
            STYLE style = (STYLE) nn;
            String css = style.getCss();
            String id = style.getId();
            if (id != null && css != null) {
                throw new IOException(id + "/" + css);
            }
            Visitor<RuntimeException> styled;
            if (css != null && css.startsWith("display:block;")) {
                // not really a formatting instruction, but more some
                // clever way of indentation
                styled = visitor.visitCSSFormatting(css);
            } else {
                FormattingInstructionKind kind;
                if ("cl:divineName".equals(id)) {
                    kind = FormattingInstructionKind.DIVINE_NAME;
                } else if (css == null) {
                    throw new IOException(id);
                } else if (css.contains("italic")) {
                    kind = FormattingInstructionKind.ITALIC;
                } else if (css.contains("bold")) {
                    kind = FormattingInstructionKind.BOLD;
                } else if (css.equalsIgnoreCase("color:#FF0000")) {
                    kind = FormattingInstructionKind.WORDS_OF_JESUS;
                } else if (css.equals("color:blue")) {
                    kind = FormattingInstructionKind.LINK;
                } else if (css.equals("color:#00CC33;font-size:8pt;vertical-align:super") || css.equals("font-size:small")) {
                    kind = FormattingInstructionKind.SUPERSCRIPT;
                } else {
                    throw new IOException(css);
                }
                styled = visitor.visitFormattingInstruction(kind);
                // Keep the original css as well when it differs from the css of the chosen kind.
                if (css != null && !kind.getCss().equals(css)) {
                    styled = styled.visitCSSFormatting(css);
                }
            }
            // Parsing an empty list visits nothing and returns false, so empty and
            // whitespace-only content both end up with the empty marker.
            if (!parseContent(styled, style.getContent(), abbrMap)) {
                visitEmptyMarker(styled);
            }
        } else if ((name.equals("gr") || name.equals("GRAM")) && nn instanceof GRAM) {
            GRAM gram = (GRAM) nn;
            Visitor<RuntimeException> grammar = visitor;
            // The tag name is recorded whenever it is not the upper-case "GRAM".
            if (!name.equals("GRAM")) {
                grammar = grammar.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "gram-tag", name);
            }
            String rawStrong = gram.getStr();
            String rawRmac = gram.getRmac();
            if (rawStrong == null && rawRmac == null) {
                throw new IOException();
            }
            int[] strongs = null;
            if (rawStrong != null) {
                String strong = rawStrong.trim().replaceAll(" ++", " ");
                // Empty and "?" values are replaced by the fixed numbers 0 and 99111.
                if (strong.length() == 0) {
                    strong = "0";
                }
                if (strong.equals("?")) {
                    strong = "99111";
                }
                // A leading G or H is recorded as an extra attribute and removed from the value.
                String prefix = strong.startsWith("G") ? "G" : (strong.startsWith("H") ? "H" : null);
                if (prefix != null) {
                    grammar = grammar.visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "strong-prefix", prefix);
                    strong = strong.replace(prefix, "");
                }
                if (!strong.matches("[0-9]+( [0-9]+)*")) {
                    throw new IOException(strong);
                }
                String[] numbers = strong.split(" ");
                strongs = new int[numbers.length];
                for (int k = 0; k < numbers.length; k++) {
                    strongs[k] = Integer.parseInt(numbers[k]);
                }
            }
            String[] rmacs = rawRmac == null ? null : rawRmac.split(" ");
            grammar = grammar.visitGrammarInformation(strongs, rmacs, null);
            if (!parseContent(grammar, gram.getContent(), abbrMap)) {
                visitEmptyMarker(grammar);
            }
        } else {
            throw new IOException(name + "/" + nn.getClass().toString());
        }
        contentFound = true;
    }
    return contentFound;
}
Aggregations