use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.
the class StrongDictionary method doImport.
/**
 * Builds a Strong's dictionary by downloading the Greek and the Hebrew
 * Strong's XML files and converting every entry into a dictionary-entry book.
 *
 * <p>Greek entries become books named {@code G<number>}; Hebrew entries use
 * the {@code id} attribute of the entry element as book name. The converted
 * content of an entry is stored as the prolog of the book's only chapter.
 *
 * @param inputFile not used by this implementation; all data is fetched from
 *                  the hard-coded URLs below
 * @return the dictionary; its first book is the metadata book, followed by
 *         the Greek and then the Hebrew entries
 * @throws Exception if downloading or XML parsing fails; an
 *                   {@link IOException} is thrown when an unexpected element,
 *                   unexpected text or a mismatching Strong number is found
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    Bible result = new Bible("Strong's dictionary");
    MetadataBook mb = new MetadataBook();
    mb.setValue(MetadataBookKey.description, "Strong's dictionary compiled by BibleMultiConverter from public sources.");
    mb.setValue(MetadataBookKey.source, "https://github.com/openscriptures/HebrewLexicon/ and https://github.com/morphgnt/strongs-dictionary-xml/");
    mb.setValue(MetadataBookKey.rights, "Strong's Greek Dictionary is in the public domain. Strong's Hebrew Dictionary is provided as XML files by the Open Scriptures Hebrew Bible Project, which are licensed CC-BY-4.0.");
    mb.finished();
    result.getBooks().add(mb.getBook());
    DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    Document doc;

    // ---- Greek dictionary ----
    try (InputStream in = new URL("https://raw.githubusercontent.com/morphgnt/strongs-dictionary-xml/master/strongsgreek.xml").openStream()) {
        doc = db.parse(in);
    }
    // The entries are the children of the last child of the document element.
    for (Node entryNode = doc.getDocumentElement().getLastChild().getFirstChild(); entryNode != null; entryNode = entryNode.getNextSibling()) {
        Element entry = (Element) entryNode;
        int number = Integer.parseInt(entry.getAttribute("strongs"));
        System.out.println("G" + number);
        Book bk = new Book("G" + number, BookID.DICTIONARY_ENTRY, "G" + number, "G" + number);
        FormattedText prolog = new FormattedText();
        bk.getChapters().add(new Chapter());
        bk.getChapters().get(0).setProlog(prolog);
        result.getBooks().add(bk);
        Visitor<RuntimeException> v = prolog.getAppendVisitor();
        for (Node childNode = entry.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
            if (childNode instanceof Text) {
                if (childNode.getTextContent().replaceAll("[ \r\n\t]+", " ").equals(" or ") && childNode.getNextSibling().getNodeName().equals("greek")) {
                    // " or " directly in front of another <greek> element separates alternative forms
                    v.visitFormattingInstruction(FormattingInstructionKind.ITALIC).visitText("-or-");
                    v.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (childNode.getTextContent().trim().length() > 0) {
                    visitAttribute(v, "Remark", childNode.getTextContent());
                }
                continue;
            }
            Element elem = (Element) childNode;
            switch (elem.getNodeName()) {
            case "strongs":
                // sanity check: the nested number has to match the entry's attribute
                int compNumber = Integer.parseInt(elem.getTextContent());
                if (compNumber != number)
                    throw new IOException(compNumber + " != " + number);
                break;
            case "greek":
                v.visitHeadline(1).visitText(elem.getAttribute("unicode"));
                visitAttribute(v, "Transliteration", elem.getAttribute("translit"));
                break;
            case "pronunciation":
                visitAttribute(v, "Pronunciation", elem.getAttribute("strongs"));
                break;
            case "strongs_derivation":
                visitAttribute(v, "Strongs Derivation", parseGreekContent(elem));
                break;
            case "strongs_def":
                visitAttribute(v, "Strongs Definition", parseGreekContent(elem));
                break;
            case "kjv_def":
                visitAttribute(v, "KJV Definition", parseGreekContent(elem));
                if (elem.getNextSibling() != null && !elem.getNextSibling().getNodeName().equals("see")) {
                    // Wrap everything between <kjv_def> and the first <see> into a
                    // synthetic <more_info> element. It is inserted as next sibling,
                    // so the enclosing loop will visit it in its next iteration.
                    Element moreInfo = doc.createElement("more_info");
                    elem.getParentNode().insertBefore(moreInfo, elem.getNextSibling());
                    while (moreInfo.getNextSibling() != null) {
                        if (moreInfo.getNextSibling().getNodeName().equals("see"))
                            break;
                        moreInfo.appendChild(moreInfo.getNextSibling());
                    }
                    if (moreInfo.getTextContent().trim().isEmpty())
                        moreInfo.getParentNode().removeChild(moreInfo);
                }
                break;
            case "strongsref":
                visitAttribute(v, "Reference", "[" + elem.getAttribute("language").substring(0, 1) + Integer.parseInt(elem.getAttribute("strongs")) + "]");
                // Do not fall through into "more_info": the reference has been
                // emitted completely and must not be repeated as "More Information".
                break;
            case "more_info":
                visitAttribute(v, "More Information", parseGreekContent(elem));
                break;
            case "see":
                visitAttribute(v, "See Also", "[" + elem.getAttribute("language").substring(0, 1) + Integer.parseInt(elem.getAttribute("strongs")) + "]");
                break;
            default:
                throw new IOException(elem.getNodeName());
            }
        }
        prolog.trimWhitespace();
        prolog.finished();
    }

    // ---- Hebrew dictionary ----
    try (InputStream in = new URL("https://raw.githubusercontent.com/openscriptures/HebrewLexicon/master/HebrewStrong.xml").openStream()) {
        doc = db.parse(in);
    }
    for (Node entryNode = doc.getDocumentElement().getFirstChild(); entryNode != null; entryNode = entryNode.getNextSibling()) {
        if (entryNode instanceof Text) {
            // only whitespace is allowed between entries
            if (!entryNode.getTextContent().trim().isEmpty()) {
                throw new IOException(entryNode.getTextContent());
            }
            continue;
        }
        Element entry = (Element) entryNode;
        String id = entry.getAttribute("id");
        System.out.println(id);
        Book bk = new Book(id, BookID.DICTIONARY_ENTRY, id, id);
        FormattedText prolog = new FormattedText();
        bk.getChapters().add(new Chapter());
        bk.getChapters().get(0).setProlog(prolog);
        result.getBooks().add(bk);
        Visitor<RuntimeException> v = prolog.getAppendVisitor();
        for (Node childNode = entry.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
            if (childNode instanceof Text) {
                // only whitespace is allowed between the parts of an entry
                if (!childNode.getTextContent().trim().isEmpty()) {
                    throw new IOException(childNode.getTextContent());
                }
                continue;
            }
            Element elem = (Element) childNode;
            switch (elem.getNodeName()) {
            case "w":
                v.visitHeadline(1).visitText(elem.getTextContent());
                visitAttribute(v, "Transliteration", elem.getAttribute("xlit"));
                visitAttribute(v, "Pronunciation", elem.getAttribute("pron"));
                if (elem.getAttribute("xml:lang").equals("heb")) {
                    visitAttribute(v, "Language", "Hebrew");
                } else if (elem.getAttribute("xml:lang").equals("arc")) {
                    visitAttribute(v, "Language", "Aramaic");
                } else if (elem.getAttribute("xml:lang").equals("x-pn")) {
                    visitAttribute(v, "Language", "Proper Noun");
                } else {
                    throw new IOException(elem.getAttribute("xml:lang"));
                }
                visitAttribute(v, "Part of speech", elem.getAttribute("pos"));
                break;
            case "source":
                visitAttribute(v, "Source", parseHebrewContent(elem));
                break;
            case "meaning":
                visitAttribute(v, "Meaning", parseHebrewContent(elem));
                break;
            case "usage":
                visitAttribute(v, "Usage", parseHebrewContent(elem));
                break;
            case "note":
                // skip
                break;
            default:
                throw new IOException(elem.getNodeName());
            }
        }
        prolog.trimWhitespace();
        prolog.finished();
    }
    return result;
}
use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.
the class TheWord method parseLine.
/**
 * Parses markup of one line, starting at {@code pos}, and forwards the
 * content to {@code visitor}. Tags with nested content are handled by
 * recursive calls that use the matching closing tag as {@code endTag}.
 *
 * <p>Before a nested tag is emitted into the real visitor, the nested content
 * is parsed once into a throw-away visitor to find out whether the closing tag
 * exists at all. If it does not (or the tag is not supported), only the
 * leading {@code '<'} is emitted as literal text and parsing continues behind
 * it.
 *
 * @param visitor visitor that receives the parsed content
 * @param line    the complete line being parsed
 * @param pos     index in {@code line} where parsing starts
 * @param endTag  closing tag that ends this parsing level, or {@code null}
 *                to parse until the end of the line
 * @return if {@code endTag} is {@code null}, the position where parsing
 *         stopped (the end of the line); otherwise the position directly
 *         behind {@code endTag}, or {@code -1} if {@code endTag} was not found
 */
private int parseLine(Visitor<RuntimeException> visitor, String line, int pos, String endTag) {
    // scratch target for the dry runs that only check for the closing tag
    Visitor<RuntimeException> garbageVisitor = new FormattedText().getAppendVisitor();
    while (pos < line.length()) {
        if (line.charAt(pos) != '<') {
            // plain text up to the next tag (or the end of the line)
            int endPos = line.indexOf('<', pos);
            if (endPos == -1)
                endPos = line.length();
            visitor.visitText(line.substring(pos, endPos).replaceAll("[\r\n\t ]+", " "));
            pos = endPos;
            continue;
        }
        if (endTag != null && line.startsWith(endTag, pos))
            break;
        if (pos + 2 < line.length() && line.charAt(pos + 2) == '>' && "bius".indexOf(line.charAt(pos + 1)) != -1) {
            // <b>, <i>, <u>, <s>
            String newEndTag = "</" + line.charAt(pos + 1) + ">";
            if (parseLine(garbageVisitor, line, pos + 3, newEndTag) != -1) {
                FormattingInstructionKind kind;
                switch (line.charAt(pos + 1)) {
                case 'b':
                    kind = FormattingInstructionKind.BOLD;
                    break;
                case 'i':
                    kind = FormattingInstructionKind.ITALIC;
                    break;
                case 'u':
                    kind = FormattingInstructionKind.UNDERLINE;
                    break;
                case 's':
                    kind = FormattingInstructionKind.STRIKE_THROUGH;
                    break;
                default:
                    throw new RuntimeException("Cannot happen");
                }
                pos = parseLine(visitor.visitFormattingInstruction(kind), line, pos + 3, newEndTag);
                continue;
            }
        } else if (line.startsWith("<sub>", pos) || line.startsWith("<sup>", pos)) {
            String newEndTag = "</" + line.substring(pos + 1, pos + 5);
            if (parseLine(garbageVisitor, line, pos + 5, newEndTag) != -1) {
                FormattingInstructionKind kind = line.charAt(pos + 3) == 'p' ? FormattingInstructionKind.SUPERSCRIPT : FormattingInstructionKind.SUBSCRIPT;
                pos = parseLine(visitor.visitFormattingInstruction(kind), line, pos + 5, newEndTag);
                continue;
            }
        } else if (line.startsWith("<FR>", pos)) {
            if (parseLine(garbageVisitor, line, pos + 4, "<Fr>") != -1) {
                pos = parseLine(visitor.visitFormattingInstruction(FormattingInstructionKind.WORDS_OF_JESUS), line, pos + 4, "<Fr>");
                continue;
            }
        } else if (line.startsWith("<FO>", pos)) {
            if (parseLine(garbageVisitor, line, pos + 4, "<Fo>") != -1) {
                pos = parseLine(visitor.visitFormattingInstruction(FormattingInstructionKind.LINK), line, pos + 4, "<Fo>");
                continue;
            }
        } else if (line.startsWith("<font color=\"gray\">/</font>", pos)) {
            visitor.visitVerseSeparator();
            pos += 27;
            continue;
        } else if (line.startsWith("<CL>", pos)) {
            visitor.visitLineBreak(LineBreakKind.NEWLINE);
            pos += 4;
            continue;
        } else if (line.startsWith("<CM>", pos)) {
            visitor.visitLineBreak(LineBreakKind.PARAGRAPH);
            pos += 4;
            continue;
        } else if (line.startsWith("<CI><PI>", pos)) {
            visitor.visitLineBreak(LineBreakKind.NEWLINE_WITH_INDENT);
            pos += 8;
            continue;
        } else if (line.startsWith("<TS", pos) && pos + 3 < line.length()) {
            // headline: <TS> (depth 1) or <TS1> .. <TS3>
            char next = line.charAt(pos + 3);
            int depth, len;
            if (next == '>') {
                depth = 1;
                len = 4;
            } else if (pos + 4 < line.length() && line.charAt(pos + 4) == '>' && next >= '1' && next <= '3') {
                depth = next - '0';
                len = 5;
            } else {
                depth = len = 0;
            }
            // a numbered headline may be closed by <Ts> or <TsN>; use whichever comes first
            String end = "<Ts>", altEnd = len == 5 ? "<Ts" + next + ">" : "<Ts>";
            if (line.indexOf(altEnd, pos) != -1 && (line.indexOf(end, pos) == -1 || line.indexOf(altEnd, pos) < line.indexOf(end, pos)))
                end = altEnd;
            if (len > 0 && parseLine(garbageVisitor, line, pos + len, end) != -1) {
                pos = parseLine(visitor.visitHeadline(depth), line, pos + len, end);
                continue;
            }
        } else if (line.startsWith("<RF", pos)) {
            int closePos = line.indexOf('>', pos);
            // closePos == -1 means the opening tag is unterminated. Without this
            // guard the dry run would restart at index 0, reach this very tag
            // again and recurse without end.
            if (closePos != -1 && parseLine(garbageVisitor, line, closePos + 1, "<Rf>") != -1) {
                pos = parseLine(visitor.visitFootnote(), line, closePos + 1, "<Rf>");
                continue;
            }
        } else if (line.startsWith("<FI>", pos)) {
            if (parseLine(garbageVisitor, line, pos + 4, "<Fi>") != -1) {
                pos = parseLine(visitor.visitFormattingInstruction(FormattingInstructionKind.ITALIC), line, pos + 4, "<Fi>");
                continue;
            }
        } else if (line.startsWith("<S%", pos)) {
            int closePos = line.indexOf('>', pos);
            // same guard as for <RF: an unterminated tag must not restart parsing at index 0
            if (closePos != -1 && parseLine(garbageVisitor, line, closePos + 1, "<s%>") != -1) {
                String[] strongs = line.substring(pos + 3, closePos).split("%");
                int[] strongNumbers = new int[strongs.length];
                try {
                    for (int i = 0; i < strongs.length; i++) {
                        strongNumbers[i] = Integer.parseInt(strongs[i]);
                    }
                    pos = parseLine(visitor.visitGrammarInformation(strongNumbers, null, null), line, closePos + 1, "<s%>");
                    continue;
                } catch (NumberFormatException ex) {
                    // malformed Strongs tag
                }
            }
        } else if (line.startsWith("<XWG", pos) || line.startsWith("<XWH", pos)) {
            int closePos = line.indexOf('>', pos);
            // An unterminated tag (closePos == -1) would make substring() throw;
            // treat it like any other unsupported tag instead.
            if (closePos != -1) {
                try {
                    int number = Integer.parseInt(line.substring(pos + 4, closePos));
                    visitor.visitGrammarInformation(new int[] { number }, null, null);
                    pos = closePos + 1;
                    continue;
                } catch (NumberFormatException ex) {
                    System.out.println("WARNING: Invalid Strong number in tag " + line.substring(pos, closePos + 1));
                    warningCount++;
                }
            }
        } else if (line.startsWith("<WT", pos)) {
            // TODO parse morph information
        } else if (line.startsWith("<RX", pos)) {
            // TODO parse cross references
        } else if (line.startsWith("<CI>", pos) || line.startsWith("<PF", pos) || line.startsWith("<PI", pos)) {
            // extra formatting not supported by BMC
        } else if (warningCount < 100) {
            System.out.println("WARNING: Skipping unknown tag " + line.substring(pos, Math.min(pos + 20, line.length())));
            warningCount++;
        }
        // the tag is not supported (yet), skip the first character
        visitor.visitText("<");
        pos++;
    }
    if (endTag != null) {
        // report whether the loop stopped at the closing tag or ran off the end of the line
        if (line.startsWith(endTag, pos))
            pos += endTag.length();
        else
            pos = -1;
    }
    return pos;
}
use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.
the class ZefDic method createXMLBible.
/**
 * Converts a {@link Bible} that consists of dictionary entries into a
 * Zefania {@link Dictionary} JAXB tree.
 *
 * <p>Metadata books are skipped, books that are not dictionary entries are
 * reported with a warning and skipped, and for every remaining book the
 * prolog of its first chapter is converted into one dictionary item.
 *
 * @param bible the bible to convert
 * @return the populated dictionary document
 * @throws Exception declared by the signature; the visible code does not
 *                   throw a checked exception itself
 */
protected Dictionary createXMLBible(Bible bible) throws Exception {
final ObjectFactory of = new ObjectFactory();
Dictionary doc = of.createDictionary();
// defaults; may be overridden by the bible name and by metadata below
doc.setDicversion("1");
doc.setRevision("1");
doc.setRefbible("any");
doc.setType(TEnumDicType.X_DICTIONARY);
String title = null;
// A name of the form "<type>@<refbible>" encodes dictionary type and reference
// bible; in that case title stays null. Any other name is used as title.
if (bible.getName().matches("X_(DICTIONARY|COMMENTARY|STRONG|DAILY)@.*")) {
String[] parts = bible.getName().split("@", 2);
doc.setType(TEnumDicType.valueOf(parts[0]));
doc.setRefbible(parts[1]);
} else {
title = bible.getName();
}
doc.setINFORMATION(of.createTINFORMATION());
doc.getINFORMATION().getTitleOrCreatorOrDescription().add(new JAXBElement<String>(new QName("title"), String.class, title));
MetadataBook metadata = bible.getMetadataBook();
if (metadata != null) {
for (String key : metadata.getKeys()) {
String value = metadata.getValue(key);
// "-empty-" is the placeholder for an empty metadata value
if (value.equals("-empty-"))
value = "";
if (key.equals(MetadataBookKey.version.toString())) {
doc.setDicversion(value);
} else if (key.equals(MetadataBookKey.revision.toString())) {
doc.setRevision(value);
} else if (Arrays.asList(INFORMATION_KEYS).contains(key)) {
doc.getINFORMATION().getTitleOrCreatorOrDescription().add(new JAXBElement<String>(new QName(key), String.class, value));
}
}
}
for (Book bk : bible.getBooks()) {
if (bk.getId().equals(BookID.METADATA))
continue;
if (!bk.getId().equals(BookID.DICTIONARY_ENTRY)) {
System.out.println("WARNING: Unable to export book " + bk.getAbbr());
continue;
}
final TItem item = of.createTItem();
if (!bk.getLongName().equals(bk.getShortName())) {
// Emit an additional item whose id is the short name and whose
// description only contains a "see" element with the long name.
TItem itm = of.createTItem();
itm.setId(bk.getShortName());
appendTextElement(itm, "title", bk.getLongName());
TParagraph para2 = of.createTParagraph();
SeeType see = of.createSeeType();
see.setContent(bk.getLongName());
para2.getContent().add(new JAXBElement<SeeType>(new QName("see"), SeeType.class, see));
itm.getContent().add(new JAXBElement<TParagraph>(new QName("description"), TParagraph.class, para2));
doc.getItem().add(itm);
}
item.setId(bk.getLongName());
doc.getItem().add(item);
// Mutable state shared by the visitors below: the paragraph that is
// currently being filled.
class ZefState {
TParagraph para = of.createTParagraph();
// never set to true anywhere in this method
boolean eatParagraph = false;
// Appends the current paragraph to the item as "description" element
// and starts a new, empty paragraph.
public void flushPara(TItem item) {
item.getContent().add(new JAXBElement<TParagraph>(new QName("description"), TParagraph.class, para));
para = of.createTParagraph();
}
}
final ZefState state = new ZefState();
FormattedText text = bk.getChapters().get(0).getProlog();
// Visitor for inline content. It appends to a fixed content list that is
// captured when the visitor is constructed; block-level constructs are
// only reported as warnings.
class LevelVisitor implements Visitor<RuntimeException> {
final List<Serializable> target;
// binds to the content list of the paragraph that is current at construction time
private LevelVisitor(ZefState state) {
target = state.para.getContent();
}
private LevelVisitor(MyAnyType parent) {
target = parent.getContent();
}
private LevelVisitor(TStyle parent) {
target = parent.getContent();
}
@Override
public int visitElementTypes(String elementTypes) throws RuntimeException {
return 0;
}
@Override
public Visitor<RuntimeException> visitHeadline(int depth) throws RuntimeException {
System.out.println("WARNING: Nested headlines are not supported");
return null;
}
@Override
public void visitStart() throws RuntimeException {
}
@Override
public void visitText(String text) throws RuntimeException {
if (text.length() > 0)
target.add(text);
}
@Override
public Visitor<RuntimeException> visitFootnote() throws RuntimeException {
System.out.println("WARNING: footnotes are not supported");
return null;
}
@Override
public Visitor<RuntimeException> visitCrossReference(String bookAbbr, BookID book, int firstChapter, String firstVerse, int lastChapter, String lastVerse) throws RuntimeException {
// only the first chapter/verse is written; ranges produce a warning
if (firstChapter != lastChapter || !firstVerse.equals(lastVerse))
System.out.println("WARNING: Cross references to verse ranges are not supported");
BibLinkType b = of.createBibLinkType();
b.setBn("" + book.getZefID());
b.setCn1("" + firstChapter);
b.setVn1(firstVerse);
target.add(new JAXBElement<BibLinkType>(new QName("bib_link"), BibLinkType.class, b));
return null;
}
@Override
public Visitor<RuntimeException> visitFormattingInstruction(FormattingInstructionKind kind) throws RuntimeException {
// four kinds map to dedicated elements, everything else is written as CSS style
String tag;
switch(kind) {
case BOLD:
tag = "strong";
break;
case ITALIC:
tag = "em";
break;
case SUPERSCRIPT:
tag = "sup";
break;
case SUBSCRIPT:
tag = "sub";
break;
default:
return visitCSSFormatting(kind.getCss());
}
MyAnyType mat = of.createMyAnyType();
target.add(new JAXBElement<MyAnyType>(new QName(tag), MyAnyType.class, mat));
return new LevelVisitor(mat);
}
@Override
public Visitor<RuntimeException> visitCSSFormatting(String css) throws RuntimeException {
TStyle style = of.createTStyle();
style.setCss(css);
target.add(of.createTStyleSTYLE(style));
return new LevelVisitor(style);
}
@Override
public void visitVerseSeparator() throws RuntimeException {
System.out.println("WARNING: Verse separators are not supported");
}
@Override
public void visitLineBreak(LineBreakKind kind) throws RuntimeException {
System.out.println("WARNING: Nested line breaks are not supported");
}
@Override
public Visitor<RuntimeException> visitGrammarInformation(int[] strongs, String[] rmac, int[] sourceIndices) throws RuntimeException {
System.out.println("WARNING: Grammar information is not supported");
return null;
}
@Override
public Visitor<RuntimeException> visitDictionaryEntry(String dictionary, String entry) throws RuntimeException {
if (dictionary.equals("reflink")) {
// the entry's first character is dropped and '-' is replaced by ';' to build the mscope
RefLinkType r = of.createRefLinkType();
r.setMscope(entry.substring(1).replace('-', ';'));
target.add(new JAXBElement<RefLinkType>(new QName("reflink"), RefLinkType.class, r));
} else {
// dictionary "dict" is written as target "x-self"
SeeType see = of.createSeeType();
see.setTarget(dictionary.equals("dict") ? "x-self" : dictionary);
see.setContent(entry);
target.add(new JAXBElement<SeeType>(new QName("see"), SeeType.class, see));
}
return null;
}
@Override
public void visitRawHTML(RawHTMLMode mode, String raw) throws RuntimeException {
System.out.println("WARNING: Raw html output not supported");
}
@Override
public Visitor<RuntimeException> visitVariationText(String[] variations) throws RuntimeException {
throw new IllegalStateException("Variations not supported");
}
@Override
public Visitor<RuntimeException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws RuntimeException {
return prio.handleVisitor(category, this);
}
@Override
public boolean visitEnd() throws RuntimeException {
return false;
}
}
;
// Top-level visitor: handles headlines and line breaks (paragraph structure)
// itself and delegates inline content to a fresh LevelVisitor that is bound
// to the paragraph current at that moment.
text.accept(new Visitor<RuntimeException>() {
@Override
public int visitElementTypes(String elementTypes) throws RuntimeException {
return 0;
}
@Override
public Visitor<RuntimeException> visitHeadline(int depth) throws RuntimeException {
MyAnyType mat = of.createMyAnyType();
JAXBElement<MyAnyType> elem = new JAXBElement<>(new QName("title"), MyAnyType.class, mat);
// depth 1 closes the current paragraph and puts the title directly into
// the item; deeper headlines become part of the current paragraph
if (depth == 1) {
state.flushPara(item);
item.getContent().add(elem);
} else {
state.para.getContent().add(elem);
}
return new LevelVisitor(mat);
}
@Override
public void visitStart() throws RuntimeException {
}
@Override
public void visitText(String text) throws RuntimeException {
new LevelVisitor(state).visitText(text);
}
@Override
public Visitor<RuntimeException> visitFootnote() throws RuntimeException {
System.out.println("WARNING: footnotes are not supported");
return null;
}
@Override
public Visitor<RuntimeException> visitCrossReference(String bookAbbr, BookID book, int firstChapter, String firstVerse, int lastChapter, String lastVerse) throws RuntimeException {
return new LevelVisitor(state).visitCrossReference(bookAbbr, book, firstChapter, firstVerse, lastChapter, lastVerse);
}
@Override
public Visitor<RuntimeException> visitFormattingInstruction(FormattingInstructionKind kind) throws RuntimeException {
return new LevelVisitor(state).visitFormattingInstruction(kind);
}
@Override
public Visitor<RuntimeException> visitCSSFormatting(String css) throws RuntimeException {
return new LevelVisitor(state).visitCSSFormatting(css);
}
@Override
public void visitVerseSeparator() throws RuntimeException {
System.out.println("WARNING: Verse separators are not supported");
}
@Override
public void visitLineBreak(LineBreakKind kind) throws RuntimeException {
// every kind of line break ends the current paragraph
if (state.eatParagraph) {
state.eatParagraph = false;
} else {
state.flushPara(item);
// flushPara has already installed a fresh paragraph; this replaces it with another empty one
state.para = of.createTParagraph();
}
}
@Override
public Visitor<RuntimeException> visitGrammarInformation(int[] strongs, String[] rmac, int[] sourceIndices) throws RuntimeException {
System.out.println("WARNING: Grammar information is not supported");
return null;
}
@Override
public Visitor<RuntimeException> visitDictionaryEntry(String dictionary, String entry) throws RuntimeException {
return new LevelVisitor(state).visitDictionaryEntry(dictionary, entry);
}
@Override
public void visitRawHTML(RawHTMLMode mode, String raw) throws RuntimeException {
System.out.println("WARNING: Raw html output not supported");
}
@Override
public Visitor<RuntimeException> visitVariationText(String[] variations) throws RuntimeException {
throw new IllegalStateException("Variations not supported");
}
@Override
public Visitor<RuntimeException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws RuntimeException {
// KEEP_CONTENT attributes of category "zefdic" get a null visitor;
// all other attributes are resolved by the priority's handler
if (prio == ExtraAttributePriority.KEEP_CONTENT && category.equals("zefdic")) {
// "zefdic", "field", "pronunciation");
return null;
} else {
return prio.handleVisitor(category, this);
}
}
@Override
public boolean visitEnd() throws RuntimeException {
return false;
}
});
// write the paragraph that is still open at the end of the entry
state.flushPara(item);
}
return doc;
}
use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.
the class ZefaniaXMLRoundtrip method parseBible.
/**
 * Converts a Zefania {@code XMLBIBLE} JAXB document into a {@link Bible}.
 *
 * <p>Document attributes and the INFORMATION section become the metadata
 * book. Books are processed in two passes: the first pass only assigns an
 * abbreviation to every book number, the second pass builds the books with
 * their chapters, verses, headlines, prologs, remarks and cross references.
 *
 * @param doc the unmarshalled Zefania document
 * @return the converted bible
 * @throws Exception an {@link IOException} is thrown for content this parser
 *                   does not accept (e.g. a caption whose vref does not match
 *                   the following verse, an unknown caption type, a remark or
 *                   xref for a verse not parsed yet, a malformed mscope, more
 *                   than one prolog in a chapter, or an unknown child element)
 */
protected Bible parseBible(XMLBIBLE doc) throws Exception {
Bible result = new Bible(doc.getBiblename());
MetadataBook metadata = new MetadataBook();
if (doc.getStatus() != null) {
metadata.setValue(MetadataBookKey.status, doc.getStatus().value());
}
if (doc.getVersion() != null) {
metadata.setValue(MetadataBookKey.version, doc.getVersion());
}
if (doc.getRevision() != null) {
metadata.setValue(MetadataBookKey.revision, doc.getRevision().toString());
}
for (JAXBElement<?> elem : doc.getINFORMATION().getTitleOrCreatorOrDescription()) {
if (elem.getValue() == null)
continue;
String value = normalize(elem.getValue().toString(), true).trim();
// empty values are stored as the placeholder "-empty-"
if (value.length() == 0)
value = "-empty-";
metadata.setValue(elem.getName().getLocalPart(), value);
}
metadata.finished();
// the metadata book is only added when it contains at least one key
if (metadata.getKeys().size() > 0)
result.getBooks().add(metadata.getBook());
Set<String> abbrs = new HashSet<String>();
Set<String> shortnames = new HashSet<String>();
Map<BookID, String> abbrMap = new EnumMap<BookID, String>(BookID.class);
List<BIBLEBOOK> nl = doc.getBIBLEBOOK();
// First pass: fill abbrMap for all books before any content is parsed
// (presumably so that references to books that appear later in the
// document can already be resolved).
for (BIBLEBOOK e : nl) {
String shortname = e.getBsname();
int number = e.getBnumber().intValue();
BookID bookID = BookID.fromZefId(number);
if (shortname == null)
shortname = "_" + bookID.getOsisID();
else if (shortname.length() == 0)
shortname = "_" + bookID.getOsisID() + "[[]]";
// Derive the abbreviation from the short name: keep only the listed
// characters, make sure it does not start with a lowercase letter and
// has at least two characters, then make it unique with a numeric suffix.
String abbr = shortname.replaceAll("[^A-Z0-9a-zäöü]++", "");
if (abbr.length() == 0 || Character.isLowerCase(abbr.charAt(0)))
abbr = "X" + abbr;
if (abbr.length() == 1)
abbr += "x";
if (abbrs.contains(abbr)) {
for (int i = 2; i < 100; i++) {
if (!abbrs.contains(abbr + i)) {
abbr = abbr + i;
break;
}
}
}
abbrs.add(abbr);
abbrMap.put(bookID, abbr);
}
// Second pass: the abbreviations are computed again with the same rules,
// therefore the set of used abbreviations starts empty again.
abbrs.clear();
for (BIBLEBOOK e : nl) {
String shortname = e.getBsname();
String longname = e.getBname();
int number = e.getBnumber().intValue();
BookID bookID = BookID.fromZefId(number);
if (shortname == null)
shortname = "_" + bookID.getOsisID();
else if (shortname.length() == 0)
shortname = "_" + bookID.getOsisID() + "[[]]";
if (longname == null)
longname = "_" + bookID.getEnglishName();
else if (longname.length() == 0)
longname = "_" + bookID.getEnglishName() + "[[]]";
else
longname = longname.replaceAll(" ++", " ").trim();
String abbr = shortname.replaceAll("[^A-Z0-9a-zäöü]++", "");
if (abbr.length() == 0 || Character.isLowerCase(abbr.charAt(0)))
abbr = "X" + abbr;
if (abbr.length() == 1)
abbr += "x";
if (abbrs.contains(abbr)) {
for (int i = 2; i < 100; i++) {
if (!abbrs.contains(abbr + i)) {
abbr = abbr + i;
break;
}
}
}
abbrs.add(abbr);
// Special cases for two known naming mistakes; the corrected name is
// followed by the original name in double square brackets.
if (shortname.equals("Gen") && longname.equals("Genesis") && bookID == BookID.BOOK_Exod) {
System.out.println("WARNING: Book number " + bookID.getZefID() + " has name " + longname);
shortname = "Exo[[Gen]]";
longname = "Exodus[[Genesis]]";
}
if (shortname.equals("1Chr") && longname.equals("2 Chronicles")) {
System.out.println("WARNING: Book name 2 Chronicles has short name 1Chr");
shortname = "2Chr[[1Chr]]";
}
// duplicate short names get a numeric suffix, again followed by the original name in brackets
if (shortnames.contains(shortname)) {
System.out.println("WARNING: Duplicate short name " + shortname);
for (int i = 2; i < 100; i++) {
if (!shortnames.contains(shortname + i + "[[" + shortname + "]]")) {
shortname = shortname + i + "[[" + shortname + "]]";
break;
}
}
}
shortnames.add(shortname);
Book book = new Book(abbr, bookID, shortname, longname);
// vref of the captions that are waiting in headlineBuffer, or -1 if none is pending
int lastvref = -1;
// headlines collected from CAPTION elements until the next VERS element
List<Headline> headlineBuffer = new ArrayList<Headline>();
for (CHAPTER e2 : e.getCHAPTER()) {
int chapterNumber = e2.getCnumber().intValue();
// create missing chapters so that the chapter can be addressed by its number
while (book.getChapters().size() < chapterNumber) book.getChapters().add(new Chapter());
Chapter chapter = book.getChapters().get(chapterNumber - 1);
for (Object e3 : e2.getPROLOGOrCAPTIONOrVERS()) {
if (e3 instanceof CAPTION) {
CAPTION caption = (CAPTION) e3;
// all pending captions have to refer to the same verse
if (lastvref != -1 && lastvref != caption.getVref().intValue())
throw new IOException();
lastvref = caption.getVref().intValue();
// captions without a type are stored with level 9
int level;
if (caption.getType() == null) {
level = 9;
} else {
switch(caption.getType()) {
case X_H_1:
level = 1;
break;
case X_H_2:
level = 2;
break;
case X_H_3:
level = 3;
break;
case X_H_4:
level = 4;
break;
case X_H_5:
level = 5;
break;
case X_H_6:
level = 6;
break;
default:
throw new IOException();
}
}
Headline h = new Headline(level);
headlineBuffer.add(h);
// a caption without content gets an explicit empty marker
if (!parseContent(h.getAppendVisitor(), caption.getContent(), abbrMap)) {
visitEmptyMarker(h.getAppendVisitor());
} else {
h.trimWhitespace();
}
h.finished();
} else if (e3 instanceof REMARK) {
// a remark is appended as footnote to the verse it refers to,
// which must already exist in this chapter
REMARK remark = (REMARK) e3;
int vref = remark.getVref().intValue();
int idx = chapter.getVerseIndex("" + vref);
if (idx == -1)
throw new IOException(vref + ":" + remark.getContent());
Verse v = chapter.getVerses().get(idx);
if (remark.getContent().size() != 1)
throw new IOException();
String remarkText = normalize((String) remark.getContent().get(0), true).trim();
v.getAppendVisitor().visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "remark").visitFootnote().visitText(remarkText);
} else if (e3 instanceof XREF) {
// an xref is appended as footnote with one cross reference per
// space-separated mscope entry
XREF xref = (XREF) e3;
int vref = xref.getVref().intValue();
int idx = chapter.getVerseIndex("" + vref);
if (idx == -1)
throw new IOException(vref + ":" + xref.getMscope());
Verse v = chapter.getVerses().get(idx);
Visitor<RuntimeException> footnoteVisitor = v.getAppendVisitor().visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "outer-xref").visitFootnote();
boolean first = true;
for (String mscope : xref.getMscope().split(" ")) {
// format: book;chapter[-endChapter];verse[-endVerse]
Matcher m = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(mscope);
if (!m.matches())
throw new IOException(mscope);
BookID xrefBookID = BookID.fromZefId(Integer.parseInt(m.group(1)));
int xrefChapter = Integer.parseInt(m.group(2)), endChapter = xrefChapter;
if (m.group(3) != null)
endChapter = Integer.parseInt(m.group(3).substring(1));
// verse number 0 is replaced by "1//G"
String verse = m.group(4);
if (verse.equals("0"))
verse = "1//G";
String endVerse = m.group(5);
if (endVerse == null)
endVerse = verse;
else
endVerse = endVerse.substring(1);
if (endVerse.equals("0"))
endVerse = "1//G";
// books not contained in this bible are referenced by their OSIS id
String xrefAbbr = abbrMap.get(xrefBookID);
if (xrefAbbr == null)
xrefAbbr = xrefBookID.getOsisID();
if (first)
first = false;
else
footnoteVisitor.visitText(" ");
// swap a reversed verse range within one chapter
if (xrefChapter == endChapter && !verse.equals("1//G") && !endVerse.equals("1//G") && Integer.parseInt(verse) > Integer.parseInt(endVerse)) {
String tmp = verse;
verse = endVerse;
endVerse = tmp;
}
footnoteVisitor.visitCrossReference(xrefAbbr, xrefBookID, xrefChapter, verse, endChapter, endVerse).visitText(xrefAbbr + " " + xrefChapter + ":" + verse);
}
} else if (e3 instanceof PROLOG) {
// a prolog has to refer to verse 1 and may occur only once per chapter
PROLOG prolog = (PROLOG) e3;
if (prolog.getVref().intValue() != 1)
throw new IOException("" + prolog.getVref());
if (chapter.getProlog() != null)
throw new IOException("More than one prolog found");
FormattedText prologText = new FormattedText();
// a prolog without content is dropped
if (parseContent(prologText.getAppendVisitor(), prolog.getContent(), abbrMap)) {
prologText.trimWhitespace();
prologText.finished();
chapter.setProlog(prologText);
}
} else if (e3 instanceof VERS) {
VERS vers = (VERS) e3;
int vnumber = vers.getVnumber().intValue();
// pending captions have to belong to exactly this verse
if (lastvref != -1) {
if (lastvref != vnumber)
throw new IOException(lastvref + " != " + vnumber);
lastvref = -1;
}
Verse verse = new Verse("" + vnumber);
Visitor<RuntimeException> visitor = verse.getAppendVisitor();
boolean contentFound = false;
// buffered headlines are emitted at the start of the verse
if (headlineBuffer.size() > 0) {
for (Headline h : headlineBuffer) {
h.accept(visitor.visitHeadline(h.getDepth()));
}
headlineBuffer.clear();
contentFound = true;
}
contentFound |= parseContent(visitor, vers.getContent(), abbrMap);
if (!contentFound) {
visitEmptyMarker(visitor);
}
verse.trimWhitespace();
chapter.getVerses().add(verse);
} else {
throw new IOException(e3.getClass().toString());
}
}
// verses are finished only after the whole chapter element has been read,
// because REMARK and XREF elements still append to existing verses
for (Verse v : chapter.getVerses()) v.finished();
}
result.getBooks().add(book);
}
return result;
}
use of biblemulticonverter.data.FormattedText in project BibleMultiConverter by schierlm.
the class OSIS method parseFormattedText.
/**
 * Appends the children of an OSIS element to a formatted text.
 *
 * <p>Text nodes are whitespace-normalized; a leading blank at the very
 * beginning or directly behind a {@code brp}/{@code lb}/{@code title}
 * element and a trailing blank at the very end or directly in front of such
 * an element are removed with a warning. {@code title} elements become
 * headlines (depth 1 for {@code type="chapter"}, depth 2 otherwise); all
 * other elements are passed on to {@code parseStructuredTextElement}.
 *
 * @param verseName name used in the warning about empty headlines
 * @param root      element whose children are parsed; it is normalized first
 * @param ft        formatted text that receives the parsed content
 */
protected void parseFormattedText(String verseName, Element root, FormattedText ft) {
    root.normalize();
    for (Node current = root.getFirstChild(); current != null; current = current.getNextSibling()) {
        if (!(current instanceof Text)) {
            Element child = (Element) current;
            if (!child.getNodeName().equals("title")) {
                parseStructuredTextElement(ft.getAppendVisitor(), child);
                continue;
            }

            // headline
            int depth = child.getAttribute("type").equals("chapter") ? 1 : 2;
            Headline headline = new Headline(depth);
            Node onlyChild = child.getFirstChild();
            if (child.getChildNodes().getLength() == 1 && onlyChild instanceof Text) {
                // a headline that consists of plain text only gets trimmed in place
                String raw = onlyChild.getTextContent();
                String trimmed = raw.trim();
                if (!raw.equals(trimmed)) {
                    printWarning("WARNING: Whitespace at beginning/end of headline: '" + raw + "'");
                    onlyChild.setNodeValue(trimmed);
                }
            }
            parseStructuredTextChildren(headline.getAppendVisitor(), child);
            if (headline.getElementTypes(1).length() == 0) {
                printWarning("WARNING: Empty headline in " + verseName);
            } else {
                headline.accept(ft.getAppendVisitor().visitHeadline(headline.getDepth()));
            }
            continue;
        }

        // text node
        String content = current.getTextContent().replaceAll("[ \r\n\t]+", " ");

        Node before = current.getPreviousSibling();
        boolean afterBreak = before == null || Arrays.asList("brp", "lb", "title").contains(before.getNodeName());
        if (afterBreak && content.startsWith(" ")) {
            printWarning("WARNING: Whitespace at beginning of verse or after title/newline");
            content = content.substring(1);
        }

        // empty <w>/<q> elements are ignored when looking for the following element
        Node after = current.getNextSibling();
        while (after != null && after.getFirstChild() == null && Arrays.asList("w", "q").contains(after.getNodeName())) {
            after = after.getNextSibling();
        }
        boolean beforeBreak = after == null || Arrays.asList("brp", "lb", "title").contains(after.getNodeName());
        if (beforeBreak && content.endsWith(" ")) {
            printWarning("WARNING: Whitespace at end of verse or after title/newline");
            content = content.substring(0, content.length() - 1);
        }

        if (!content.isEmpty()) {
            ft.getAppendVisitor().visitText(content);
        }
    }
}
Aggregations