use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.
the class ZefaniaXMLMyBible method doExport.
/**
 * Exports the given bible as a Zefania XML document and writes it to a file.
 *
 * <p>Steps performed:
 * <ol>
 * <li>Introduction prologs are merged via {@code StrippedDiffable.mergeIntroductionPrologs}.</li>
 * <li>A numeric revision is derived from the digits of the first metadata value
 * (revision, version, date, title - in that order) that contains any digit, falling
 * back to the digits contained in the bible name.</li>
 * <li>Chapter prologs are rendered as {@code x-studynote} notes containing HTML markup
 * and are attached to the next verse that gets exported.</li>
 * <li>Headlines become captions (formatting stripped), verses are rendered with
 * STYLE/gr/BR elements, footnotes become {@code x-studynote} notes containing HTML
 * markup.</li>
 * <li>Books that end up without any chapter are removed again.</li>
 * </ol>
 *
 * @param bible      the bible to export
 * @param exportArgs export arguments; {@code exportArgs[0]} is the name of the output file
 * @throws Exception if building, marshalling or writing the document fails
 */
@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
    new StrippedDiffable().mergeIntroductionPrologs(bible);
    final ObjectFactory f = new ObjectFactory();
    XMLBIBLE doc = f.createXMLBIBLE();
    doc.setBiblename(bible.getName());
    doc.setType(EnumModtyp.X_BIBLE);

    // Derive a numeric revision: first metadata value that contains digits wins.
    BigInteger revision = null;
    MetadataBook metadata = bible.getMetadataBook();
    if (metadata != null) {
        for (MetadataBookKey key : Arrays.asList(MetadataBookKey.revision, MetadataBookKey.version, MetadataBookKey.date, MetadataBookKey.title)) {
            String digits = metadata.getValue(key);
            if (digits == null)
                continue;
            digits = digits.replaceAll("[^0-9]+", "");
            if (!digits.isEmpty()) {
                revision = new BigInteger(digits);
                break;
            }
        }
    }
    if (revision == null) {
        // Fall back to digits contained in the bible name.
        String digits = bible.getName().replaceAll("[^0-9]+", "");
        if (!digits.isEmpty()) {
            revision = new BigInteger(digits);
        }
    }
    if (revision != null) {
        doc.setRevision(revision);
    }
    doc.setINFORMATION(f.createINFORMATION());

    // Prologs collected here are attached to the next exported verse.
    List<DIV> prologs = new ArrayList<DIV>();
    for (Book bk : bible.getBooks()) {
        if (bk.getId().equals(BookID.METADATA))
            continue;
        int bsnumber = bk.getId().getZefID();
        final BIBLEBOOK book = f.createBIBLEBOOK();
        book.setBnumber(BigInteger.valueOf(bsnumber));
        book.setBname(bk.getShortName());
        book.setBsname(bk.getAbbr());
        doc.getBIBLEBOOK().add(book);
        int cnumber = 0;
        for (Chapter cch : bk.getChapters()) {
            cnumber++;
            if (cch.getProlog() != null) {
                DIV xx = f.createDIV();
                prologs.add(xx);
                NOTE xxx = f.createNOTE();
                xx.setNOTE(xxx);
                xxx.setType("x-studynote");
                NOTE prolog = xxx;
                DIV vers = f.createDIV();
                prolog.getContent().add("<p>");
                prolog.getContent().add(vers);
                prolog.getContent().add("</p>");
                vers.setNOTE(f.createNOTE());
                // targetStack.get(0) is the list currently receiving content; every
                // nested visitor pushes its content list to the front and visitEnd
                // pops it again.
                final List<List<Object>> targetStack = new ArrayList<List<Object>>();
                targetStack.add(vers.getNOTE().getContent());
                cch.getProlog().accept(new Visitor<IOException>() {

                    @Override
                    public Visitor<IOException> visitHeadline(int depth) throws IOException {
                        if (depth > 6)
                            depth = 6;
                        STYLE s = f.createSTYLE();
                        s.setCss("-zef-dummy: true");
                        targetStack.get(0).add("<h" + depth + ">");
                        targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                        targetStack.get(0).add("</h" + depth + ">");
                        targetStack.add(0, s.getContent());
                        return this;
                    }

                    @Override
                    public void visitVerseSeparator() throws IOException {
                        STYLE x = f.createSTYLE();
                        x.setCss("color:gray");
                        x.getContent().add("/");
                        targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, x));
                    }

                    @Override
                    public void visitText(String text) throws IOException {
                        targetStack.get(0).add(text);
                    }

                    @Override
                    public Visitor<IOException> visitFormattingInstruction(FormattedText.FormattingInstructionKind kind) throws IOException {
                        String startTag, endTag;
                        if (kind.getHtmlTag() != null) {
                            startTag = "<" + kind.getHtmlTag() + ">";
                            endTag = "</" + kind.getHtmlTag() + ">";
                        } else {
                            startTag = "<span style=\"" + kind.getCss() + "\">";
                            endTag = "</span>";
                        }
                        STYLE s = f.createSTYLE();
                        s.setCss("-zef-dummy: true");
                        targetStack.get(0).add(startTag);
                        targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                        targetStack.get(0).add(endTag);
                        targetStack.add(0, s.getContent());
                        return this;
                    }

                    @Override
                    public Visitor<IOException> visitFootnote() throws IOException {
                        System.out.println("WARNING: Footnotes in prolog are not supported");
                        return null;
                    }

                    @Override
                    public Visitor<IOException> visitCrossReference(String bookAbbr, BookID book, int firstChapter, String firstVerse, int lastChapter, String lastVerse) throws IOException {
                        // The reference itself is dropped; its text content is kept.
                        System.out.println("WARNING: Cross references in prologs are not supported");
                        STYLE s = f.createSTYLE();
                        s.setCss("-zef-dummy: true");
                        targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                        targetStack.add(0, s.getContent());
                        return this;
                    }

                    @Override
                    public Visitor<IOException> visitVariationText(String[] variations) throws IOException {
                        throw new RuntimeException("Variations not supported");
                    }

                    @Override
                    public void visitLineBreak(LineBreakKind kind) throws IOException {
                        BR br = f.createBR();
                        br.setArt(kind == LineBreakKind.PARAGRAPH ? EnumBreak.X_P : EnumBreak.X_NL);
                        targetStack.get(0).add(" ");
                        targetStack.get(0).add(kind == LineBreakKind.PARAGRAPH ? "<p>" : "<br>");
                        targetStack.get(0).add(br);
                    }

                    @Override
                    public Visitor<IOException> visitGrammarInformation(int[] strongs, String[] rmac, int[] sourceIndices) throws IOException {
                        throw new RuntimeException("Grammar tags in prologs not supported");
                    }

                    @Override
                    public FormattedText.Visitor<IOException> visitDictionaryEntry(String dictionary, String entry) throws IOException {
                        throw new RuntimeException("Dictionary entries in prologs not supported");
                    }

                    @Override
                    public void visitRawHTML(RawHTMLMode mode, String raw) throws IOException {
                        throw new RuntimeException("Raw HTML in prologs not supported");
                    }

                    @Override
                    public Visitor<IOException> visitCSSFormatting(String css) throws IOException {
                        STYLE s = f.createSTYLE();
                        s.setCss("-zef-dummy: true");
                        targetStack.get(0).add("<span style=\"" + css + "\">");
                        targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                        targetStack.get(0).add("</span>");
                        targetStack.add(0, s.getContent());
                        return this;
                    }

                    @Override
                    public int visitElementTypes(String elementTypes) throws IOException {
                        return 0;
                    }

                    @Override
                    public Visitor<IOException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws IOException {
                        if (prio == ExtraAttributePriority.KEEP_CONTENT)
                            return visitCSSFormatting("-zef-extra-attribute-" + category + "-" + key + ": " + value);
                        else if (prio == ExtraAttributePriority.SKIP)
                            return null;
                        throw new RuntimeException("Extra attributes not supported");
                    }

                    @Override
                    public void visitStart() throws IOException {
                    }

                    @Override
                    public boolean visitEnd() throws IOException {
                        targetStack.remove(0);
                        return false;
                    }
                });
                // Every push must have been matched by a visitEnd pop.
                if (targetStack.size() != 0)
                    throw new RuntimeException();
            }
            if (cch.getVerses().size() == 0)
                continue;
            CHAPTER chapter = f.createCHAPTER();
            chapter.setCnumber(BigInteger.valueOf(cnumber));
            book.getCHAPTER().add(chapter);
            for (VirtualVerse vv : cch.createVirtualVerses()) {
                for (Headline h : vv.getHeadlines()) {
                    CAPTION caption = f.createCAPTION();
                    // Index is the headline depth; depths above 6 are exported as x-h6.
                    EnumCaptionType[] types = new EnumCaptionType[] { null, EnumCaptionType.X_H_1, EnumCaptionType.X_H_2, EnumCaptionType.X_H_3, EnumCaptionType.X_H_4, EnumCaptionType.X_H_5, EnumCaptionType.X_H_6, EnumCaptionType.X_H_6, EnumCaptionType.X_H_6, EnumCaptionType.X_H_6 };
                    caption.setType(types[h.getDepth()]);
                    caption.setVref(BigInteger.valueOf(vv.getNumber()));
                    final StringBuilder sb = new StringBuilder();
                    // Captions are exported as plain text; all markup is stripped.
                    h.accept(new FormattedText.VisitorAdapter<RuntimeException>(null) {

                        @Override
                        protected void beforeVisit() throws RuntimeException {
                            throw new IllegalStateException();
                        }

                        @Override
                        public Visitor<RuntimeException> visitFormattingInstruction(FormattingInstructionKind kind) throws RuntimeException {
                            System.out.println("WARNING: Formatting instructions in captions are not supported (stripped)");
                            return this;
                        }

                        @Override
                        public Visitor<RuntimeException> visitFootnote() throws RuntimeException {
                            System.out.println("WARNING: Footnotes in captions are not supported (stripped)");
                            return null;
                        }

                        @Override
                        public Visitor<RuntimeException> visitCSSFormatting(String css) throws RuntimeException {
                            System.out.println("WARNING: CSS Formatting in captions are not supported (stripped)");
                            return this;
                        }

                        @Override
                        public Visitor<RuntimeException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws RuntimeException {
                            return prio.handleVisitor(category, this);
                        }

                        @Override
                        public void visitText(String text) throws RuntimeException {
                            sb.append(text);
                        }
                    });
                    caption.getContent().add(sb.toString());
                    chapter.getPROLOGOrCAPTIONOrVERS().add(caption);
                }
                VERS vers = f.createVERS();
                vers.setVnumber(BigInteger.valueOf(vv.getNumber()));
                // Attach all prologs collected so far to this verse.
                for (DIV prolog : prologs) {
                    vers.getContent().add(prolog);
                }
                prologs.clear();
                chapter.getPROLOGOrCAPTIONOrVERS().add(vers);
                boolean first = true;
                for (Verse v : vv.getVerses()) {
                    // Show the real verse number whenever it is not the first verse of
                    // the virtual verse or differs from the virtual verse number.
                    if (!first || !v.getNumber().equals("" + vv.getNumber())) {
                        STYLE x = f.createSTYLE();
                        x.setCss("font-weight: bold");
                        x.getContent().add("(" + v.getNumber() + ")");
                        vers.getContent().add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, x));
                        vers.getContent().add(" ");
                    }
                    first = false;
                    final List<List<Object>> targetStack = new ArrayList<List<Object>>();
                    targetStack.add(vers.getContent());
                    v.accept(new FormattedText.Visitor<IOException>() {

                        @Override
                        public void visitVerseSeparator() throws IOException {
                            STYLE x = f.createSTYLE();
                            x.setCss("color:gray");
                            x.getContent().add("/");
                            targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, x));
                        }

                        @Override
                        public void visitText(String text) throws IOException {
                            targetStack.get(0).add(text);
                        }

                        @Override
                        public FormattedText.Visitor<IOException> visitFormattingInstruction(biblemulticonverter.data.FormattedText.FormattingInstructionKind kind) throws IOException {
                            STYLE x = f.createSTYLE();
                            x.setCss(kind.getCss());
                            targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, x));
                            targetStack.add(0, x.getContent());
                            return this;
                        }

                        @Override
                        public Visitor<IOException> visitFootnote() throws IOException {
                            DIV x = f.createDIV();
                            targetStack.get(0).add(x);
                            NOTE n = f.createNOTE();
                            x.setNOTE(n);
                            n.setType("x-studynote");
                            // Same front-of-list stack discipline as targetStack, but
                            // local to this footnote.
                            final List<List<Object>> footnoteStack = new ArrayList<List<Object>>();
                            footnoteStack.add(n.getContent());
                            return new Visitor<IOException>() {

                                @Override
                                public void visitStart() throws IOException {
                                }

                                @Override
                                public void visitVerseSeparator() throws IOException {
                                    STYLE x = f.createSTYLE();
                                    x.setCss("color:gray");
                                    x.getContent().add("/");
                                    footnoteStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, x));
                                }

                                @Override
                                public void visitText(String text) throws IOException {
                                    footnoteStack.get(0).add(text);
                                }

                                @Override
                                public void visitLineBreak(LineBreakKind kind) throws IOException {
                                    BR br = f.createBR();
                                    br.setArt(kind == LineBreakKind.PARAGRAPH ? EnumBreak.X_P : EnumBreak.X_NL);
                                    footnoteStack.get(0).add(" ");
                                    footnoteStack.get(0).add(br);
                                }

                                @Override
                                public Visitor<IOException> visitFormattingInstruction(FormattedText.FormattingInstructionKind kind) throws IOException {
                                    String startTag, endTag;
                                    if (kind.getHtmlTag() != null) {
                                        startTag = "<" + kind.getHtmlTag() + ">";
                                        endTag = "</" + kind.getHtmlTag() + ">";
                                    } else {
                                        startTag = "<span style=\"" + kind.getCss() + "\">";
                                        endTag = "</span>";
                                    }
                                    STYLE s = f.createSTYLE();
                                    s.setCss("-zef-dummy: true");
                                    footnoteStack.get(0).add(startTag);
                                    footnoteStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                                    footnoteStack.get(0).add(endTag);
                                    footnoteStack.add(0, s.getContent());
                                    return this;
                                }

                                @Override
                                public Visitor<IOException> visitFootnote() throws IOException {
                                    throw new RuntimeException("Footnotes in footnotes are not supported");
                                }

                                @Override
                                public Visitor<IOException> visitGrammarInformation(int[] strongs, String[] rmac, int[] sourceIndices) throws IOException {
                                    GRAM gram = f.createGRAM();
                                    if (strongs != null) {
                                        StringBuilder entryBuilder = new StringBuilder();
                                        for (int i = 0; i < strongs.length; i++) {
                                            entryBuilder.append((i > 0 ? " " : "") + strongs[i]);
                                        }
                                        String entry = entryBuilder.toString();
                                        gram.setStr(entry);
                                    }
                                    if (rmac != null) {
                                        StringBuilder entryBuilder = new StringBuilder();
                                        for (int i = 0; i < rmac.length; i++) {
                                            if (i > 0)
                                                entryBuilder.append(' ');
                                            entryBuilder.append(rmac[i]);
                                        }
                                        gram.setRmac(entryBuilder.toString());
                                    }
                                    footnoteStack.get(0).add(new JAXBElement<GRAM>(new QName("gr"), GRAM.class, gram));
                                    footnoteStack.add(0, gram.getContent());
                                    return this;
                                }

                                @Override
                                public FormattedText.Visitor<IOException> visitDictionaryEntry(String dictionary, String entry) throws IOException {
                                    GRAM gram = f.createGRAM();
                                    gram.setStr(entry);
                                    footnoteStack.get(0).add(new JAXBElement<GRAM>(new QName("gr"), GRAM.class, gram));
                                    footnoteStack.add(0, gram.getContent());
                                    return this;
                                }

                                @Override
                                public void visitRawHTML(RawHTMLMode mode, String raw) throws IOException {
                                    if (mode != RawHTMLMode.ONLINE)
                                        footnoteStack.get(0).add(raw);
                                }

                                @Override
                                public Visitor<IOException> visitVariationText(String[] variations) throws IOException {
                                    throw new RuntimeException("Variations not supported");
                                }

                                @Override
                                public FormattedText.Visitor<IOException> visitCrossReference(String bookAbbr, BookID book, int firstChapter, String firstVerse, int lastChapter, String lastVerse) throws IOException {
                                    STYLE s = f.createSTYLE();
                                    s.setCss("-zef-dummy: true");
                                    int bookID = book.getZefID();
                                    String mscope, xmscope;
                                    try {
                                        int start = firstVerse.equals("^") ? 1 : Integer.parseInt(firstVerse.replaceAll("[a-zG]|[,/][0-9]*", ""));
                                        int end;
                                        if (firstChapter == lastChapter && !lastVerse.equals("$")) {
                                            // Strip the same suffixes as for the first verse
                                            // (including 'G', e.g. "1//G"), so such end verses
                                            // do not fall into the whole-book fallback below.
                                            end = Integer.parseInt(lastVerse.replaceAll("[a-zG]|[,/][0-9]*", ""));
                                        } else {
                                            end = -1;
                                        }
                                        mscope = bookID + "," + firstChapter + "," + start + "," + end;
                                        xmscope = bookID + ";" + firstChapter + ";" + start + "-" + end;
                                    } catch (NumberFormatException ex) {
                                        // Best effort: unparseable verse numbers link to a generic scope.
                                        ex.printStackTrace();
                                        mscope = bookID + ",1,1,999";
                                        xmscope = bookID + ";1;1-999";
                                    }
                                    if (footnoteStack.size() == 1) {
                                        // Top-level cross reference of the footnote: also emit an
                                        // XREF element just before the last element of the
                                        // enclosing verse content list.
                                        List<Object> outerList = targetStack.get(0);
                                        XREF xref = new XREF();
                                        xref.setMscope(xmscope);
                                        outerList.add(outerList.size() - 1, xref);
                                    }
                                    footnoteStack.get(0).add("<a href=\"mybible:content=location&locations=" + mscope + "\">");
                                    footnoteStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                                    footnoteStack.get(0).add("</a>");
                                    footnoteStack.add(0, s.getContent());
                                    return this;
                                }

                                @Override
                                public boolean visitEnd() throws IOException {
                                    footnoteStack.remove(0);
                                    return false;
                                }

                                @Override
                                public int visitElementTypes(String elementTypes) throws IOException {
                                    return 0;
                                }

                                @Override
                                public Visitor<IOException> visitHeadline(int depth) throws IOException {
                                    throw new RuntimeException("Headlines in footnotes not supported");
                                }

                                @Override
                                public Visitor<IOException> visitCSSFormatting(String css) throws IOException {
                                    STYLE s = f.createSTYLE();
                                    s.setCss("-zef-dummy: true");
                                    footnoteStack.get(0).add("<span style=\"" + css + "\">");
                                    footnoteStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, s));
                                    footnoteStack.get(0).add("</span>");
                                    // Push to the FRONT: get(0)/remove(0) treat index 0 as the
                                    // top of the stack, so appending at the end would route the
                                    // nested content into the parent and pop the wrong entry.
                                    footnoteStack.add(0, s.getContent());
                                    return this;
                                }

                                @Override
                                public Visitor<IOException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws IOException {
                                    System.out.println("WARNING: Extra attributes not supported");
                                    Visitor<IOException> result = prio.handleVisitor(category, this);
                                    // Duplicate the current target so the matching visitEnd
                                    // pop leaves the stack balanced.
                                    if (result != null)
                                        footnoteStack.add(0, footnoteStack.get(0));
                                    return result;
                                }
                            };
                        }

                        @Override
                        public FormattedText.Visitor<IOException> visitCrossReference(String bookAbbr, BookID book, int firstChapter, String firstVerse, int lastChapter, String lastVerse) throws IOException {
                            throw new RuntimeException("Xref outside of footnotes not supported!");
                        }

                        @Override
                        public void visitLineBreak(LineBreakKind kind) throws IOException {
                            BR br = f.createBR();
                            br.setArt(kind == LineBreakKind.PARAGRAPH ? EnumBreak.X_P : EnumBreak.X_NL);
                            targetStack.get(0).add(" ");
                            targetStack.get(0).add(br);
                        }

                        @Override
                        public Visitor<IOException> visitGrammarInformation(int[] strongs, String[] rmac, int[] sourceIndices) throws IOException {
                            GRAM gram = f.createGRAM();
                            if (strongs != null) {
                                StringBuilder entryBuilder = new StringBuilder();
                                for (int i = 0; i < strongs.length; i++) {
                                    entryBuilder.append((i > 0 ? " " : "") + strongs[i]);
                                }
                                String entry = entryBuilder.toString();
                                gram.setStr(entry);
                            }
                            if (rmac != null) {
                                StringBuilder entryBuilder = new StringBuilder();
                                for (int i = 0; i < rmac.length; i++) {
                                    if (i > 0)
                                        entryBuilder.append(' ');
                                    entryBuilder.append(rmac[i]);
                                }
                                gram.setRmac(entryBuilder.toString());
                            }
                            targetStack.get(0).add(new JAXBElement<GRAM>(new QName("gr"), GRAM.class, gram));
                            targetStack.add(0, gram.getContent());
                            return this;
                        }

                        @Override
                        public FormattedText.Visitor<IOException> visitDictionaryEntry(String dictionary, String entry) throws IOException {
                            GRAM gram = f.createGRAM();
                            gram.setStr(entry);
                            targetStack.get(0).add(new JAXBElement<GRAM>(new QName("gr"), GRAM.class, gram));
                            targetStack.add(0, gram.getContent());
                            return this;
                        }

                        @Override
                        public void visitRawHTML(RawHTMLMode mode, String raw) throws IOException {
                            throw new RuntimeException("Raw HTML is not supported");
                        }

                        @Override
                        public Visitor<IOException> visitVariationText(String[] variations) throws IOException {
                            throw new RuntimeException("Variations not supported");
                        }

                        @Override
                        public boolean visitEnd() throws IOException {
                            targetStack.remove(0);
                            return false;
                        }

                        @Override
                        public int visitElementTypes(String elementTypes) throws IOException {
                            return 0;
                        }

                        @Override
                        public Visitor<IOException> visitHeadline(int depth) throws IOException {
                            throw new RuntimeException("Headline in virtual verse is impossible");
                        }

                        @Override
                        public void visitStart() throws IOException {
                        }

                        @Override
                        public Visitor<IOException> visitCSSFormatting(String css) throws IOException {
                            STYLE x = f.createSTYLE();
                            x.setCss(css);
                            targetStack.get(0).add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, x));
                            targetStack.add(0, x.getContent());
                            return this;
                        }

                        @Override
                        public Visitor<IOException> visitExtraAttribute(ExtraAttributePriority prio, String category, String key, String value) throws IOException {
                            System.out.println("WARNING: Extra attributes not supported");
                            Visitor<IOException> result = prio.handleVisitor(category, this);
                            // Duplicate the current target so the matching visitEnd pop
                            // leaves the stack balanced.
                            if (result != null)
                                targetStack.add(0, targetStack.get(0));
                            return result;
                        }
                    });
                    // Every push must have been matched by a visitEnd pop.
                    if (targetStack.size() != 0)
                        throw new RuntimeException();
                }
            }
        }
        // Books without any exported chapter are dropped again.
        if (book.getCHAPTER().size() == 0) {
            doc.getBIBLEBOOK().remove(book);
        }
    }

    // Marshal into a DOM first so schema attributes can be added before serialization.
    final Document docc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    JAXBContext.newInstance(ObjectFactory.class.getPackage().getName()).createMarshaller().marshal(doc, docc);
    docc.getDocumentElement().setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
    docc.getDocumentElement().setAttribute("xsi:noNamespaceSchemaLocation", "zef2005.xsd");
    docc.normalize();
    maskWhitespaceNodes(docc.getDocumentElement());
    try (FileOutputStream fos = new FileOutputStream(exportArgs[0])) {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        transformer.transform(new DOMSource(docc), new StreamResult(fos));
    }
}
use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.
the class ZefaniaXMLRoundtrip method parseBible.
/**
 * Builds a {@link Bible} from an unmarshalled Zefania XML document.
 *
 * <p>Status, version, revision and all INFORMATION children are stored in a
 * metadata book (added only if it has at least one key). Book abbreviations
 * are computed in a first pass so cross references can point to books that
 * appear later in the document; a second pass builds the books, chapters and
 * verses. Captions are buffered and prepended to the verse that follows them;
 * remarks and outer cross references are appended as footnotes to the verse
 * they refer to.
 *
 * @param doc the unmarshalled Zefania XML document
 * @return the parsed bible
 * @throws Exception if the document contains an unexpected structure
 */
protected Bible parseBible(XMLBIBLE doc) throws Exception {
    Bible bible = new Bible(doc.getBiblename());

    // --- metadata ---
    MetadataBook meta = new MetadataBook();
    if (doc.getStatus() != null) {
        meta.setValue(MetadataBookKey.status, doc.getStatus().value());
    }
    if (doc.getVersion() != null) {
        meta.setValue(MetadataBookKey.version, doc.getVersion());
    }
    if (doc.getRevision() != null) {
        meta.setValue(MetadataBookKey.revision, doc.getRevision().toString());
    }
    for (JAXBElement<?> info : doc.getINFORMATION().getTitleOrCreatorOrDescription()) {
        Object rawValue = info.getValue();
        if (rawValue == null)
            continue;
        String text = normalize(rawValue.toString(), true).trim();
        if (text.isEmpty())
            text = "-empty-";
        meta.setValue(info.getName().getLocalPart(), text);
    }
    meta.finished();
    if (!meta.getKeys().isEmpty())
        bible.getBooks().add(meta.getBook());

    // --- pass 1: abbreviation of every book, needed for cross references ---
    Set<String> usedAbbrs = new HashSet<String>();
    Set<String> usedShortNames = new HashSet<String>();
    Map<BookID, String> abbrByBook = new EnumMap<BookID, String>(BookID.class);
    List<BIBLEBOOK> zefBooks = doc.getBIBLEBOOK();
    for (BIBLEBOOK zefBook : zefBooks) {
        String shortName = zefBook.getBsname();
        BookID id = BookID.fromZefId(zefBook.getBnumber().intValue());
        if (shortName == null)
            shortName = "_" + id.getOsisID();
        else if (shortName.length() == 0)
            shortName = "_" + id.getOsisID() + "[[]]";
        abbrByBook.put(id, claimUniqueBookAbbr(shortName, usedAbbrs));
    }

    // --- pass 2: build the books ---
    usedAbbrs.clear();
    for (BIBLEBOOK zefBook : zefBooks) {
        String shortName = zefBook.getBsname();
        String longName = zefBook.getBname();
        BookID id = BookID.fromZefId(zefBook.getBnumber().intValue());
        if (shortName == null)
            shortName = "_" + id.getOsisID();
        else if (shortName.length() == 0)
            shortName = "_" + id.getOsisID() + "[[]]";
        if (longName == null)
            longName = "_" + id.getEnglishName();
        else if (longName.length() == 0)
            longName = "_" + id.getEnglishName() + "[[]]";
        else
            longName = longName.replaceAll(" ++", " ").trim();
        String abbr = claimUniqueBookAbbr(shortName, usedAbbrs);

        // Known naming glitches are corrected; the original name is kept in a marker.
        if (shortName.equals("Gen") && longName.equals("Genesis") && id == BookID.BOOK_Exod) {
            System.out.println("WARNING: Book number " + id.getZefID() + " has name " + longName);
            shortName = "Exo[[Gen]]";
            longName = "Exodus[[Genesis]]";
        }
        if (shortName.equals("1Chr") && longName.equals("2 Chronicles")) {
            System.out.println("WARNING: Book name 2 Chronicles has short name 1Chr");
            shortName = "2Chr[[1Chr]]";
        }
        if (usedShortNames.contains(shortName)) {
            System.out.println("WARNING: Duplicate short name " + shortName);
            for (int suffix = 2; suffix < 100; suffix++) {
                String candidate = shortName + suffix + "[[" + shortName + "]]";
                if (!usedShortNames.contains(candidate)) {
                    shortName = candidate;
                    break;
                }
            }
        }
        usedShortNames.add(shortName);

        Book book = new Book(abbr, id, shortName, longName);
        // Verse number announced by pending captions, or -1 if none is pending.
        int pendingVref = -1;
        List<Headline> pendingHeadlines = new ArrayList<Headline>();
        for (CHAPTER zefChapter : zefBook.getCHAPTER()) {
            int chapterNumber = zefChapter.getCnumber().intValue();
            while (book.getChapters().size() < chapterNumber)
                book.getChapters().add(new Chapter());
            Chapter chapter = book.getChapters().get(chapterNumber - 1);

            for (Object item : zefChapter.getPROLOGOrCAPTIONOrVERS()) {
                if (item instanceof CAPTION) {
                    CAPTION caption = (CAPTION) item;
                    int captionVref = caption.getVref().intValue();
                    if (pendingVref != -1 && pendingVref != captionVref)
                        throw new IOException();
                    pendingVref = captionVref;
                    Headline headline = new Headline(headlineDepthOfCaption(caption.getType()));
                    pendingHeadlines.add(headline);
                    if (parseContent(headline.getAppendVisitor(), caption.getContent(), abbrByBook)) {
                        headline.trimWhitespace();
                    } else {
                        visitEmptyMarker(headline.getAppendVisitor());
                    }
                    headline.finished();
                } else if (item instanceof REMARK) {
                    REMARK remark = (REMARK) item;
                    int vref = remark.getVref().intValue();
                    int verseIndex = chapter.getVerseIndex("" + vref);
                    if (verseIndex == -1)
                        throw new IOException(vref + ":" + remark.getContent());
                    Verse target = chapter.getVerses().get(verseIndex);
                    if (remark.getContent().size() != 1)
                        throw new IOException();
                    String remarkText = normalize((String) remark.getContent().get(0), true).trim();
                    target.getAppendVisitor().visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "remark").visitFootnote().visitText(remarkText);
                } else if (item instanceof XREF) {
                    XREF xref = (XREF) item;
                    int vref = xref.getVref().intValue();
                    int verseIndex = chapter.getVerseIndex("" + vref);
                    if (verseIndex == -1)
                        throw new IOException(vref + ":" + xref.getMscope());
                    Verse target = chapter.getVerses().get(verseIndex);
                    Visitor<RuntimeException> footnote = target.getAppendVisitor().visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "zefania", "footnote-source", "outer-xref").visitFootnote();
                    boolean needSeparator = false;
                    for (String mscope : xref.getMscope().split(" ")) {
                        Matcher m = Utils.compilePattern("([0-9]+);([0-9]+)(-[0-9]+)?;([0-9]+)(-[0-9]+)?").matcher(mscope);
                        if (!m.matches())
                            throw new IOException(mscope);
                        BookID xrefBookID = BookID.fromZefId(Integer.parseInt(m.group(1)));
                        int startChapter = Integer.parseInt(m.group(2));
                        int endChapter = startChapter;
                        if (m.group(3) != null)
                            endChapter = Integer.parseInt(m.group(3).substring(1));
                        // Verse number 0 is mapped to the special verse "1//G".
                        String startVerse = m.group(4);
                        if (startVerse.equals("0"))
                            startVerse = "1//G";
                        String endVerse = m.group(5);
                        endVerse = (endVerse == null) ? startVerse : endVerse.substring(1);
                        if (endVerse.equals("0"))
                            endVerse = "1//G";
                        String xrefAbbr = abbrByBook.get(xrefBookID);
                        if (xrefAbbr == null)
                            xrefAbbr = xrefBookID.getOsisID();
                        if (needSeparator)
                            footnote.visitText(" ");
                        needSeparator = true;
                        // Reversed ranges inside one chapter are put into ascending order.
                        if (startChapter == endChapter && !startVerse.equals("1//G") && !endVerse.equals("1//G") && Integer.parseInt(startVerse) > Integer.parseInt(endVerse)) {
                            String swap = startVerse;
                            startVerse = endVerse;
                            endVerse = swap;
                        }
                        footnote.visitCrossReference(xrefAbbr, xrefBookID, startChapter, startVerse, endChapter, endVerse).visitText(xrefAbbr + " " + startChapter + ":" + startVerse);
                    }
                } else if (item instanceof PROLOG) {
                    PROLOG prolog = (PROLOG) item;
                    if (prolog.getVref().intValue() != 1)
                        throw new IOException("" + prolog.getVref());
                    if (chapter.getProlog() != null)
                        throw new IOException("More than one prolog found");
                    FormattedText prologText = new FormattedText();
                    if (parseContent(prologText.getAppendVisitor(), prolog.getContent(), abbrByBook)) {
                        prologText.trimWhitespace();
                        prologText.finished();
                        chapter.setProlog(prologText);
                    }
                } else if (item instanceof VERS) {
                    VERS vers = (VERS) item;
                    int vnumber = vers.getVnumber().intValue();
                    if (pendingVref != -1) {
                        if (pendingVref != vnumber)
                            throw new IOException(pendingVref + " != " + vnumber);
                        pendingVref = -1;
                    }
                    Verse verse = new Verse("" + vnumber);
                    Visitor<RuntimeException> visitor = verse.getAppendVisitor();
                    // Buffered captions become headlines at the start of this verse.
                    boolean hadHeadlines = !pendingHeadlines.isEmpty();
                    for (Headline headline : pendingHeadlines) {
                        headline.accept(visitor.visitHeadline(headline.getDepth()));
                    }
                    pendingHeadlines.clear();
                    boolean hadContent = parseContent(visitor, vers.getContent(), abbrByBook);
                    if (!hadHeadlines && !hadContent) {
                        visitEmptyMarker(visitor);
                    }
                    verse.trimWhitespace();
                    chapter.getVerses().add(verse);
                } else {
                    throw new IOException(item.getClass().toString());
                }
            }
            for (Verse v : chapter.getVerses())
                v.finished();
        }
        bible.getBooks().add(book);
    }
    return bible;
}

/**
 * Derives a book abbreviation from a short name, makes it unique among the
 * abbreviations already taken (by appending a number from 2 to 99 if one is
 * free) and records it in {@code taken}.
 */
private static String claimUniqueBookAbbr(String shortName, Set<String> taken) {
    String abbr = shortName.replaceAll("[^A-Z0-9a-zäöü]++", "");
    if (abbr.length() == 0 || Character.isLowerCase(abbr.charAt(0)))
        abbr = "X" + abbr;
    if (abbr.length() == 1)
        abbr += "x";
    if (taken.contains(abbr)) {
        int suffix = 2;
        while (suffix < 100 && taken.contains(abbr + suffix))
            suffix++;
        if (suffix < 100)
            abbr = abbr + suffix;
    }
    taken.add(abbr);
    return abbr;
}

/**
 * Maps a caption type to a headline depth; a missing type maps to depth 9.
 */
private static int headlineDepthOfCaption(EnumCaptionType type) throws IOException {
    if (type == null)
        return 9;
    switch (type) {
    case X_H_1:
        return 1;
    case X_H_2:
        return 2;
    case X_H_3:
        return 3;
    case X_H_4:
        return 4;
    case X_H_5:
        return 5;
    case X_H_6:
        return 6;
    default:
        throw new IOException();
    }
}
use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.
the class ZefaniaXMLRoundtrip method createXMLBible.
/**
 * Builds the Zefania XML object tree for the given bible.
 *
 * <p>Metadata values for status, version and revision are written to the
 * document itself; other keys contained in {@code INFORMATION_KEYS} become
 * INFORMATION children (the placeholder {@code -empty-} is written as an empty
 * string). Books whose Zefania number is not positive are skipped with a
 * warning, and chapters without verses are not exported.
 *
 * @param bible the bible to convert
 * @return the populated document
 * @throws Exception if the content cannot be converted
 */
protected XMLBIBLE createXMLBible(Bible bible) throws Exception {
    ObjectFactory factory = new ObjectFactory();
    // Index is the headline depth; depths without a Zefania counterpart map to no type.
    final EnumCaptionType[] captionTypeByDepth = new EnumCaptionType[] { null, EnumCaptionType.X_H_1, EnumCaptionType.X_H_2, EnumCaptionType.X_H_3, EnumCaptionType.X_H_4, EnumCaptionType.X_H_5, EnumCaptionType.X_H_6, null, null, null };

    XMLBIBLE xml = factory.createXMLBIBLE();
    xml.setBiblename(bible.getName());
    xml.setType(EnumModtyp.X_BIBLE);
    xml.setINFORMATION(factory.createINFORMATION());

    MetadataBook meta = bible.getMetadataBook();
    if (meta != null) {
        for (String key : meta.getKeys()) {
            String value = meta.getValue(key);
            if (value.equals("-empty-"))
                value = "";
            if (key.equals(MetadataBookKey.status.toString())) {
                xml.setStatus(EnumStatus.fromValue(value));
            } else if (key.equals(MetadataBookKey.version.toString())) {
                xml.setVersion(value);
            } else if (key.equals(MetadataBookKey.revision.toString())) {
                xml.setRevision(new BigInteger(value));
            } else if (Arrays.asList(INFORMATION_KEYS).contains(key)) {
                JAXBElement<String> infoElement = new JAXBElement<String>(new QName(key), String.class, value);
                xml.getINFORMATION().getTitleOrCreatorOrDescription().add(infoElement);
            }
        }
    }

    for (Book source : bible.getBooks()) {
        BookID id = source.getId();
        if (id.equals(BookID.METADATA))
            continue;
        if (id.getZefID() <= 0) {
            System.out.println("WARNING: Unable to export book " + source.getAbbr());
            continue;
        }
        String shortName = removeRoundtripMarker(source.getShortName());
        String longName = removeRoundtripMarker(source.getLongName());
        BIBLEBOOK xmlBook = factory.createBIBLEBOOK();
        xmlBook.setBnumber(BigInteger.valueOf(id.getZefID()));
        // Names equal to the generated defaults are omitted from the output.
        if (!shortName.equals("_" + id.getOsisID()))
            xmlBook.setBsname(shortName);
        if (!longName.equals("_" + id.getEnglishName()))
            xmlBook.setBname(longName);

        int chapterNumber = 0;
        for (Chapter sourceChapter : source.getChapters()) {
            chapterNumber++;
            if (sourceChapter.getVerses().isEmpty())
                continue;
            CHAPTER xmlChapter = factory.createCHAPTER();
            xmlChapter.setCnumber(BigInteger.valueOf(chapterNumber));
            xmlBook.getCHAPTER().add(xmlChapter);
            List<Object> chapterItems = xmlChapter.getPROLOGOrCAPTIONOrVERS();

            FormattedText prologText = sourceChapter.getProlog();
            if (prologText != null) {
                PROLOG prolog = factory.createPROLOG();
                prolog.setVref(BigInteger.ONE);
                prologText.accept(new CreateContentVisitor(factory, prolog.getContent(), null, 0, null));
                chapterItems.add(prolog);
            }

            for (VirtualVerse virtualVerse : sourceChapter.createVirtualVerses()) {
                int virtualNumber = virtualVerse.getNumber();
                for (Headline headline : virtualVerse.getHeadlines()) {
                    CAPTION caption = factory.createCAPTION();
                    caption.setVref(BigInteger.valueOf(virtualNumber));
                    headline.accept(new CreateContentVisitor(factory, caption.getContent(), null, 0, null));
                    caption.setType(captionTypeByDepth[headline.getDepth()]);
                    chapterItems.add(caption);
                }

                // Remarks and cross references are collected here and emitted after the verse.
                List<Object> trailingItems = new ArrayList<Object>();
                VERS xmlVerse = factory.createVERS();
                xmlVerse.setVnumber(BigInteger.valueOf(virtualNumber));
                List<Object> verseContent = xmlVerse.getContent();
                for (Verse verse : virtualVerse.getVerses()) {
                    // Verses numbered differently from the virtual verse get a bold "(number)" prefix.
                    if (!verse.getNumber().equals("" + virtualNumber)) {
                        STYLE numberStyle = factory.createSTYLE();
                        numberStyle.setCss("font-weight: bold");
                        numberStyle.getContent().add("(" + verse.getNumber() + ")");
                        verseContent.add(new JAXBElement<STYLE>(new QName("STYLE"), STYLE.class, numberStyle));
                        verseContent.add(" ");
                    }
                    verse.accept(new CreateContentVisitor(factory, verseContent, trailingItems, virtualNumber, null));
                }
                chapterItems.add(xmlVerse);
                chapterItems.addAll(trailingItems);
            }
        }
        xml.getBIBLEBOOK().add(xmlBook);
    }
    return xml;
}
use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.
the class NeUeParser method doImport.
/**
 * Imports the NeÜ bible from a directory that holds its HTML pages.
 *
 * <p>The import runs in these stages:
 * <ol>
 * <li>A metadata book with fixed description, rights, source, publisher and language is added.</li>
 * <li>The main page ({@code NeUe.htm}, or {@code index.htm} when the former does not exist) is read:
 * the text after {@code "Textstand: "} becomes version and revision, the table of contents is
 * checked against {@code METADATA} and {@code JESUS_CHRONIK}, and the preface becomes an
 * {@code INTRODUCTION} book.</li>
 * <li>For every {@code METADATA} entry the file {@code <filename>.html} is parsed into a book;
 * entries whose file does not exist are skipped with a console message.</li>
 * <li>An {@code APPENDIX} book is built from {@code bibel.html}, a fixed "Hesekiels Tempel" section
 * and the Jesus-Chronik pages that exist.</li>
 * </ol>
 *
 * <p>The parser is strict: any line it does not recognise aborts the import.
 *
 * @param inputDirectory directory containing the HTML pages
 * @return the imported bible
 * @throws IOException if a page contains an unexpected line, ends prematurely inside a skipped
 *         list, or references a book file that is not listed in {@code METADATA}
 * @throws Exception any other failure raised by the helpers this method calls
 */
@Override
public Bible doImport(File inputDirectory) throws Exception {
    Bible bible = new Bible("NeÜ bibel.heute (Neue evangelistische Übersetzung)");
    MetadataBook metadata = new MetadataBook();
    metadata.setValue(MetadataBookKey.description, "Neue evangelistische Übersetzung (NeÜ), eine Übertragung der Bibel ins heutige Deutsch.");
    metadata.setValue(MetadataBookKey.rights, "Copyright (c) Karl-Heinz Vanheiden, Ahornweg 3, 07926 Gefell. Sofern keine anderslautende schriftliche Genehmigung des Rechteinhabers vorliegt, darf dieses Werk zu privaten und gemeindlichen Zwecken verwendet, aber nicht verändert oder weitergegeben werden. " + "Eine Weitergabe auf körperlichen Datenträgern (Papier, CD, DVD, Stick o.ä.) bedarf zusätzlich einer Genehmigung der Christlichen Verlagsgesellschaft Dillenburg (http://cv-dillenburg.de/).");
    metadata.setValue(MetadataBookKey.source, "http://www.derbibelvertrauen.de/");
    metadata.setValue(MetadataBookKey.publisher, "Karl-Heinz Vanheiden");
    metadata.setValue(MetadataBookKey.language, "GER");
    bible.getBooks().add(metadata.getBook());

    // Main page: prefer NeUe.htm, fall back to index.htm.
    String mainFile = "NeUe.htm";
    if (!new File(inputDirectory, mainFile).exists())
        mainFile = "index.htm";
    try (BufferedReader br = createReader(inputDirectory, mainFile)) {
        String line = br.readLine().trim();

        // Header part: look for the "Textstand: " marker until the table of contents begins.
        while (!line.startsWith("<p class=\"u3\">")) {
            if (line.contains("Textstand: ")) {
                // 11 == "Textstand: ".length()
                line = line.substring(line.indexOf("Textstand: ") + 11);
                line = line.substring(0, line.indexOf('<'));
                metadata.setValue(MetadataBookKey.version, line);
                // The date recorded is the date of the import, not a date taken from the page.
                metadata.setValue(MetadataBookKey.date, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
                metadata.setValue(MetadataBookKey.revision, line.replaceAll("[^0-9]+", ""));
                metadata.finished();
            }
            line = br.readLine().trim();
        }

        // Table of contents: every link must match the next expected METADATA / JESUS_CHRONIK entry.
        Pattern tocPattern = Pattern.compile("<a href=\"([^\"]+)\">([^<>]+)</a> (?:</p>)?");
        int bookIndex = 0, jcIndex = 0;
        while (!line.startsWith("<a name=\"vorwort\">")) {
            if (line.equals("<br>")) {
                line = br.readLine().trim();
                if (line.startsWith("»» "))
                    line = line.substring("»» ".length());
            }
            Matcher m = tocPattern.matcher(line);
            if (m.matches()) {
                String url = m.group(1);
                String shortName = replaceEntities(m.group(2));
                if (url.endsWith(".html#bb")) {
                    // 8 == ".html#bb".length()
                    String filename = url.substring(0, url.length() - 8);
                    BookMetadata bm = METADATA[bookIndex];
                    if (!bm.filename.equals(filename))
                        throw new IOException(filename + "/" + bm.filename);
                    // The short name shown in the table of contents is stored for the book parsed later.
                    bm.shortname = shortName;
                    bookIndex++;
                } else if (url.startsWith("0")) {
                    if (!url.equals(JESUS_CHRONIK[jcIndex] + ".html"))
                        throw new IOException(url + "/" + JESUS_CHRONIK[jcIndex]);
                    jcIndex++;
                } else {
                    throw new IOException(url);
                }
            } else if (line.length() != 0 && !line.startsWith("<p class=\"u3\">") && !line.startsWith("///") && !line.equals("<p> </p>") && !line.equals("<p><a name=\"bb\"> </a></p>")) {
                throw new IOException(line);
            }
            line = br.readLine().trim();
        }
        if (bookIndex != METADATA.length)
            throw new IOException(bookIndex + " != " + METADATA.length);
        // No Jesus-Chronik links at all is accepted; note that this overwrites the JESUS_CHRONIK field.
        if (jcIndex == 0)
            JESUS_CHRONIK = new String[0];
        if (jcIndex != JESUS_CHRONIK.length)
            throw new IOException(jcIndex + " != " + JESUS_CHRONIK.length);

        // Vorwort
        Book vorwort = new Book("Vorwort", BookID.INTRODUCTION, "Vorwort", "Vorwort des Übersetzers");
        bible.getBooks().add(vorwort);
        Visitor<RuntimeException> vv = getPrologVisitor(vorwort);
        // Set once a paragraph was emitted, so the next one is preceded by a paragraph break.
        boolean needParagraph = false;
        if (line.endsWith("</a><br>"))
            line = br.readLine().trim();
        while (!line.startsWith("<div align=\"right\">")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            if (line.startsWith("<h2>")) {
                // The page headline is only verified against the book's long name, not stored.
                if (!vorwort.getLongName().equals(replaceEntities(cutAffix(line, "<h2>", "</h2>"))))
                    throw new IOException(replaceEntities(cutAffix(line, "<h2>", "</h2>")));
            } else if (line.startsWith("<h4>")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line, "<h4>", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<h4 id=")) {
                parseFormattedText(vv.visitHeadline(1), cutAffix(line.replaceFirst("<h4 id=\"[a-z]+\">(</a>)?", ""), "<a href=\"#vorwort\"> /^\\</a> ", "</h4>"), null, null);
                needParagraph = false;
            } else if (line.startsWith("<div class=\"fn\">")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"fn\">", "</div>"), null, null);
            } else if (line.startsWith("<p>")) {
                if (needParagraph)
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                needParagraph = true;
                if (line.endsWith("<br />"))
                    line += br.readLine().trim();
                parseFormattedText(vv, cutAffix(line, "<p>", "</p>"), null, null);
            } else if (line.equals("<ul>")) {
                // Lists in the preface are skipped entirely.
                while (!line.equals("</ul>")) {
                    line = br.readLine();
                    // readLine() returns null at end of file; fail explicitly instead of with a NullPointerException.
                    if (line == null)
                        throw new IOException("Unexpected end of file inside <ul>");
                }
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<p> </p>");
        }
        vorwort.getChapters().get(0).getProlog().finished();
    }

    // Literals whose length doubles as a cut offset in the book parser below. The offsets used to be
    // hard-coded as 13 and 6, which does not match these literals and could cut too much or raise
    // StringIndexOutOfBoundsException on short lines; they are now derived from the literals.
    // NOTE(review): 13 and 6 are the lengths of "<p>&nbsp;</p>" and "&nbsp;" - the literals may
    // originally have been HTML entities; confirm against the real input pages.
    final String emptyParagraph = "<p> </p>";
    final String filler = " ";

    for (BookMetadata bm : METADATA) {
        if (!new File(inputDirectory, bm.filename + ".html").exists()) {
            System.out.println("*** Skipping " + bm.filename + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, bm.filename + ".html")) {
            String line = br.readLine().trim();
            line = skipLines(br, "<html>", "<head>", "<title>", "<meta ", "<link ", "</head>", "<body>", "<div style=\"background-color: #DCC2A0;\">", "<table border=", "<tbody ", "<tr><td>", "<p class=\"u3\">", "<a href=\"", "\\\\\\", "<br>", "»»");
            if (!line.equals("<p><a name=\"bb\"> </a></p>") && !line.equals("<p><a id=\"bb\"> </a></p>"))
                throw new IOException(line);
            line = skipLines(br);
            if (line.equals("<p> </p>"))
                line = br.readLine().trim();
            Book bk = new Book(bm.abbr, bm.id, bm.shortname, replaceEntities(cutAffix(line, "<h1>", "</h1>")));
            bible.getBooks().add(bk);
            line = skipLines(br, "<p class=\"u3\">", "<a href=\"#", "</p>", "<p> </p>");

            // Book introduction: a headline, one or more "e" blocks and a bold-italic closing line.
            FormattedText prolog = new FormattedText();
            prolog.getAppendVisitor().visitHeadline(1).visitText(replaceEntities(cutAffix(line, "<p class=\"u0\">", "</p>")));
            line = skipLines(br);
            boolean firstProlog = true;
            while (line.startsWith("<div class=\"e\">") && line.endsWith("</div>")) {
                if (firstProlog) {
                    firstProlog = false;
                } else {
                    prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                }
                parseFormattedText(prolog.getAppendVisitor(), cutAffix(line, "<div class=\"e\">", "</div>"), bm, null);
                line = skipLines(br);
            }
            // At least one introduction block is required.
            if (firstProlog)
                throw new IOException(line);
            prolog.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
            parseFormattedText(prolog.getAppendVisitor().visitFormattingInstruction(FormattingInstructionKind.BOLD).visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<p class=\"u1\">", "</p>"), bm, null);
            prolog.finished();
            line = skipLines(br);
            if (!line.startsWith("<h"))
                throw new IOException(line);
            // The first headline level found in the book is mapped to depth 1.
            char minHeadline = line.charAt(2);
            // Headlines are collected and emitted in front of the next verse.
            List<Headline> headlines = new ArrayList<>();
            boolean inParagraph = false;
            Chapter currentChapter = null;
            Verse currentVerse = null;
            // Footnote targets waiting for their text, and the "chapter,verse" prefix each text must carry.
            List<Visitor<RuntimeException>> footnotes = new ArrayList<>();
            List<String> footnoteVerses = new ArrayList<>();
            while (!line.equals("<hr>")) {
                if (line.startsWith(emptyParagraph)) {
                    line = line.substring(emptyParagraph.length()).trim();
                    if (line.length() == 0)
                        line = skipLines(br);
                    continue;
                }
                // Part of the physical line that is handled in a later iteration.
                String restLine = null;
                List<Visitor<RuntimeException>> newFootnotes = new ArrayList<>();
                // Drop id attributes from the leading tag so the prefix checks below match.
                while (line.matches("<[a-z0-9]+ (class=\"[^\"]+\" )?id=\"[a-z0-9]+\"[> ].*")) line = line.replaceFirst(" id=\"[a-z0-9]+\"", "");
                if (line.startsWith("<p class=\"poet\">") || line.startsWith("<p class=\"einl\">")) {
                    // 16 == length of either opening tag; treat them like a plain <p>.
                    line = "<p>" + line.substring(16);
                }
                if (line.matches(".*</p>.+")) {
                    int pos = line.indexOf("</p>");
                    restLine = line.substring(pos + 4).trim();
                    line = line.substring(0, pos + 4);
                }
                if (!inParagraph && line.startsWith("<p>")) {
                    inParagraph = true;
                    line = line.substring(3).trim();
                    if (line.length() == 0) {
                        line = skipLines(br);
                        continue;
                    }
                }
                // A second verse marker or poetry paragraph on the same line is deferred to restLine.
                if (line.indexOf("<span class=\"vers\">", 1) != -1) {
                    int pos = line.indexOf("<span class=\"vers\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                if (line.indexOf("<p class=\"poet\">", 1) != -1) {
                    int pos = line.indexOf("<p class=\"poet\">", 1);
                    restLine = line.substring(pos) + (restLine == null ? "" : restLine);
                    line = line.substring(0, pos).trim();
                }
                while (line.endsWith(filler)) line = line.substring(0, line.length() - filler.length());
                if (!inParagraph && (line.startsWith("<h2>") || line.startsWith("<h3>") || line.startsWith("<h4>"))) {
                    Headline hl = new Headline(line.charAt(2) - minHeadline + 1);
                    String headline = cutAffix(line, line.substring(0, 4), "</" + line.substring(1, 4));
                    // Footnote markers inside headlines are not supported.
                    if (headline.contains("*"))
                        throw new IOException(headline);
                    hl.getAppendVisitor().visitText(replaceEntities(headline));
                    headlines.add(hl);
                } else if (inParagraph && line.startsWith("<span class=\"vers\">")) {
                    // A verse needs a chapter to be added to; fail with the offending line instead of a NullPointerException.
                    if (currentChapter == null)
                        throw new IOException(line);
                    int pos = line.indexOf("</span>");
                    if (pos == -1)
                        throw new IOException(line);
                    // 19 == "<span class=\"vers\">".length()
                    String vs = line.substring(19, pos).trim();
                    if (vs.endsWith(filler)) {
                        vs = cutAffix(vs, "", filler);
                    }
                    if (vs.matches("[0-9]+(,[0-9]+)?")) {
                        currentVerse = new Verse(vs);
                    } else {
                        throw new IOException(vs);
                    }
                    // 7 == "</span>".length()
                    line = line.substring(pos + 7);
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    if (line.startsWith(filler)) {
                        line = line.substring(filler.length());
                    }
                    for (Headline h : headlines) {
                        h.accept(currentVerse.getAppendVisitor().visitHeadline(h.getDepth()));
                    }
                    headlines.clear();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                    currentChapter.getVerses().add(currentVerse);
                } else if (inParagraph && line.startsWith("<a href=\"#top\"><span class=\"kap\">")) {
                    int chap = Integer.parseInt(cutAffix(line, "<a href=\"#top\"><span class=\"kap\">", "</span></a>"));
                    currentChapter = new Chapter();
                    currentVerse = null;
                    bk.getChapters().add(currentChapter);
                    // Chapters must appear in order without gaps.
                    if (chap != bk.getChapters().size())
                        throw new IOException(chap + "/" + bk.getChapters().size());
                    // The book introduction is attached to the first chapter only.
                    if (prolog != null) {
                        currentChapter.setProlog(prolog);
                        prolog = null;
                    }
                } else if (!inParagraph && line.startsWith("<div class=\"fn\">")) {
                    String content = cutAffix(line, "<div class=\"fn\">", "</div>");
                    if (footnoteVerses.size() == 0)
                        throw new IOException(line);
                    String prefix = footnoteVerses.remove(0) + ":";
                    if (!content.startsWith(prefix)) {
                        throw new IOException(prefix + " / " + content);
                    }
                    parseFormattedText(footnotes.remove(0), content.substring(prefix.length()).trim(), bm, null);
                } else if (inParagraph && !line.isEmpty() && (!line.startsWith("<") && !line.startsWith(filler) || line.startsWith("<span class=\"u2\">"))) {
                    // Continuation text belongs to the current verse; without one the input is malformed.
                    if (currentVerse == null)
                        throw new IOException(line);
                    if (line.endsWith("</p>")) {
                        inParagraph = false;
                        line = line.substring(0, line.length() - 4);
                    }
                    line = line.trim();
                    parseFormattedText(currentVerse.getAppendVisitor(), line, bm, newFootnotes);
                    if (!inParagraph)
                        currentVerse.getAppendVisitor().visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    System.err.println("Next line: " + br.readLine());
                    throw new IOException(line);
                }
                if (!newFootnotes.isEmpty()) {
                    footnotes.addAll(newFootnotes);
                    // One expected prefix per footnote; verse numbers that already contain a comma are used as they are.
                    for (int i = 0; i < newFootnotes.size(); i++) {
                        if (currentVerse.getNumber().contains(",")) {
                            footnoteVerses.add(currentVerse.getNumber());
                        } else {
                            footnoteVerses.add(bk.getChapters().size() + "," + currentVerse.getNumber());
                        }
                    }
                }
                if (restLine != null)
                    line = restLine;
                else
                    line = skipLines(br);
            }
            // Everything collected must have been consumed by the end of the book.
            if (!headlines.isEmpty())
                throw new IOException("" + headlines.size());
            if (!footnotes.isEmpty() || !footnoteVerses.isEmpty())
                throw new IOException(footnotes.size() + "/" + footnoteVerses.size());
            for (Chapter ch : bk.getChapters()) {
                for (Verse vv : ch.getVerses()) {
                    vv.trimWhitespace();
                    vv.finished();
                }
            }
        }
    }

    // Anhang
    Book anhang = new Book("Anhang", BookID.APPENDIX, "Anhang", "Anhang");
    bible.getBooks().add(anhang);
    Visitor<RuntimeException> vv = getPrologVisitor(anhang);
    vv.visitHeadline(1).visitText("Ausblick auf die ganze Bibel");
    try (BufferedReader br = createReader(inputDirectory, "bibel.html")) {
        String line = br.readLine().trim();
        while (!line.startsWith("<a name=\"at\">")) {
            line = br.readLine().trim();
        }
        while (!line.equals("</body>")) {
            line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
            line = line.replaceAll("> +<", "><");
            line = line.replace("<td valign=\"top\"><br /><br /><a href", "<td valign=\"top\"><a href");
            if (line.startsWith("<h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h2>")) {
                parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<a href=\"#top\"><h2>", "</h2></a>"), null, null);
            } else if (line.startsWith("<h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<h3>", "</h3>"), null, null);
            } else if (line.startsWith("<a href=\"#top\"><h3>")) {
                parseFormattedText(vv.visitHeadline(3), cutAffix(line, "<a href=\"#top\"><h3>", "</h3></a>"), null, null);
            } else if (line.startsWith("<td valign=\"top\"><a href=\"")) {
                // Table row: first cell links to the book file, second cell holds title and summary.
                String[] parts = cutAffix(line, "<td valign=\"top\"><a href=\"", "</a></td>").split(".html\">", 2);
                line = br.readLine().trim().replaceAll("> +<", "><").replace("html#u", "html");
                if (line.contains("<td><br /><br /><a href")) {
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                    line = line.replace("<td><br /><br /><a href", "<td><a href");
                }
                String title = cutAffix(line, "<td><a href=\"" + parts[0] + ".html\">", "</a><br />");
                Visitor<RuntimeException> bold = vv.visitFormattingInstruction(FormattingInstructionKind.BOLD);
                BookMetadata m = null;
                for (BookMetadata bm : METADATA) {
                    if (bm.filename.equals(parts[0])) {
                        m = bm;
                        break;
                    }
                }
                // A link to a file that is not a known book used to end in a NullPointerException.
                if (m == null)
                    throw new IOException("Unknown book file: " + parts[0]);
                bold.visitCrossReference(m.abbr, m.id, 1, "1", 1, "1").visitText(replaceEntities(parts[1].replace("-", "")));
                bold.visitText(" " + replaceEntities(title));
                vv.visitLineBreak(LineBreakKind.NEWLINE);
                line = br.readLine().trim();
                // The summary cell may span several physical lines.
                while (!line.endsWith("</td>")) line += " " + br.readLine().trim();
                vv.visitText(replaceEntities(cutAffix(line, "", "</td>")));
                vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                line = br.readLine().trim();
                if (!line.equals("</tr>"))
                    throw new IOException(line);
            } else {
                throw new IOException(line);
            }
            line = skipLines(br, "<table border=\"0\" width=\"350\">", "<colgroup>", "<p> </p><p> </p><p> </p><p> </p>", "<p> </p>", "</div", "</td></tr>", "</tbody>", "</colgroup>", "<col ", "<tr>", "</table>");
        }
    }

    // Hesekiels Tempel
    vv.visitHeadline(1).visitText("Hesekiels Tempel");
    Visitor<RuntimeException> vvv = vv.visitFormattingInstruction(FormattingInstructionKind.LINK);
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "<a href=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" target=\"_blank\">");
    vvv.visitFormattingInstruction(FormattingInstructionKind.BOLD).visitText("Rekonstruktionszeichnung");
    vvv.visitRawHTML(RawHTMLMode.OFFLINE, "</a>");
    vv.visitRawHTML(RawHTMLMode.ONLINE, "<br /><img src=\"http://www.alt.kh-vanheiden.de/NeUe/Bibeltexte/Hesekiels%20Tempel.gif\" width=\"640\" height=\"635\">");

    // Jesus-Chronik
    if (JESUS_CHRONIK.length > 0)
        vv.visitHeadline(1).visitText("Die Jesus-Chronik");
    for (String name : JESUS_CHRONIK) {
        if (!new File(inputDirectory, name + ".html").exists()) {
            System.out.println("*** Skipping " + name + " - file not found ***");
            continue;
        }
        try (BufferedReader br = createReader(inputDirectory, name + ".html")) {
            String line = skipLines(br, "<html>", "<head>", "<title> Die Jesus-Biografie</title>", "<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">", "</head>", "<body>");
            // Footnote targets and the prefix each footnote text must start with, in order of appearance.
            List<Visitor<RuntimeException>> footnoteList = new ArrayList<>();
            List<String> footnotePrefixes = new ArrayList<>();
            while (!line.startsWith("</body>")) {
                line = line.replaceAll("<a name=\"[a-z]+\"></a>", "");
                if (line.startsWith("<h2>")) {
                    parseFormattedText(vv.visitHeadline(2), cutAffix(line, "<h2>", "</h2>"), null, null);
                } else if (line.startsWith("<div class=\"fn\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    String[] fns = cutAffix(line, "<div class=\"fn\">", "</div>").split("<br />");
                    for (String fn : fns) {
                        fn = fn.trim();
                        // Same guard as in the book parser: a footnote text without a pending marker is malformed input.
                        if (footnotePrefixes.isEmpty() || footnoteList.isEmpty())
                            throw new IOException(line);
                        String pfx = footnotePrefixes.remove(0);
                        Visitor<RuntimeException> fnv = footnoteList.remove(0);
                        if (!fn.startsWith(pfx))
                            throw new IOException(pfx + " / " + fn);
                        parseFormattedText(fnv, cutAffix(fn, pfx, ""), null, null);
                    }
                } else if (line.startsWith("<p><div class=\"rot\">")) {
                    String text = cutAffix(line, "<p><div class=\"rot\">", "<!--/DATE--></div></p>").replace("<!--DATE-->", "");
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), text, null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p><b>") && line.contains("</b><br />")) {
                    // Bold first line becomes a headline, the remainder (if any) is emitted in italics.
                    int pos = line.indexOf("</b><br />");
                    parseJesusChronikText(vv.visitHeadline(3), line.substring(6, pos), footnotePrefixes, footnoteList);
                    String xref = cutAffix(line.substring(pos), "</b><br />", "</p>");
                    if (!xref.isEmpty())
                        parseJesusChronikText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), xref, footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<p>")) {
                    parseJesusChronikText(vv, cutAffix(line, "<p>", "</p>"), footnotePrefixes, footnoteList);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("©")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv, cutAffix(line, "", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else if (line.startsWith("<div class=\"e\">")) {
                    while (!line.endsWith("</div>")) line += " " + br.readLine().trim();
                    parseFormattedText(vv.visitFormattingInstruction(FormattingInstructionKind.ITALIC), cutAffix(line, "<div class=\"e\">", "</div>"), null, null);
                    vv.visitLineBreak(LineBreakKind.PARAGRAPH);
                } else {
                    throw new IOException(line);
                }
                line = skipLines(br);
            }
            if (!footnoteList.isEmpty() || !footnotePrefixes.isEmpty())
                throw new IOException(footnoteList.size() + " / " + footnotePrefixes.size());
        }
    }
    anhang.getChapters().get(0).getProlog().trimWhitespace();
    anhang.getChapters().get(0).getProlog().finished();
    return bible;
}
use of biblemulticonverter.data.MetadataBook in project BibleMultiConverter by schierlm.
the class OSIS method doImport.
/**
 * Imports a bible from an OSIS XML file.
 *
 * <p>The file is first handed to {@code ValidateXML.validateFileBeforeParsing} together with the
 * bundled {@code osisCore.2.1.1.xsd} schema and then parsed into a DOM. The work title becomes the
 * bible name ("OSIS Bible" when it is empty). A metadata book is added only when the header carries
 * a description or a rights statement. Every {@code div} of type {@code book} is turned into a
 * {@link Book}; books written in milestoned form ({@code sID}/{@code eID} markers) are first
 * rewritten so that the content between the markers becomes child content of the start element.
 *
 * @param inputFile the OSIS XML file to read
 * @return the imported bible
 * @throws IllegalStateException if a milestoned book has no end marker, or start and end marker
 *         cannot be made siblings
 * @throws Exception on validation, parsing or XPath failures
 */
@Override
public Bible doImport(File inputFile) throws Exception {
    ValidateXML.validateFileBeforeParsing(SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(ObjectFactory.class.getResource("/osisCore.2.1.1.xsd")), inputFile);
    printedWarnings.clear();
    DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    XPath xp = javax.xml.xpath.XPathFactory.newInstance().newXPath();
    Document dom = builder.parse(inputFile);

    // Bible name, with a fixed fallback for files without a work title.
    String workTitle = xp.evaluate("/osis/osisText/header/work/title/text()", dom);
    Bible bible = new Bible(workTitle.isEmpty() ? "OSIS Bible" : workTitle);

    // Metadata is only emitted when a description or a rights statement exists.
    String workDescription = xp.evaluate("/osis/osisText/header/work/description/text()", dom);
    String workRights = xp.evaluate("/osis/osisText/header/work/rights/text()", dom);
    if (!(workDescription.isEmpty() && workRights.isEmpty())) {
        String workDate = xp.evaluate("/osis/osisText/header/work/date/text()", dom);
        String titlePageDescription = xp.evaluate("/osis/osisText/titlePage/description/text()", dom);
        MetadataBook metadataBook = new MetadataBook();
        if (!workDescription.isEmpty()) {
            String collapsed = workDescription.replaceAll("[\r\n\t ]+", " ").trim();
            metadataBook.setValue(MetadataBookKey.description, collapsed);
        }
        if (!workRights.isEmpty()) {
            String collapsed = workRights.replaceAll("[\r\n\t ]+", " ").trim();
            metadataBook.setValue(MetadataBookKey.rights, collapsed);
        }
        if (!workDate.isEmpty()) {
            metadataBook.setValue(MetadataBookKey.date, workDate);
        }
        if (!titlePageDescription.isEmpty()) {
            String collapsed = titlePageDescription.replaceAll("[\r\n\t ]+", " ").trim();
            metadataBook.setValue("description@titlePage", collapsed);
        }
        metadataBook.finished();
        bible.getBooks().add(metadataBook.getBook());
    }

    NodeList bookDivs = (NodeList) xp.evaluate("/osis/osisText//div[@type='book']", dom, XPathConstants.NODESET);
    int idx = 0;
    while (idx < bookDivs.getLength()) {
        Element bookDiv = (Element) bookDivs.item(idx);
        String startId = bookDiv.getAttribute("sID");
        if (!startId.isEmpty()) {
            // Milestoned book: locate the matching end marker.
            Element endMarker = (Element) xp.evaluate("//div[@eID='" + startId + "']", dom, XPathConstants.NODE);
            if (endMarker == null) {
                throw new IllegalStateException("No milestoned div found with eID " + startId);
            }
            if (!endMarker.getParentNode().isSameNode(bookDiv.getParentNode())) {
                // Markers live in different parents: find the closest node that contains both ...
                List<Node> endAncestors = new ArrayList<>();
                for (Node n = endMarker; n != null; n = n.getParentNode()) {
                    endAncestors.add(n);
                }
                Node shared = null;
                for (Node n = bookDiv; n != null && shared == null; n = n.getParentNode()) {
                    for (Node ancestor : endAncestors) {
                        if (n.isSameNode(ancestor)) {
                            shared = n;
                            break;
                        }
                    }
                }
                if (shared == null) {
                    throw new IllegalStateException("Unable to find common parent of milestoned div start and end tag");
                }
                // ... and let convertToMilestoned rewrite that subtree; afterwards both markers must be siblings.
                convertToMilestoned((Element) shared);
                if (!endMarker.getParentNode().isSameNode(bookDiv.getParentNode())) {
                    throw new IllegalStateException("Unable to normalize XML so that milestoned div start and end tags are siblings");
                }
            }
            // Move everything between the markers into the start element, then drop the end marker.
            for (Node following = bookDiv.getNextSibling(); following != null && !following.isSameNode(endMarker); following = bookDiv.getNextSibling()) {
                bookDiv.appendChild(following);
            }
            endMarker.getParentNode().removeChild(endMarker);
        }

        String osisId = bookDiv.getAttribute("osisID");
        BookID id = BookID.fromOsisId(osisId);
        // Default title is the English book name; a leading non-empty main title overrides it.
        String bookTitle = id.getEnglishName();
        Node firstNonText = bookDiv.getFirstChild();
        for (; firstNonText instanceof Text; firstNonText = firstNonText.getNextSibling()) {
            // skip text nodes in front of the first element
        }
        if (firstNonText instanceof Element && firstNonText.getNodeName().equals("title")) {
            Element titleNode = (Element) firstNonText;
            boolean isMainTitle = titleNode.getAttribute("type").equals("main");
            if (isMainTitle && titleNode.getChildNodes().getLength() > 0) {
                bookTitle = titleNode.getTextContent();
            }
        }
        Book book = new Book(osisId, id, bookTitle, bookTitle);
        bible.getBooks().add(book);
        parseBook(osisId, bookDiv, book);
        idx++;
    }
    return bible;
}
Aggregations