use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class DocxBuilder method writeDocxForDoc.
/**
 * Exports the selected pages of a document into a single docx file.
 *
 * <p>The export options are copied into static fields of this class before any page is
 * processed, so overlapping invocations would overwrite each other's settings.
 *
 * <p>After the page content an INDEX field is appended and all fields are refreshed through
 * {@code FieldUpdater} before the package is saved.
 *
 * @param doc document whose pages are exported
 * @param wordBased forwarded to {@code writeDocxForTranscriptWithTables}
 * @param writeTags stored in {@code exportTags}
 * @param doBlackeningSensibleData stored in {@code doBlackening}
 * @param file target file the package is saved to
 * @param pageIndices zero-based indices of the pages to export; {@code null} exports all pages
 * @param monitor optional progress monitor; may be {@code null}
 * @param createTitle if {@code true}, a title page followed by a page break precedes the first exported page
 * @param markUnclear stored in {@code markUnclearWords}
 * @param expandAbbreviations stored in {@code expandAbbrevs}
 * @param replaceAbbrevs stored in {@code substituteAbbrevs}
 * @param keepLineBreaks stored in {@code preserveLineBreaks} and forwarded to the page writer
 * @param showSuppliedInBrackets stored in {@code showSuppliedWithBrackets}
 * @param ignoreSuppliedTags stored in {@code ignoreSupplied}
 * @param cache source of the selected tag names and, when it has one, of the page transcripts
 * @throws InterruptedException if the monitor reports cancellation
 * @throws JAXBException declared for the transcript/docx processing called from here
 * @throws IOException declared for the transcript/docx processing called from here
 * @throws Docx4JException declared for the docx4j calls made from here
 */
public static void writeDocxForDoc(TrpDoc doc, boolean wordBased, boolean writeTags, boolean doBlackeningSensibleData, File file, Set<Integer> pageIndices, IProgressMonitor monitor, boolean createTitle, boolean markUnclear, boolean expandAbbreviations, boolean replaceAbbrevs, boolean keepLineBreaks, boolean showSuppliedInBrackets, boolean ignoreSuppliedTags, ExportCache cache) throws JAXBException, IOException, Docx4JException, InterruptedException {
    // Only touch the log level when the logger really is a logback logger; an unconditional
    // cast fails with ClassCastException under any other logging backend.
    if (logger instanceof ch.qos.logback.classic.Logger) {
        ((ch.qos.logback.classic.Logger) logger).setLevel(ch.qos.logback.classic.Level.DEBUG);
    }

    exportTags = writeTags;
    doBlackening = doBlackeningSensibleData;
    // NOTE(review): cache is dereferenced here without a null check although the page loop
    // below tolerates a null cache - confirm whether callers may pass null.
    tagnames = cache.getOnlySelectedTagnames(ExportUtils.getOnlyWantedTagnames(CustomTagFactory.getRegisteredTagNames()));
    markUnclearWords = markUnclear;
    expandAbbrevs = expandAbbreviations;
    preserveLineBreaks = keepLineBreaks;
    substituteAbbrevs = replaceAbbrevs;
    showSuppliedWithBrackets = showSuppliedInBrackets;
    ignoreSupplied = ignoreSuppliedTags;

    // main document part
    wordMLPackage = WordprocessingMLPackage.createPackage();
    MainDocumentPart mdp = wordMLPackage.getMainDocumentPart();
    org.docx4j.wml.ObjectFactory factory = Context.getWmlObjectFactory();

    List<TrpPage> pages = doc.getPages();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to docx", totalPages);
    }

    // paragraph holding a page break; used after the title page
    Br objBr = new Br();
    objBr.setType(STBrType.PAGE);
    P pageBreakP = factory.createP();
    pageBreakP.getContent().add(objBr);

    int pagesDone = 0;
    boolean atLeastOnePageWritten = false;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }

        // the title page goes in front of the first page that is actually exported
        if (!atLeastOnePageWritten && createTitle) {
            addTitlePage(doc, mdp);
            mdp.addObject(pageBreakP);
        }

        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export canceled by the user");
            }
            monitor.subTask("Processing page " + (pagesDone + 1));
        }

        // prefer the transcript held by the cache, otherwise build it from the page
        JAXBPageTranscript tr = null;
        if (cache != null) {
            tr = cache.getPageTranscriptAtIndex(i);
        }
        if (tr == null) {
            TrpPage page = pages.get(i);
            TrpTranscriptMetadata md = page.getCurrentTranscript();
            tr = new JAXBPageTranscript(md);
            tr.build();
        }

        TrpPageType trpPage = tr.getPage();
        logger.debug("writing docx for the page {}/{}", i + 1, doc.getNPages());
        writeDocxForTranscriptWithTables(mdp, trpPage, wordBased, preserveLineBreaks);
        atLeastOnePageWritten = true;
        ++pagesDone;

        if (monitor != null) {
            // worked() takes the increment since the last call, not the running total
            // (assuming the Eclipse IProgressMonitor contract); passing the cumulative
            // count would exceed the totalPages budget announced in beginTask().
            monitor.worked(1);
        }
    }

    // index field at the end of the document, then refresh all fields
    P p = factory.createP();
    mdp.getContent().add(p);
    addComplexField(p, " INDEX \\e \"", "\" \\c \"1\" \\z \"1031\"");
    FieldUpdater updater = new FieldUpdater(wordMLPackage);
    updater.update(true);

    // The per-tag summary that used to follow here was permanently disabled
    // (guarded by "if (false)") and has been removed as unreachable code.

    // finally save the file
    wordMLPackage.save(file);
    logger.info("Saved {}", file.getAbsolutePath());
}
use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class DocxBuilder method addIndexEntry.
/**
 * Adds one XE (index entry) field to the paragraph for every tag registered in
 * {@code idxList} under the given position.
 *
 * <p>The entry text is the part of {@code textStr} between the tag's offset and {@code idx}.
 * For an {@code AbbrevTag} with a non-empty expansion, the expansion is attached in square
 * brackets (appended for left-to-right text, prepended for right-to-left text). Entries whose
 * text consists only of {@code *} characters are skipped.
 *
 * @param idx position in {@code textStr} used as the lookup key and as the end of the entry text
 * @param p paragraph that receives the fields
 * @param textStr text the entry is cut from
 * @param rtl if {@code true}, entry text and expansion are reversed via {@code reverseString}
 */
private static void addIndexEntry(int idx, P p, String textStr, boolean rtl) {
    ArrayList<CustomTag> allTagsAtThisPlace = idxList.get(idx);
    if (allTagsAtThisPlace == null) {
        // nothing registered for this position
        return;
    }

    for (CustomTag ct : allTagsAtThisPlace) {
        String idxText = textStr.substring(ct.getOffset(), idx);
        if (rtl) {
            idxText = reverseString(idxText);
        }

        if (ct instanceof AbbrevTag) {
            String expansion = ((AbbrevTag) ct).getExpansion();
            // a missing expansion is treated like an empty one instead of failing with an NPE
            if (expansion != null && !expansion.isEmpty()) {
                if (rtl) {
                    idxText = "[" + reverseString(expansion) + "] " + idxText;
                } else {
                    idxText = idxText + " [" + expansion + "]";
                }
            }
        }

        // skip entries made up solely of asterisks
        if (!idxText.matches("[*]+")) {
            addComplexField(p, "XE\"" + ct.getTagName() + ":" + idxText + "\"", "");
        }
    }
}
use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class TrpRtfBuilder method getRtfTextForShapeElement.
/**
 * Builds the RTF representation of a shape element's text, one {@code RtfText} per character.
 *
 * <p>If {@code doBlackening} is set, the text is first passed through
 * {@code ExportUtils.blackenString} for every non-indexed and indexed tag whose name equals the
 * lower-cased {@code RegionTypeUtil.BLACKENING_REGION}. Each character is then formatted with
 * the element's own text style and, on top of that, with the style of every text style tag
 * whose {@code [offset, offset + length)} range covers the character.
 *
 * @param element shape element providing the text, its custom tags and its text styles
 * @return the concatenation of all formatted characters
 * @throws IOException if the element has no text or no custom tag list
 */
private static RtfText getRtfTextForShapeElement(ITrpShapeType element) throws IOException {
    String textStr = element.getUnicodeText();
    CustomTagList cl = element.getCustomTagList();
    if (textStr == null || cl == null) {
        throw new IOException("Element has no text or custom tag list: " + element + ", class: " + element.getClass().getName());
    }

    if (doBlackening) {
        // Locale.ROOT keeps the comparison independent of the JVM's default locale
        // (e.g. the Turkish dotless-i mapping); computed once instead of per tag.
        final String blackeningTagName = RegionTypeUtil.BLACKENING_REGION.toLowerCase(java.util.Locale.ROOT);

        for (CustomTag nonIndexedTag : cl.getNonIndexedTags()) {
            if (blackeningTagName.equals(nonIndexedTag.getTagName())) {
                textStr = ExportUtils.blackenString(nonIndexedTag, textStr);
            }
        }
        for (CustomTag indexedTag : cl.getIndexedTags()) {
            if (blackeningTagName.equals(indexedTag.getTagName())) {
                textStr = ExportUtils.blackenString(indexedTag, textStr);
            }
        }
    }

    List<TextStyleTag> textStylesTags = element.getTextStyleTags();

    RtfText[] chars = new RtfText[textStr.length()];
    for (int i = 0; i < textStr.length(); ++i) {
        chars[i] = RtfText.text(textStr.substring(i, i + 1));
        // format according to the "global" text style of the element
        chars[i] = formatRtfText(chars[i], element.getTextStyle());
        // then apply every custom style tag whose range contains this character
        for (TextStyleTag styleTag : textStylesTags) {
            if (i >= styleTag.getOffset() && i < (styleTag.getOffset() + styleTag.getLength())) {
                chars[i] = formatRtfText(chars[i], styleTag.getTextStyle());
            }
        }
        // TODO: include structure types!! (also possible in custom tags!!)
        // TODO: include reading order!!!
    }

    return RtfText.text(false, (Object[]) chars);
}
use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class ExportCache method getTagsForShapeElement.
/**
 * Hands every custom tag of the given shape element to {@code storeCustomTag}, together with
 * the element's text.
 *
 * <p>Processing order:
 * <ol>
 *   <li>all non-indexed tags except {@code readingOrder} are stored with the original text;</li>
 *   <li>if {@code doBlackening} is set, the text is run through
 *       {@code ExportUtils.blackenString} for each indexed {@code BlackeningTag};</li>
 *   <li>all indexed tags are stored with the (possibly blackened) text.</li>
 * </ol>
 *
 * <p>NOTE(review): step 1 happens before blackening, so non-indexed tags are stored with the
 * un-blackened text - confirm this is intended.
 *
 * @param element shape element whose tags are collected
 * @throws IOException if the element has no text or no custom tag list
 */
private void getTagsForShapeElement(ITrpShapeType element) throws IOException {
    String text = element.getUnicodeText();
    CustomTagList tagList = element.getCustomTagList();
    if (text == null || tagList == null) {
        throw new IOException("Element has no text or custom tag list: " + element + ", class: " + element.getClass().getName());
    }

    // 1) non-indexed tags, reading order excluded
    for (CustomTag tag : tagList.getNonIndexedTags()) {
        if (tag.getTagName().equals("readingOrder")) {
            continue;
        }
        storeCustomTag(tag, text);
    }

    // 2) blacken the text if requested
    if (doBlackening) {
        for (CustomTag tag : tagList.getIndexedTags()) {
            if (!(tag instanceof BlackeningTag)) {
                continue;
            }
            text = ExportUtils.blackenString(tag, text);
        }
    }

    // 3) indexed tags, stored with the text as it stands now
    for (CustomTag tag : tagList.getIndexedTags()) {
        storeCustomTag(tag, text);
    }
}
Aggregations