use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class TrpRtfBuilder method writeRtfForDoc.
/**
 * Exports the current transcript of every selected page of a document into one RTF file.
 *
 * <p>Side effects: overwrites the static fields {@code exportTags}, {@code tagnames} and
 * {@code TrpRtfBuilder.doBlackening} before the export starts.</p>
 *
 * @param doc          document whose pages are exported
 * @param wordBased    forwarded unchanged to {@code getRtfParagraphsForTranscript}
 * @param writeTags    if {@code true}, a tag index section is appended after the last page
 * @param doBlackening stored in {@code TrpRtfBuilder.doBlackening}
 * @param file         target file that receives the RTF output
 * @param pageIndices  zero-based positions in {@code doc.getPages()} to export, or {@code null} for all pages
 * @param monitor      optional progress monitor (may be {@code null}); if it reports cancellation the
 *                     method returns early and {@code file} is not written
 * @param cache        supplies the selected tag names and, per tag name, the tags with their text
 * @throws JAXBException propagated from the transcript handling calls
 * @throws IOException   if the target file cannot be opened, written or closed
 */
public static void writeRtfForDoc(TrpDoc doc, boolean wordBased, boolean writeTags, boolean doBlackening, File file, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws JAXBException, IOException {
    exportTags = writeTags;
    tagnames = cache.getSelectedTags();
    TrpRtfBuilder.doBlackening = doBlackening;

    Rtf rtf = Rtf.rtf();
    List<TrpPage> pages = doc.getPages();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to RTF", totalPages);
    }

    // c counts the pages actually exported (i also runs over skipped pages)
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                logger.debug("RTF export cancelled!");
                return;
            }
            monitor.subTask("Processing page " + (c + 1));
        }

        TrpPage page = pages.get(i);
        TrpTranscriptMetadata md = page.getCurrentTranscript();
        JAXBPageTranscript tr = new JAXBPageTranscript(md);
        tr.build();
        TrpPageType trpPage = tr.getPage();
        logger.debug("writing rtf for page " + (i + 1) + "/" + doc.getNPages());

        // colour table entries 0 (red) and 1 (green) are registered together with each page section
        rtf.header(color(204, 0, 0).at(0), color(0, 0xff, 0).at(1)).section(getRtfParagraphsForTranscript(trpPage, wordBased));

        ++c;
        if (monitor != null) {
            // NOTE(review): c is the cumulative page count; if this monitor follows the Eclipse
            // IProgressMonitor contract, worked() expects an increment (i.e. 1) — confirm before changing.
            monitor.worked(c);
        }
    }

    // write tags at end of last page; skipped when no tag names were selected at all
    if (exportTags && tagnames != null) {
        rtf.header(color(204, 0, 0).at(0)).section(buildTagIndexParagraphs(cache));
    }

    // try-with-resources guarantees the file handle is released even if writing fails
    try (FileWriter out = new FileWriter(file)) {
        rtf.out(out);
    }
    logger.info("wrote rtf to: " + file.getAbsolutePath());
}

/**
 * Builds the paragraphs of the tag index: for each selected tag name that has at least one tag,
 * an underlined headline with the number of tags, followed by one paragraph listing the tag texts.
 */
private static ArrayList<RtfPara> buildTagIndexParagraphs(ExportCache cache) throws JAXBException, IOException {
    ArrayList<RtfPara> tagParas = new ArrayList<RtfPara>();
    // tagnames = all tags chosen by the user in the export dialog
    for (String currTagname : tagnames) {
        HashMap<CustomTag, String> tagsOfThisName = cache.getTags(currTagname);
        if (tagsOfThisName == null || tagsOfThisName.isEmpty()) {
            continue;
        }
        tagParas.add(RtfPara.p(RtfText.text(RtfText.underline(currTagname + " tags in this document: " + tagsOfThisName.size()))));

        Collection<String> valueSet = tagsOfThisName.values();
        RtfText[] tagTexts = new RtfText[valueSet.size()];
        int l = 0;
        for (String currEntry : valueSet) {
            tagTexts[l++] = RtfText.text(currEntry.concat("\n"));
        }
        tagParas.add(RtfPara.p(tagTexts));
    }
    return tagParas;
}
use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class ExportUtils method getAllTagsOfThisTypeForShapeElement.
/**
 * Collects all custom tags of a shape element whose tag name equals {@code type}.
 *
 * <p>Non-indexed tags are visited first, then indexed tags; the returned map keeps that
 * order. Every entry maps the tag to the complete unicode text of the element.</p>
 *
 * @param element shape element providing the text and the custom tag list
 * @param type    tag name to look for
 * @return insertion-ordered map from matching tag to the element's text (possibly empty)
 */
public static LinkedHashMap<CustomTag, String> getAllTagsOfThisTypeForShapeElement(ITrpShapeType element, String type) throws IOException {
    final String shapeText = element.getUnicodeText();
    final CustomTagList tagList = element.getCustomTagList();
    final LinkedHashMap<CustomTag, String> matches = new LinkedHashMap<CustomTag, String>();

    for (CustomTag candidate : tagList.getNonIndexedTags()) {
        if (!candidate.getTagName().equals(type)) {
            continue;
        }
        matches.put(candidate, shapeText);
    }

    for (CustomTag candidate : tagList.getIndexedTags()) {
        if (!candidate.getTagName().equals(type)) {
            continue;
        }
        matches.put(candidate, shapeText);
    }

    return matches;
}
use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class ExportUtils method getAllTagsForShapeElement.
/**
 * Collects the custom tags of a shape element, leaving out text style and blackening tags
 * (and, among the indexed tags only, reading order tags).
 *
 * <p>Non-indexed tags are visited first, then indexed tags; the returned map keeps that
 * order. Every entry maps the tag to the complete unicode text of the element.</p>
 *
 * @param element shape element providing the text and the custom tag list
 * @return insertion-ordered map from tag to the element's text (possibly empty)
 */
public static LinkedHashMap<CustomTag, String> getAllTagsForShapeElement(ITrpShapeType element) throws IOException {
    final String shapeText = element.getUnicodeText();
    final CustomTagList tagList = element.getCustomTagList();
    final LinkedHashMap<CustomTag, String> collected = new LinkedHashMap<CustomTag, String>();

    // NOTE(review): reading order tags are filtered only in the indexed loop below, not here —
    // confirm whether that asymmetry is intended.
    for (CustomTag tag : tagList.getNonIndexedTags()) {
        final String name = tag.getTagName();
        if (name.equals(TextStyleTag.TAG_NAME) || name.equals(BlackeningTag.TAG_NAME)) {
            continue;
        }
        collected.put(tag, shapeText);
    }

    for (CustomTag tag : tagList.getIndexedTags()) {
        final String name = tag.getTagName();
        if (name.equals(TextStyleTag.TAG_NAME) || name.equals(BlackeningTag.TAG_NAME) || name.equals(ReadingOrderTag.TAG_NAME)) {
            continue;
        }
        collected.put(tag, shapeText);
    }

    return collected;
}
use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class CustomTagListTest method testMultipleRandomIndexedAddOrMergeTag.
// @Ignore
/**
 * Adds 1000 randomly generated tags (structure tags, text style tags and non-indexed tags)
 * to the tag list of a line containing "Hello world!". Offsets and lengths may exceed the text
 * range on purpose: an indexed tag that is not inside the text must raise an
 * IndexOutOfBoundsException, a tag inside the text must not. The list integrity is checked
 * after every insertion.
 */
@Test
public void testMultipleRandomIndexedAddOrMergeTag() {
    TrpTextLineType line = new TrpTextLineType(new TrpTextRegionType(new TrpPageType()));
    line.setUnicodeText("Hello world!", null);
    CustomTagList tagList = new CustomTagList(line);
    int textLength = tagList.getTextLength();

    // reference tag spanning the whole text, used to classify the random tags
    CustomTag wholeRangeTag = new CustomTag("test", 0, textLength);
    String[] nonIndexedNames = new String[] { "a_non_indexed", "b_ni", "c_balbla_non_indexed" };

    // exceed the text length by 5 so that out-of-bounds tags are generated too
    int rangeOfTags = textLength + 5;
    final int iterations = 1000;

    for (int round = 0; round < iterations; ++round) {
        int offset = rand.nextInt(rangeOfTags);
        int length = rand.nextInt(rangeOfTags - offset) + 1;

        CustomTag ct;
        switch (rand.nextInt(3)) {
        case 0:
            // structure tag
            ct = new CustomTag("a_test_indexed", offset, length);
            break;
        case 1: {
            // text style tag
            TextStyleTag styleTag = new TextStyleTag(offset, length);
            styleTag.setBold(rand.nextBoolean());
            styleTag.setItalic(rand.nextBoolean());
            styleTag.setMonospace(rand.nextBoolean());
            ct = styleTag;
            break;
        }
        default:
            // non-indexed tag
            ct = new CustomTag(nonIndexedNames[rand.nextInt(3)]);
            break;
        }

        logger.trace("i=" + round + "/" + iterations);
        logger.trace("adding custom tag: " + ct);
        logger.trace("list before = " + tagList);
        try {
            tagList.addOrMergeTag(ct, null);
            Assert.assertTrue("Indexed CustomTag was not inside but no exception thrown: " + ct, !ct.isIndexed() || wholeRangeTag.getOverlapType(ct) == OverlapType.INSIDE);
        } catch (IndexOutOfBoundsException outOfBounds) {
            Assert.assertTrue("CustomTag was inside but exception thrown: " + ct, wholeRangeTag.getOverlapType(ct) != OverlapType.INSIDE);
            logger.trace("Exception for tag not inside: " + ct);
        }
        logger.trace("list after = " + tagList);
        checkIntegrity(tagList);
    }
    logger.info("list = " + tagList);
}
use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class DocxBuilder method getFormattedTextForShapeElement.
/**
 * Converts the text of one shape element into docx runs (one run per character plus extra runs
 * for gaps, abbreviation expansions, supplied text, unclear markers, footnotes and a trailing
 * space) and appends them to the paragraph {@code p}.
 *
 * <p>The method works in three phases:</p>
 * <ol>
 * <li>scan the non-indexed and indexed custom tags and record their positions in the shared
 * lookup collections (gapList, commentList, unclearList, expandAbbrevList, substituteAbbrevList,
 * showSuppliedList, ignoreSuppliedList, idxList) — these are declared outside this method;</li>
 * <li>walk over the text character by character, emitting runs and consulting those collections;</li>
 * <li>add the collected runs to {@code p} (in reverse order for right-to-left text) and call
 * clearAllLists().</li>
 * </ol>
 *
 * @param element shape element providing the unicode text, custom tag list and text style tags
 * @param p       paragraph that receives the generated runs
 * @param mdp     main document part, passed on to createFootnote for comment tags
 * @throws Exception an IOException if the element has no text or no custom tag list; other
 *                   exceptions may be propagated from the helpers called here
 */
private static void getFormattedTextForShapeElement(ITrpShapeType element, P p, MainDocumentPart mdp) throws Exception {
ArrayList<R> listOfallRuns = new ArrayList<R>();
String textStr = element.getUnicodeText();
CustomTagList cl = element.getCustomTagList();
if (textStr == null || cl == null)
throw new IOException("Element has no text or custom tag list: " + element + ", class: " + element.getClass().getName());
// nothing to write for an empty text
if (textStr.isEmpty()) {
return;
}
boolean rtl = false;
// from right to left: decided by the directionality of the first character only
if (Character.getDirectionality(textStr.charAt(0)) == Character.DIRECTIONALITY_RIGHT_TO_LEFT || Character.getDirectionality(textStr.charAt(0)) == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC || Character.getDirectionality(textStr.charAt(0)) == Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING || Character.getDirectionality(textStr.charAt(0)) == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE) {
logger.debug("&&&&&&&& STRING IS RTL : ");
// NOTE(review): the return value of deleteCharAtIndex is not used; if it returns a new String
// (Strings are immutable), textStr is left unchanged here — confirm against the helper.
deleteCharAtIndex(0, textStr);
rtl = true;
}
// format according to tags:CustomTagList
for (CustomTag nonIndexedTag : cl.getNonIndexedTags()) {
// exchange chars with * if wished to be blackened
if (doBlackening && nonIndexedTag.getTagName().equals(RegionTypeUtil.BLACKENING_REGION.toLowerCase())) {
// logger.debug("nonindexed tag found ");
textStr = ExportUtils.blackenString(nonIndexedTag, textStr);
}
/*
* for gap and comment: remember their position to find and add them to their corresponding 'run' later on
*
*/
if (nonIndexedTag.getTagName().equals("gap")) {
GapTag gap = (GapTag) nonIndexedTag;
gapList.put(nonIndexedTag.getOffset(), gap);
}
// unclear and comments can not be non-indexed
// if (nonIndexedTag.getTagName().equals("comment")){
// logger.debug("nonindexed comment tag found ");
// CommentTag ct = (CommentTag) nonIndexedTag;
// commentList.put(nonIndexedTag.getEnd()-1, ct.getComment());
// }
// if(nonIndexedTag.getTagName().equals("unclear")){
// logger.debug("unclear tag found ");
// unclearList.put(nonIndexedTag.getOffset(), nonIndexedTag.getOffset()+nonIndexedTag.getLength());
// }
}
for (CustomTag indexedTag : cl.getIndexedTags()) {
if (doBlackening && indexedTag.getTagName().equals(RegionTypeUtil.BLACKENING_REGION.toLowerCase())) {
textStr = ExportUtils.blackenString(indexedTag, textStr);
}
/*
* find all gaps and store the offset
*/
if (indexedTag.getTagName().equals("gap")) {
GapTag gap = (GapTag) indexedTag;
gapList.put(indexedTag.getOffset(), gap);
}
// comments are keyed by the index of the last character they cover (end - 1)
if (indexedTag.getTagName().equals("comment")) {
// logger.debug("indexed comment tag found at pos " + (indexedTag.getEnd()-1));
CommentTag ct = (CommentTag) indexedTag;
commentList.put(indexedTag.getEnd() - 1, ct.getComment());
}
// if(exportTags){
// unclear: key = first covered index, value = last covered index
if (markUnclearWords && indexedTag.getTagName().equals("unclear")) {
// logger.debug("unclear tag found ");
// logger.debug("unclear start is: " + indexedTag.getOffset());
// logger.debug("unclear end is: " + (indexedTag.getEnd()-1));
unclearList.put(indexedTag.getOffset(), indexedTag.getEnd() - 1);
}
// expansion shown behind the abbreviation: keyed by the end index of the abbrev
if (expandAbbrevs && indexedTag.getTagName().equals("abbrev")) {
logger.debug("abbrev tag found ");
AbbrevTag at = (AbbrevTag) indexedTag;
String expansion = at.getExpansion();
// only add if an expansion was typed
// NOTE(review): a null expansion would raise a NullPointerException here — confirm getExpansion() never returns null
if (!expansion.equals("")) {
expandAbbrevList.put(indexedTag.getEnd(), at.getExpansion());
}
}
// expansion replacing the abbreviation: keyed by the start index of the abbrev
if (substituteAbbrevs && indexedTag.getTagName().equals("abbrev")) {
// logger.debug("abbrev tag found ");
AbbrevTag at = (AbbrevTag) indexedTag;
String expansion = at.getExpansion();
// key is the start of the abbrev
if (!expansion.equals("")) {
substituteAbbrevList.put(indexedTag.getOffset(), at);
}
}
if (showSuppliedWithBrackets && indexedTag.getTagName().equals("supplied")) {
// logger.debug("supplied tag found ");
SuppliedTag at = (SuppliedTag) indexedTag;
String text = at.getContainedText();
// only add if the supplied tag contains text
if (!text.equals("")) {
showSuppliedList.put(indexedTag.getOffset(), text);
}
}
if (ignoreSupplied && indexedTag.getTagName().equals("supplied")) {
// logger.debug("supplied tag found ");
SuppliedTag at = (SuppliedTag) indexedTag;
String text = at.getContainedText();
// only add if the supplied tag contains text
if (!text.equals("")) {
ignoreSuppliedList.put(indexedTag.getOffset(), text);
}
}
// create index for all choosen tagnames
if (exportTags && tagnames.contains(indexedTag.getTagName()) && !indexedTag.getTagName().equals("gap")) {
// logger.debug("export tag as idx entry " + indexedTag.getOffset());
addValuesToIdxList(idxList, indexedTag.getEnd(), indexedTag);
}
// }
}
List<TextStyleTag> textStylesTags = element.getTextStyleTags();
// ArrayList<R> runs = new ArrayList<R>();
boolean shapeEnded = false;
// the loop deliberately runs one step past the last character (i == textStr.length()) so that
// entries keyed by an end index can still be emitted at the end of the line
for (int i = 0; i <= textStr.length(); ++i) {
// use of abbrevIdx: this is necessary for the appearance at the end of a textline
// otherwise the abbrev expansion would not appear at the end of a line because then the index i would be too small
shapeEnded = (i + 1 >= textStr.length() ? true : false);
/*
* is this case the abbrev gets totally replaced by its expansion
* so if the start of the abbrev was found the expansion is written and we can break the writing of the abbrev
*/
if (substituteAbbrevList.containsKey(i)) {
String exp = substituteAbbrevList.get(i).getExpansion();
if (rtl) {
exp = reverseString(exp);
}
org.docx4j.wml.Text abbrevText = factory.createText();
abbrevText.setValue(exp);
org.docx4j.wml.R abbrevRun = factory.createR();
// p.getContent().add(abbrevRun);
abbrevRun.getContent().add(abbrevText);
listOfallRuns.add(abbrevRun);
// go to end of the abbreviation and proceed with remaining text
i += substituteAbbrevList.get(i).getLength();
shapeEnded = (i == textStr.length() ? true : false);
}
/*
* add expansion in brackets behind the abbrev
* the abbrev list contains as key the end index of the abbrev
*/
if (expandAbbrevList.containsKey(i)) {
String exp = expandAbbrevList.get(i);
if (rtl) {
exp = reverseString(exp);
}
org.docx4j.wml.Text abbrevText = factory.createText();
abbrevText.setValue("[" + exp + "]");
org.docx4j.wml.R abbrevRun = factory.createR();
// p.getContent().add(abbrevRun);
abbrevRun.getContent().add(abbrevText);
listOfallRuns.add(abbrevRun);
}
/*
* in this case the supplied text is written in brackets
*
*/
if (showSuppliedList.containsKey(i)) {
String exp = showSuppliedList.get(i);
if (rtl) {
exp = reverseString(exp);
}
org.docx4j.wml.Text suppliedText = factory.createText();
suppliedText.setValue("[" + exp + "]");
org.docx4j.wml.R suppliedRun = factory.createR();
suppliedRun.getContent().add(suppliedText);
listOfallRuns.add(suppliedRun);
// supplied is handled now - so set i to the end of supplied
i += showSuppliedList.get(i).length();
shapeEnded = (i == textStr.length() ? true : false);
}
/*
* in this case the supplied tag gets ignored
* this means that index i must be incremented by the length of this supplied tag text
*/
if (ignoreSuppliedList.containsKey(i)) {
i += ignoreSuppliedList.get(i).length();
shapeEnded = (i == textStr.length() ? true : false);
}
/*
* gap is at this position
* hence create extra run with [...] as value and then go on
* or, if the supplied attribute is set, handle supplied as set in the export settings
*/
if (gapList.containsKey(i)) {
org.docx4j.wml.Text t = factory.createText();
// if (!rtl)
// t.setValue("[...] ");
// else
// t.setValue(" [...]");
GapTag gt = gapList.get(i);
String cta = (String) gt.getAttributeValue("supplied");
// attribute supplied is set in the gap tag -> handle supplied as wanted
if (cta != null && !cta.equals("")) {
// the gap with supplied attribute may get ignored; in that case the run below stays without a value
if (!ignoreSupplied) {
if (showSuppliedWithBrackets) {
t.setValue("[" + cta + "]");
}
// do not show supplied attribute by default!?
// else{
// t.setValue(cta);
// }
}
} else // nothing supplied, so show [...] for the gap tag
{
t.setValue("[...]");
t.setSpace("preserve");
}
org.docx4j.wml.R run = factory.createR();
// p.getContent().add(run);
run.getContent().add(t);
listOfallRuns.add(run);
}
// begin of unclear word should be marked with [ and end with ] (swapped for right-to-left text)
if (unclearList.containsKey(i)) {
org.docx4j.wml.Text t = factory.createText();
if (!rtl)
t.setValue("[");
else
t.setValue("]");
org.docx4j.wml.R run = factory.createR();
// p.getContent().add(run);
run.getContent().add(t);
listOfallRuns.add(run);
}
/*
* if so we create an index entry for this text string in the docx
*/
if (idxList.containsKey(i)) {
addIndexEntry(i, p, textStr, rtl);
}
// the single character at position i; stays empty when i is at or past the end of the text
String currText = "";
if (i + 1 <= textStr.length()) {
currText = textStr.substring(i, i + 1);
// logger.debug("&&&&&&&& current single char : " + currText);
}
/*
* 2nd is (should be) soft hyphen with Unicode U+00AD
* First arg is not sign and was initially used for soft hyphen by Diggitexx
* need to be at the line end - otherwise
*
*/
// when line breaks are not preserved, a hyphenation sign at the end of the line is dropped and
// the loop stops, so neither the sign nor the trailing space below is written
if ((currText.equals("¬") || currText.equals("") || currText.equals("-")) && !preserveLineBreaks && shapeEnded) {
break;
}
org.docx4j.wml.Text t = factory.createText();
t.setValue(currText);
t.setSpace("preserve");
org.docx4j.wml.R run = factory.createR();
// p.getContent().add(run);
run.getContent().add(t);
listOfallRuns.add(run);
// end of unclear tag (unclearList values hold the last covered index)
if (unclearList.containsValue(i)) {
org.docx4j.wml.Text unclearEnd = factory.createText();
if (!rtl)
unclearEnd.setValue("]");
else
unclearEnd.setValue("[");
org.docx4j.wml.R unclearRun = factory.createR();
// p.getContent().add(unclearRun);
unclearRun.getContent().add(unclearEnd);
listOfallRuns.add(unclearRun);
}
// the properties of this text section
org.docx4j.wml.RPr rpr = factory.createRPr();
/*
* format according to custom style tag - check for each char in the text if a special style should be set
*/
for (TextStyleTag styleTag : textStylesTags) {
// the style applies if i lies in [offset, offset + length)
if (i >= styleTag.getOffset() && i < (styleTag.getOffset() + styleTag.getLength())) {
org.docx4j.wml.BooleanDefaultTrue b = new org.docx4j.wml.BooleanDefaultTrue();
b.setVal(true);
TextStyleType ts = styleTag.getTextStyle();
if (ts == null)
continue;
if (CoreUtils.val(ts.isBold())) {
rpr.setB(b);
}
if (CoreUtils.val(ts.isItalic())) {
rpr.setI(b);
}
// the following four styles are recognised but currently not mapped to any run property
if (CoreUtils.val(ts.isLetterSpaced())) {
// ????
}
if (CoreUtils.val(ts.isMonospace())) {
// ????
}
if (CoreUtils.val(ts.isReverseVideo())) {
// ????
}
if (CoreUtils.val(ts.isSerif())) {
// ????
}
if (CoreUtils.val(ts.isSmallCaps())) {
rpr.setSmallCaps(b);
}
if (CoreUtils.val(ts.isStrikethrough())) {
rpr.setStrike(b);
}
if (CoreUtils.val(ts.isSubscript())) {
org.docx4j.wml.CTVerticalAlignRun al = factory.createCTVerticalAlignRun();
al.setVal(STVerticalAlignRun.SUBSCRIPT);
rpr.setVertAlign(al);
}
if (CoreUtils.val(ts.isSuperscript())) {
org.docx4j.wml.CTVerticalAlignRun al = factory.createCTVerticalAlignRun();
al.setVal(STVerticalAlignRun.SUPERSCRIPT);
rpr.setVertAlign(al);
}
if (CoreUtils.val(ts.isUnderlined())) {
U u = factory.createU();
u.setVal(UnderlineEnumeration.SINGLE);
rpr.setU(u);
}
// BooleanDefaultTrue bdt = Context.getWmlObjectFactory().createBooleanDefaultTrue();
// bdt.setVal(Boolean.TRUE);
// rpr.setRtl(bdt);
// rpr.setHighlight(new Highlight());
}
}
// add the run properties (= text styles) to the run
run.setRPr(rpr);
// find position of footnote/comment
if (commentList.containsKey(i)) {
// logger.debug("position of comment: " + i);
// logger.debug("value of comment: " + commentList.get(i));
// creates the footnote at the end of the wished text - this position was found at the beginning of this method
org.docx4j.wml.R fnRun = factory.createR();
// p.getContent().add(fnRun);
createFootnote(commentList.get(i), fnRun, mdp);
listOfallRuns.add(fnRun);
}
/*
* add space at end of line if line breaks are not preserved
*/
if (!preserveLineBreaks && shapeEnded) {
org.docx4j.wml.Text space = factory.createText();
space.setValue(" ");
space.setSpace("preserve");
org.docx4j.wml.R runSpace = factory.createR();
// p.getContent().add(runSpace);
runSpace.getContent().add(space);
listOfallRuns.add(runSpace);
}
// runs.add(run);
}
// right-to-left text: right-align the paragraph
if (rtl) {
PPr paragraphProperties = factory.createPPr();
Jc justification = factory.createJc();
justification.setVal(JcEnumeration.RIGHT);
paragraphProperties.setJc(justification);
p.setPPr(paragraphProperties);
}
// for right-to-left text the runs are appended one by one in reverse order; otherwise all runs
// are appended in their original order in a single call and the loop is left immediately
for (int i = listOfallRuns.size() - 1; i >= 0; i--) {
if (rtl) {
p.getContent().add(listOfallRuns.get(i));
} else {
p.getContent().addAll(listOfallRuns);
break;
}
}
// presumably resets the shared lookup collections filled above so the next element starts clean — verify in clearAllLists()
clearAllLists();
}
Aggregations