use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class CustomTagListTest method checkIntegrity.
/**
 * Asserts the structural invariants of a {@link CustomTagList}.
 * <p>
 * Checks performed, in order:
 * <ul>
 * <li>every indexed tag is reported as {@code OverlapType.INSIDE} relative to a tag spanning
 * {@code [0, textLength]};</li>
 * <li>indexed tags of one name form a single contiguous run (a name never re-appears after another
 * name was seen), start at or after the end of the previous tag of the same name, and have a
 * positive length;</li>
 * <li>a non-indexed tag never directly follows a tag with the same name;</li>
 * <li>{@code checkAllTagRanges()} does not throw an {@link IndexOutOfBoundsException};</li>
 * <li>at every text position at most two tags of the same name overlap, and if there are two, the
 * first one ends exactly where the second one starts.</li>
 * </ul>
 *
 * @param tl the tag list to verify
 */
public static void checkIntegrity(CustomTagList tl) {
    // end position (offset + length) of the previous indexed tag with the current tag name
    int c = 0;
    String lastTagName = null;
    List<String> tagNamesEncountered = new ArrayList<>();
    // becomes true once the first indexed tag was seen and is never reset
    boolean indexed = false;
    int length = tl.getTextLength();
    CustomTag wholeRangeTag = new CustomTag("test", 0, length);

    logger.trace("checking integrity of all tags in customtaglist!");
    for (String tn : tl.getIndexedTagNames()) {
        for (CustomTag ct : tl.getIndexedTags(tn)) {
            if (ct.isIndexed()) {
                Assert.assertTrue("CustomTag not inside list!", wholeRangeTag.getOverlapType(ct) == OverlapType.INSIDE);
                if (!ct.getTagName().equals(lastTagName)) {
                    // a new run of tags starts: reset the range cursor and make sure this name was not seen before
                    c = 0;
                    Assert.assertFalse("Indexed tags are mixed!", tagNamesEncountered.contains(ct.getTagName()));
                    tagNamesEncountered.add(ct.getTagName());
                }
                indexed = true;
                Assert.assertTrue("Offset below last range: " + ct.getOffset() + " / " + c, ct.getOffset() >= c);
                Assert.assertTrue("length <= 0!", ct.getLength() > 0);
                c = ct.getOffset() + ct.getLength();
            } else {
                Assert.assertFalse("Multiple non-indexed tag-name!", ct.getTagName().equals(lastTagName));
            }
            Assert.assertTrue("Non-indexed and indexed tags are mixed!", ct.isIndexed() == indexed);
            lastTagName = ct.getTagName();
        }
    }

    for (CustomTag ct : tl.getNonIndexedTags()) {
        Assert.assertFalse("Multiple non-indexed tag-name: " + ct.getTagName(), ct.getTagName().equals(lastTagName));
        lastTagName = ct.getTagName();
    }

    try {
        tl.checkAllTagRanges();
    } catch (IndexOutOfBoundsException ie) {
        // log BEFORE failing: the assertion throws, so anything after it would never run
        logger.error(ie.getMessage(), ie);
        Assert.fail("Not all tags were in the line range!");
    }

    logger.trace("checking integrity of customtaglist at each position!");
    Set<String> tagNames = tl.getIndexedTagNames();
    for (int i = 0; i < length; ++i) {
        for (String tn : tagNames) {
            // zero-length selection at position i
            List<CustomTag> tagsAtOffset = tl.getOverlappingTags(tn, i, 0);
            Assert.assertTrue("More than two tags with same name at a single selection: " + i + ", n = " + tagsAtOffset.size(), tagsAtOffset.size() <= 2);
            if (tagsAtOffset.size() == 2) {
                // two hits are only legal at the seam between two directly adjacent tags
                Assert.assertTrue("Two tags with same name at a single selection but not in a series: " + tagsAtOffset.get(0) + " - " + tagsAtOffset.get(1), tagsAtOffset.get(0).getEnd() == tagsAtOffset.get(1).getOffset());
            }
        }
    }
}
use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class CustomTagListTest method testSimpleAddOrMergeTagWithTextStyles.
// @Ignore
/**
 * Adds three nested text style tags to a line and checks how many tags the list holds after
 * each merge, and that the resulting tags still start at 0 and end at 10.
 */
@Test
public void testSimpleAddOrMergeTagWithTextStyles() {
    TrpTextLineType line = new TrpTextLineType(new TrpTextRegionType(new TrpPageType()));
    line.setUnicodeText("Hello world!", null);
    CustomTagList tl = new CustomTagList(line);

    // base style; constructor arguments are presumably (offset, length) - the final assertion
    // below expects the styled range to end at 10
    TextStyleTag tst = new TextStyleTag(0, 10);
    tst.setFontFamily("testFont");
    tl.addOrMergeTag(tst, null);

    // bold style nested inside the base style
    TextStyleTag ts1 = new TextStyleTag(2, 5);
    ts1.setBold(true);
    tl.addOrMergeTag(ts1, null);
    logger.trace(tl.toString());
    Assert.assertEquals("Nr of text styles must be 3!", 3, tl.getTags().size());

    // italic style nested inside the bold style
    TextStyleTag ts2 = new TextStyleTag(3, 4);
    ts2.setItalic(true);
    tl.addOrMergeTag(ts2, null);
    Assert.assertEquals("Nr of text styles must be 4!", 4, tl.getTags().size());
    logger.trace(tl.toString());

    // assertEquals instead of assertTrue(a == b) so that a failure reports the actual value
    int firstOffset = tl.getTags().get(0).getOffset();
    Assert.assertEquals("offset = 0", 0, firstOffset);
    CustomTag last = tl.getTags().get(tl.getTags().size() - 1);
    Assert.assertEquals("offset+length = 10", 10, last.getOffset() + last.getLength());
}
use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class TrpXlsxBuilder method writeTagsForShapeElement.
/**
 * Writes one spreadsheet row per selected indexed custom tag of the given shape element.
 * <p>
 * Every exported tag is written twice: once to the "Overview" sheet (all attributes rendered
 * into a single cell) and once to a sheet named after the tag, where each attribute value is
 * placed into the column whose header cell (row 0) equals the attribute name.
 * Tags named "textStyle" and tags whose name is not contained in {@code selectedTags} are skipped.
 *
 * @param element      shape whose text and custom tags are exported
 * @param context      context string written next to the tagged text
 * @param doc          document identifier cell value
 * @param page         page identifier cell value
 * @param regionID     region identifier cell value
 * @param lineID       line identifier cell value
 * @param wordId       word identifier cell value
 * @param selectedTags names of the tags that shall be exported
 * @throws IOException if the element has no text or no custom tag list
 */
private static void writeTagsForShapeElement(ITrpShapeType element, String context, String doc, String page, String regionID, String lineID, String wordId, Set<String> selectedTags) throws IOException {
    String textStr = element.getUnicodeText();
    CustomTagList cl = element.getCustomTagList();
    if (textStr == null || cl == null) {
        throw new IOException("Element has no text or custom tag list: " + element + ", class: " + element.getClass().getName());
    }

    /*
     * custom tags:
     * read all attributes and write them
     * if 1st row: write the attribute keys and the values
     * if nth row: look up the index of the key in the 1st row (=0) and write the value into that column
     */
    for (CustomTag nonIndexedTag : cl.getNonIndexedTags()) {
        if (!nonIndexedTag.getTagName().equals("textStyle") && !nonIndexedTag.getTagName().equals("readingOrder")) {
            // NOTE(review): the result is discarded, so non-indexed tags are currently not exported;
            // call kept unchanged in case the getter has side effects - confirm and remove.
            nonIndexedTag.getAttributesValuesMap();
        }
    }

    final String overview = "Overview";
    for (CustomTag indexedTag : cl.getIndexedTags()) {
        String tagname = indexedTag.getTagName();
        // skip (do NOT abort the loop) so that later, selected tags of this element are still exported
        if (tagname.equals("textStyle") || !selectedTags.contains(tagname)) {
            continue;
        }

        // Look sheets up by the same sanitised name they are created with; otherwise a tag name that
        // createSafeSheetName() alters would never be found again and re-creating it would fail.
        String overviewSheetName = WorkbookUtil.createSafeSheetName(overview);
        Sheet firstSheet = wb.getSheet(overviewSheetName);
        if (firstSheet == null) {
            firstSheet = wb.createSheet(overviewSheetName);
        }
        String tagSheetName = WorkbookUtil.createSafeSheetName(tagname);
        Sheet currSheet = wb.getSheet(tagSheetName);
        if (currSheet == null) {
            currSheet = wb.createSheet(tagSheetName);
        }

        CreationHelper crHelper = wb.getCreationHelper();
        Map<String, Object> attributes = indexedTag.getAttributeNamesValuesMap();
        int offset = (int) attributes.get("offset");
        int length = (int) attributes.get("length");
        String tmpTextStr = textStr.substring(offset, offset + length);

        // NOTE(review): a last row number of 0 is treated as "sheet still empty, write header row";
        // newer POI versions presumably report -1 for an empty sheet - confirm against the POI version in use.
        int lastRowIdxOfFirstSheet = firstSheet.getLastRowNum();
        if (lastRowIdxOfFirstSheet == 0) {
            fillFirstOverviewRow(firstSheet);
        }
        int lastRowIdx = currSheet.getLastRowNum();
        if (lastRowIdx == 0) {
            fillFirstRow(currSheet, attributes, crHelper);
        }

        // leading cells shared by the overview row and the tag specific row
        String[] commonCells = { tmpTextStr, context, doc, page, regionID, lineID, wordId };

        /*
         * the first (overview) sheet shows all custom tags of the doc - tag attributes are stored as a list in one cell
         */
        Row overviewRow = firstSheet.createRow(++lastRowIdxOfFirstSheet);
        int overviewCol = 0;
        for (String value : commonCells) {
            overviewRow.createCell(overviewCol++).setCellValue(value);
        }
        overviewRow.createCell(overviewCol).setCellValue(tagname + " " + attributes.toString());

        /*
         * subsequent sheets show all different tags on their own sheet
         */
        Row tagRow = currSheet.createRow(++lastRowIdx);
        int tagCol = 0;
        for (String value : commonCells) {
            tagRow.createCell(tagCol++).setCellValue(value);
        }

        /*
         * each attribute of a custom tag is stored in a single cell, in the column of the matching header cell
         */
        Row headerRow = currSheet.getRow(0);
        for (Map.Entry<String, Object> attribute : attributes.entrySet()) {
            String attributeName = attribute.getKey();
            for (int colIdx = 0; colIdx < headerRow.getLastCellNum(); colIdx++) {
                Cell cell = headerRow.getCell(colIdx);
                // getCell() returns null for columns that were never created
                if (cell != null && cell.getRichStringCellValue().getString().equals(attributeName)) {
                    tagRow.createCell(colIdx).setCellValue(crHelper.createRichTextString(String.valueOf(attribute.getValue())));
                    break;
                }
            }
        }
    }
}
use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class TrpPdfDocument method addTextFromTextRegion.
/**
 * Adds the text of all lines of a text region to the PDF content, line by line in reading order.
 * <p>
 * For each line the text is written either word by word (when the line itself has no text or
 * {@code useWordLevel} is set, and the line has words) or as one string for the whole line.
 * When {@code doBlackening} is set, text covered by blackening tags is replaced by {@code *}.
 * When {@code highlightTags} is set, the tags of the written shapes are highlighted afterwards.
 * Note that the line list of {@code tr} is sorted in place.
 *
 * @param tr         text region to export
 * @param cb         PDF content to write to
 * @param cutoffLeft passed through to {@code addString}
 * @param cutoffTop  passed through to {@code addString}
 * @param bf         base font passed through to {@code addString}
 * @param cache      export cache passed through to {@code highlightTagsForShape}
 * @throws IOException propagated from the called export helpers
 */
private void addTextFromTextRegion(final TextRegionType tr, final PdfContentByte cb, int cutoffLeft, int cutoffTop, BaseFont bf, ExportCache cache) throws IOException {
    List<TextLineType> lines = tr.getTextLine();
    if (lines == null || lines.isEmpty()) {
        return;
    }

    // sort according to reading order
    Collections.sort(lines, new TrpElementReadingOrderComparator<TextLineType>(true));

    // declared outside the loop on purpose: a line without baseline re-uses the value of the previous line
    double baseLineMeanY = 0;

    for (TextLineType lt : lines) {
        TrpTextLineType l = (TrpTextLineType) lt;

        // rotation is derived from the baseline; lines without a baseline are not rotated
        TrpBaselineType baseline = (TrpBaselineType) l.getBaseline();
        double rotation = (baseline != null ? computeRotation(baseline) : 0);

        if (baseline != null) {
            // vertical midpoint of the line's bounding box
            java.awt.Rectangle lineBox = l.getBoundingBox();
            baseLineMeanY = lineBox.getMaxY() - ((lineBox.getMaxY() - lineBox.getMinY()) / 2);
        }

        boolean rtl = false;
        // write word by word if the line has no text of its own (or word level is requested) and words exist
        boolean writeWords = (l.getUnicodeText().isEmpty() || useWordLevel) && !l.getWord().isEmpty();

        if (writeWords) {
            for (WordType wt : l.getWord()) {
                TrpWordType w = (TrpWordType) wt;
                if (!w.getUnicodeText().isEmpty()) {
                    java.awt.Rectangle boundRect = w.getBoundingBox();
                    String text = w.getUnicodeText();
                    rtl = textIsRTL(text.trim());
                    addString(boundRect, baseLineMeanY, text, cb, cutoffLeft, cutoffTop, bf, rotation, rtl);
                }
            }
        } else if (!l.getUnicodeText().isEmpty()) {
            String lineTextTmp = l.getUnicodeText();
            // surrounding rectangle of this line
            java.awt.Rectangle boundRect = l.getBoundingBox();
            Set<Entry<CustomTag, String>> blackSet = ExportUtils.getAllTagsOfThisTypeForShapeElement(l, RegionTypeUtil.BLACKENING_REGION.toLowerCase()).entrySet();
            if (doBlackening && blackSet.size() > 0) {
                // for all blackening regions replace text with ****
                for (Map.Entry<CustomTag, String> currEntry : blackSet) {
                    if (!currEntry.getKey().isIndexed()) {
                        // non-indexed tag: blacken every character of the line
                        lineTextTmp = lineTextTmp.replaceAll(".", "*");
                    } else {
                        lineTextTmp = blackenString(currEntry, lineTextTmp);
                    }
                }
            }
            rtl = textIsRTL(lineTextTmp.trim());
            addString(boundRect, baseLineMeanY, lineTextTmp, cb, cutoffLeft, cutoffTop, bf, rotation, rtl);
        }

        /*
         * highlight all tags of this text line if property is set
         */
        if (highlightTags) {
            if (writeWords) {
                for (WordType wt : l.getWord()) {
                    // rtl holds the direction detected for the last non-empty word of this line
                    highlightTagsForShape((TrpWordType) wt, rtl, cache);
                }
            } else {
                highlightTagsForShape(l, rtl, cache);
            }
        }
    }
}
use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.
the class TrpPdfDocument method formatText.
/**
 * Builds the PDF chunk for one character of a shape's text and applies all formatting that
 * covers the character's index: blackening, comment background, text styles and, if
 * {@code highlightTags} is set, coloured underlines for the selected tags.
 *
 * @param currCharacter the text of the chunk (replaced by {@code *} if it lies in a blackening tag)
 * @param styleTags     text style tags to evaluate for this character
 * @param currentIndex  index of the character that tag ranges are compared against
 * @param currShape     shape whose tags are evaluated
 * @param cache         export cache providing the tag names selected for highlighting
 * @return the formatted chunk
 * @throws IOException propagated from the tag lookup helpers
 */
private Chunk formatText(String currCharacter, List<TextStyleTag> styleTags, int currentIndex, ITrpShapeType currShape, ExportCache cache) throws IOException {
    // first blacken char if needed
    // NOTE(review): blackening and comment ranges are compared without wordOffset, style and
    // highlight ranges with it - confirm this is intended for word level export.
    Set<Entry<CustomTag, String>> blackSet = ExportUtils.getAllTagsOfThisTypeForShapeElement(currShape, RegionTypeUtil.BLACKENING_REGION.toLowerCase()).entrySet();
    if (!currCharacter.isEmpty() && doBlackening && blackSet.size() > 0) {
        for (Map.Entry<CustomTag, String> currEntry : blackSet) {
            int beginIndex = currEntry.getKey().getOffset();
            int endIndex = beginIndex + currEntry.getKey().getLength();
            if (currentIndex >= beginIndex && currentIndex < endIndex) {
                currCharacter = "*";
                break; // one covering region is enough
            }
        }
    }

    // create new chunk
    Chunk currChunk = new Chunk(currCharacter);
    currChunk.setFont(fontArial);

    // characters inside a comment tag get a background colour
    Set<Entry<CustomTag, String>> commentSet = ExportUtils.getAllTagsOfThisTypeForShapeElement(currShape, "comment").entrySet();
    for (Map.Entry<CustomTag, String> currEntry : commentSet) {
        int beginIndex = currEntry.getKey().getOffset();
        int endIndex = beginIndex + currEntry.getKey().getLength();
        if (currentIndex >= beginIndex && currentIndex < endIndex) {
            // hex string #FFF8B0: yellow color
            currChunk.setBackground(new BaseColor(Color.decode("#FFF8B0").getRGB()));
        }
    }

    /*
     * format according to custom style tag - check for each char in the text if a special style should be set
     */
    for (TextStyleTag styleTag : styleTags) {
        int styleBegin = wordOffset + styleTag.getOffset();
        int styleEnd = styleBegin + styleTag.getLength();
        if (currentIndex >= styleBegin && currentIndex < styleEnd) {
            if (CoreUtils.val(styleTag.getBold())) {
                currChunk.setFont(fontArialBold);
            }
            // note: italic replaces a bold font set just above
            if (CoreUtils.val(styleTag.getItalic())) {
                currChunk.setFont(fontArialItalic);
            }
            // strikethrough is drawn as a line shifted upwards (positive y position)
            if (CoreUtils.val(styleTag.getStrikethrough())) {
                currChunk.setUnderline(0.2f, 3f);
            }
            if (CoreUtils.val(styleTag.getUnderlined())) {
                currChunk.setUnderline(0.2f, -3f);
            }
        }
    }

    if (highlightTags) {
        Set<Entry<CustomTag, String>> entrySet = ExportUtils.getAllTagsForShapeElement(currShape).entrySet();
        if (!entrySet.isEmpty()) {
            // the selection does not depend on the individual tag, so look it up once per call
            Set<String> wantedTags = cache.getOnlySelectedTagnames(CustomTagFactory.getRegisteredTagNames());
            // k scales the vertical distance of the underline; it grows while consecutive tags overlap
            int k = 1;
            int tagId = 0;
            int[] prevLength = new int[entrySet.size()];
            int[] prevOffset = new int[entrySet.size()];
            for (Map.Entry<CustomTag, String> currEntry : entrySet) {
                CustomTag tag = currEntry.getKey();
                String tagName = tag.getTagName();
                if (!wantedTags.contains(tagName)) {
                    continue;
                }
                String color = CustomTagFactory.getTagColor(tagName);
                int currLength = tag.getLength();
                int currOffset = wordOffset + tag.getOffset();
                // end is exclusive, consistent with the other range checks in this method;
                // an inclusive end would also underline the character following the tag
                if (color != null && currentIndex >= currOffset && currentIndex < (currOffset + currLength)) {
                    /*
                     * if the current tag overlaps one of the previous tags
                     * -> increase the distance of the line under the textline
                     */
                    if (isOverlaped(prevOffset, prevLength, currOffset, currLength)) {
                        k++;
                    } else {
                        k = 1;
                    }
                    currChunk.setUnderline(new BaseColor(Color.decode(color).getRGB()), 0.8f, 0.0f, -2f * k, 0.0f, PdfContentByte.LINE_CAP_BUTT);
                }
                // remember the range of every wanted tag for the overlap test of the following ones
                prevOffset[tagId] = currOffset;
                prevLength[tagId] = currLength;
                tagId++;
            }
        }
    }
    return currChunk;
}
Aggregations