Search in sources:

Example 1 with CustomTag

use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.

the class CustomTagListTest method checkIntegrity.

/**
 * Asserts that the given tag list is internally consistent.
 * <p>
 * The following is checked, in this order:
 * <ul>
 * <li>every indexed tag lies inside the range {@code [0, textLength)}, has a positive length,
 * does not start before the end of the previous tag of the same name, and tags of one name
 * are not interleaved with tags of another name;</li>
 * <li>no two consecutive non-indexed tags share a tag name;</li>
 * <li>{@code tl.checkAllTagRanges()} does not throw an {@link IndexOutOfBoundsException};</li>
 * <li>at every text position at most two tags of the same name overlap, and if two do, the
 * first one ends exactly where the second one starts.</li>
 * </ul>
 *
 * @param tl the tag list to verify
 */
public static void checkIntegrity(CustomTagList tl) {
    // running end offset of the previous indexed tag with the current tag name
    int c = 0;
    String lastTagName = null;
    List<String> tagNamesEncountered = new ArrayList<>();
    boolean indexed = false;
    int length = tl.getTextLength();
    // reference tag spanning the whole text; every indexed tag must be INSIDE it
    CustomTag wholeRangeTag = new CustomTag("test", 0, length);
    logger.trace("checking integrity of all tags in customtaglist!");
    for (String tn : tl.getIndexedTagNames()) {
        for (CustomTag ct : tl.getIndexedTags(tn)) {
            if (ct.isIndexed()) {
                Assert.assertTrue("CustomTag not inside list!", wholeRangeTag.getOverlapType(ct) == OverlapType.INSIDE);
                if (!ct.getTagName().equals(lastTagName)) {
                    // a new tag name starts: reset the running end offset and make sure
                    // this name has not been seen in an earlier run
                    c = 0;
                    Assert.assertFalse("Indexed tags are mixed!", tagNamesEncountered.contains(ct.getTagName()));
                    tagNamesEncountered.add(ct.getTagName());
                }
                indexed = true;
                Assert.assertTrue("Offset below last range: " + ct.getOffset() + " / " + c, ct.getOffset() >= c);
                Assert.assertTrue("length <= 0!", ct.getLength() > 0);
                c = ct.getOffset() + ct.getLength();
            } else {
                Assert.assertFalse("Multiple non-indexed tag-name!", ct.getTagName().equals(lastTagName));
            }
            // once an indexed tag was seen, no non-indexed tag may follow (and vice versa)
            Assert.assertTrue("Non-indexed and indexed tags are mixed!", ct.isIndexed() == indexed);
            lastTagName = ct.getTagName();
        }
    }
    for (CustomTag ct : tl.getNonIndexedTags()) {
        Assert.assertFalse("Multiple non-indexed tag-name: " + ct.getTagName(), ct.getTagName().equals(lastTagName));
        lastTagName = ct.getTagName();
    }
    try {
        tl.checkAllTagRanges();
    } catch (IndexOutOfBoundsException ie) {
        // log first: Assert.fail throws, so nothing after it would be executed
        logger.error(ie.getMessage(), ie);
        Assert.fail("Not all tags were in the line range!");
    }
    logger.trace("checking integrity of customtaglist at each position!");
    Set<String> tagNames = tl.getIndexedTagNames();
    for (int i = 0; i < length; ++i) {
        for (String tn : tagNames) {
            // zero-length selection at position i
            List<CustomTag> tagsAtOffset = tl.getOverlappingTags(tn, i, 0);
            Assert.assertTrue("More than two tags with same name at a single selection: " + i + ", n = " + tagsAtOffset.size(), tagsAtOffset.size() <= 2);
            if (tagsAtOffset.size() == 2) {
                // two same-named tags may only meet at a position if they are directly adjacent
                Assert.assertTrue("Two tags with same name at a single selection but not in a series: " + tagsAtOffset.get(0) + " - " + tagsAtOffset.get(1), tagsAtOffset.get(0).getEnd() == tagsAtOffset.get(1).getOffset());
            }
        }
    }
}
Also used : ArrayList(java.util.ArrayList) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag)

Example 2 with CustomTag

use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.

the class CustomTagListTest method testSimpleAddOrMergeTagWithTextStyles.

// @Ignore
/**
 * Adds three overlapping text style tags to a line one after another and checks how many
 * tags the list holds after each merge, and that the resulting tags still start at 0 and
 * end at 10.
 */
@Test
public void testSimpleAddOrMergeTagWithTextStyles() {
    TrpTextLineType line = new TrpTextLineType(new TrpTextRegionType(new TrpPageType()));
    line.setUnicodeText("Hello world!", null);
    CustomTagList tl = new CustomTagList(line);

    // base style; presumably the constructor arguments are (offset, length) - TODO confirm
    TextStyleTag tst = new TextStyleTag(0, 10);
    tst.setFontFamily("testFont");
    tl.addOrMergeTag(tst, null);

    // a bold style inside the base style is expected to split it into three tags
    TextStyleTag ts1 = new TextStyleTag(2, 5);
    ts1.setBold(true);
    tl.addOrMergeTag(ts1, null);
    logger.trace(tl.toString());
    Assert.assertEquals("Nr of text styles must be 3!", 3, tl.getTags().size());

    // an italic style overlapping the bold one is expected to add one more tag
    TextStyleTag ts2 = new TextStyleTag(3, 4);
    ts2.setItalic(true);
    tl.addOrMergeTag(ts2, null);
    Assert.assertEquals("Nr of text styles must be 4!", 4, tl.getTags().size());
    logger.trace(tl.toString());

    // assertEquals (instead of assertTrue on ==) reports the actual value on failure;
    // int locals keep the (String, long, long) overload unambiguous
    int firstOffset = tl.getTags().get(0).getOffset();
    Assert.assertEquals("offset = 0", 0, firstOffset);
    CustomTag last = tl.getTags().get(tl.getTags().size() - 1);
    int lastEnd = last.getOffset() + last.getLength();
    Assert.assertEquals("offset+length = 10", 10, lastEnd);
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextStyleTag(eu.transkribus.core.model.beans.customtags.TextStyleTag) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) Test(org.junit.Test)

Example 3 with CustomTag

use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.

the class TrpXlsxBuilder method writeTagsForShapeElement.

/**
 * Writes one row per selected indexed custom tag of the given shape element into the
 * workbook {@code wb}: one row into the "Overview" sheet (all attributes joined in a single
 * cell) and one row into the sheet named after the tag (one cell per attribute, placed in
 * the column whose header cell in row 0 equals the attribute name).
 * <p>
 * Tags named "textStyle" and tags whose name is not contained in {@code selectedTags} are
 * skipped.
 *
 * @param element      shape element whose text and custom tags are exported
 * @param context      value written to the context column
 * @param doc          value written to the document column
 * @param page         value written to the page column
 * @param regionID     value written to the region id column
 * @param lineID       value written to the line id column
 * @param wordId       value written to the word id column
 * @param selectedTags names of the tags that shall be exported
 * @throws IOException if the element has no text or no custom tag list
 */
private static void writeTagsForShapeElement(ITrpShapeType element, String context, String doc, String page, String regionID, String lineID, String wordId, Set<String> selectedTags) throws IOException {
    String textStr = element.getUnicodeText();
    CustomTagList cl = element.getCustomTagList();
    if (textStr == null || cl == null) {
        throw new IOException("Element has no text or custom tag list: " + element + ", class: " + element.getClass().getName());
    }
    /*
     * custom tags:
     * read all attributes and write them
     * 1st row: write the attribute keys and the values
     * n-th row: look up the index of the key in the 1st row (=0) and write the value there
     */
    for (CustomTag nonIndexedTag : cl.getNonIndexedTags()) {
        if (!nonIndexedTag.getTagName().equals("textStyle") && !nonIndexedTag.getTagName().equals("readingOrder")) {
            // NOTE(review): the result is discarded; kept only in case the call has side effects - confirm and remove
            nonIndexedTag.getAttributesValuesMap();
        }
    }
    // the first Excel page is the overview -> all tags without their special tag attributes
    final String overviewSheetName = WorkbookUtil.createSafeSheetName("Overview");
    for (CustomTag indexedTag : cl.getIndexedTags()) {
        String tagname = indexedTag.getTagName();
        // skip (not break): later tags of this element may still be selected
        if (tagname.equals("textStyle") || !selectedTags.contains(tagname)) {
            continue;
        }
        Sheet firstSheet = wb.getSheet(overviewSheetName);
        if (firstSheet == null) {
            firstSheet = wb.createSheet(overviewSheetName);
        }
        // either find the existent sheet or create a new one; the lookup uses the same
        // sanitised name the sheet is created with, so it is found again on later calls
        String tagSheetName = WorkbookUtil.createSafeSheetName(tagname);
        Sheet currSheet = wb.getSheet(tagSheetName);
        if (currSheet == null) {
            currSheet = wb.createSheet(tagSheetName);
        }
        CreationHelper crHelper = wb.getCreationHelper();
        Map<String, Object> attributes = indexedTag.getAttributeNamesValuesMap();
        int offset = (int) attributes.get("offset");
        int length = (int) attributes.get("length");
        // the text covered by the tag
        String tmpTextStr = textStr.substring(offset, offset + length);
        // NOTE(review): Sheet.getLastRowNum() reports 0 for an empty sheet only in older POI
        // releases (newer ones return -1) - confirm the header detection against the POI version in use
        int lastRowIdxOfFirstSheet = firstSheet.getLastRowNum();
        if (lastRowIdxOfFirstSheet == 0) {
            fillFirstOverviewRow(firstSheet);
        }
        int lastRowIdx = currSheet.getLastRowNum();
        if (lastRowIdx == 0) {
            fillFirstRow(currSheet, attributes, crHelper);
        }
        /*
         * the first (overview) sheet shows all custom tags of the doc - tag attributes are stored as a list in one cell
         */
        Row nextRowOfFirstSheet = firstSheet.createRow(++lastRowIdxOfFirstSheet);
        int idxHelper = 0;
        nextRowOfFirstSheet.createCell(idxHelper++).setCellValue(tmpTextStr);
        nextRowOfFirstSheet.createCell(idxHelper++).setCellValue(context);
        nextRowOfFirstSheet.createCell(idxHelper++).setCellValue(doc);
        nextRowOfFirstSheet.createCell(idxHelper++).setCellValue(page);
        nextRowOfFirstSheet.createCell(idxHelper++).setCellValue(regionID);
        nextRowOfFirstSheet.createCell(idxHelper++).setCellValue(lineID);
        nextRowOfFirstSheet.createCell(idxHelper++).setCellValue(wordId);
        nextRowOfFirstSheet.createCell(idxHelper++).setCellValue(tagname + " " + attributes.toString());
        /*
         * subsequent sheets show all different tags on their own sheet
         */
        Row nextRow = currSheet.createRow(++lastRowIdx);
        int idx = 0;
        nextRow.createCell(idx++).setCellValue(tmpTextStr);
        nextRow.createCell(idx++).setCellValue(context);
        nextRow.createCell(idx++).setCellValue(doc);
        nextRow.createCell(idx++).setCellValue(page);
        nextRow.createCell(idx++).setCellValue(regionID);
        nextRow.createCell(idx++).setCellValue(lineID);
        nextRow.createCell(idx++).setCellValue(wordId);
        /*
         * each attribute of a custom tag is stored in a single cell, in the column whose
         * header (row 0) carries the attribute name
         */
        Row headerRow = currSheet.getRow(0);
        for (Map.Entry<String, Object> attribute : attributes.entrySet()) {
            String attributeName = attribute.getKey();
            Object value = attribute.getValue();
            for (int colIdx = 0; colIdx < headerRow.getLastCellNum(); colIdx++) {
                Cell cell = headerRow.getCell(colIdx);
                // getCell returns null for columns that were never created in the header row
                if (cell != null && cell.getRichStringCellValue().getString().equals(attributeName)) {
                    nextRow.createCell(colIdx).setCellValue(crHelper.createRichTextString(String.valueOf(value)));
                    break;
                }
            }
        }
    }
}
Also used : CreationHelper(org.apache.poi.ss.usermodel.CreationHelper) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) IOException(java.io.IOException) Row(org.apache.poi.ss.usermodel.Row) Sheet(org.apache.poi.ss.usermodel.Sheet) Cell(org.apache.poi.ss.usermodel.Cell)

Example 4 with CustomTag

use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.

the class TrpPdfDocument method addTextFromTextRegion.

/**
 * Adds the text of all lines of a text region to the PDF content.
 * <p>
 * The lines are sorted in place by reading order. For every line the text is written either
 * word by word (when the line has words and either has no line text or {@code useWordLevel}
 * is set) or as one string for the whole line. If {@code doBlackening} is set, text covered
 * by blackening tags is replaced by asterisks before the line text is written. If
 * {@code highlightTags} is set, the tags of the written shapes are highlighted afterwards.
 *
 * @param tr         text region whose lines are written
 * @param cb         PDF content the text is added to
 * @param cutoffLeft passed through to {@code addString}
 * @param cutoffTop  passed through to {@code addString}
 * @param bf         base font passed through to {@code addString}
 * @param cache      export cache passed through to {@code highlightTagsForShape}
 * @throws IOException propagated from the called helpers
 */
private void addTextFromTextRegion(final TextRegionType tr, final PdfContentByte cb, int cutoffLeft, int cutoffTop, BaseFont bf, ExportCache cache) throws IOException {
    List<TextLineType> lines = tr.getTextLine();
    if (lines == null || lines.isEmpty()) {
        return;
    }
    // sort according to reading order
    Collections.sort(lines, new TrpElementReadingOrderComparator<TextLineType>(true));
    // declared outside the loop on purpose: a line without baseline reuses the value of the previous line
    double baseLineMeanY = 0;
    for (TextLineType lt : lines) {
        TrpTextLineType l = (TrpTextLineType) lt;
        // rotation of the text is derived from the baseline; lines without baseline are not rotated
        TrpBaselineType baseline = (TrpBaselineType) l.getBaseline();
        double rotation = (baseline != null ? computeRotation(baseline) : 0);
        if (baseline != null) {
            // vertical centre of the line's bounding box
            java.awt.Rectangle baseLineRect = l.getBoundingBox();
            baseLineMeanY = baseLineRect.getMaxY() - ((baseLineRect.getMaxY() - baseLineRect.getMinY()) / 2);
        }
        // write word by word if the line has words and either no line text or word level is requested
        final boolean writeWords = (l.getUnicodeText().isEmpty() || useWordLevel) && !l.getWord().isEmpty();
        boolean rtl = false;
        if (writeWords) {
            for (WordType wt : l.getWord()) {
                TrpWordType w = (TrpWordType) wt;
                if (!w.getUnicodeText().isEmpty()) {
                    java.awt.Rectangle boundRect = w.getBoundingBox();
                    String text = w.getUnicodeText();
                    rtl = textIsRTL(text.trim());
                    addString(boundRect, baseLineMeanY, text, cb, cutoffLeft, cutoffTop, bf, rotation, rtl);
                }
            }
        } else if (!l.getUnicodeText().isEmpty()) {
            String lineTextTmp = l.getUnicodeText();
            // get surrounding rectangle coords of this line
            java.awt.Rectangle boundRect = l.getBoundingBox();
            Set<Entry<CustomTag, String>> blackSet = ExportUtils.getAllTagsOfThisTypeForShapeElement(l, RegionTypeUtil.BLACKENING_REGION.toLowerCase()).entrySet();
            if (doBlackening && blackSet.size() > 0) {
                // for all blackening regions replace text with ****
                for (Map.Entry<CustomTag, String> currEntry : blackSet) {
                    if (!currEntry.getKey().isIndexed()) {
                        // a non-indexed blackening tag covers the whole line
                        lineTextTmp = lineTextTmp.replaceAll(".", "*");
                    } else {
                        lineTextTmp = blackenString(currEntry, lineTextTmp);
                    }
                }
            }
            rtl = textIsRTL(lineTextTmp.trim());
            addString(boundRect, baseLineMeanY, lineTextTmp, cb, cutoffLeft, cutoffTop, bf, rotation, rtl);
        }
        if (highlightTags) {
            // highlight on the same level (words or whole line) the text was written on;
            // rtl holds the direction of the last text written for this line
            if (writeWords) {
                for (WordType wt : l.getWord()) {
                    TrpWordType w = (TrpWordType) wt;
                    highlightTagsForShape(w, rtl, cache);
                }
            } else {
                highlightTagsForShape(l, rtl, cache);
            }
        }
    }
}
Also used : Rectangle(java.awt.Rectangle) Set(java.util.Set) HashSet(java.util.HashSet) Rectangle(java.awt.Rectangle) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpBaselineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType) Entry(java.util.Map.Entry) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)

Example 5 with CustomTag

use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.

the class TrpPdfDocument method formatText.

/**
 * Builds the PDF chunk for a single character of a shape's text.
 * <p>
 * The character is replaced by {@code "*"} if {@code doBlackening} is set and a blackening
 * tag covers {@code currentIndex}. The chunk gets a yellow background inside comment tags,
 * bold/italic font and strikethrough/underline according to the given style tags and, if
 * {@code highlightTags} is set, a coloured underline for every selected tag that covers
 * {@code currentIndex}. All tag ranges are treated as half-open: {@code [offset, offset + length)}.
 *
 * @param currCharacter text of the chunk
 * @param styleTags     text style tags to apply; their offsets are shifted by {@code wordOffset}
 * @param currentIndex  index of the character that is compared against the tag ranges
 * @param currShape     shape whose blackening, comment and other tags are looked up
 * @param cache         export cache providing the selected tag names
 * @return the formatted chunk
 * @throws IOException declared by the original signature; presumably propagated from the tag lookups - confirm
 */
private Chunk formatText(String currCharacter, List<TextStyleTag> styleTags, int currentIndex, ITrpShapeType currShape, ExportCache cache) throws IOException {
    // first blacken char if needed
    // NOTE(review): unlike the style and highlight checks below, the blackening and comment
    // ranges are not shifted by wordOffset - confirm this is intended
    Set<Entry<CustomTag, String>> blackSet = ExportUtils.getAllTagsOfThisTypeForShapeElement(currShape, RegionTypeUtil.BLACKENING_REGION.toLowerCase()).entrySet();
    if (!currCharacter.isEmpty() && doBlackening && !blackSet.isEmpty()) {
        for (Map.Entry<CustomTag, String> currEntry : blackSet) {
            int beginIndex = currEntry.getKey().getOffset();
            int endIndex = beginIndex + currEntry.getKey().getLength();
            if (currentIndex >= beginIndex && currentIndex < endIndex) {
                currCharacter = "*";
                // one covering region is enough
                break;
            }
        }
    }
    // create new chunk
    Chunk currChunk = new Chunk(currCharacter);
    currChunk.setFont(fontArial);
    Set<Entry<CustomTag, String>> commentSet = ExportUtils.getAllTagsOfThisTypeForShapeElement(currShape, "comment").entrySet();
    for (Map.Entry<CustomTag, String> currEntry : commentSet) {
        int beginIndex = currEntry.getKey().getOffset();
        int endIndex = beginIndex + currEntry.getKey().getLength();
        if (currentIndex >= beginIndex && currentIndex < endIndex) {
            // hex string #FFF8B0: yellow color
            currChunk.setBackground(new BaseColor(Color.decode("#FFF8B0").getRGB()));
        }
    }
    /*
     * format according to custom style tag - check for each char in the text if a special style should be set
     */
    for (TextStyleTag styleTag : styleTags) {
        int styleBegin = wordOffset + styleTag.getOffset();
        int styleEnd = styleBegin + styleTag.getLength();
        if (currentIndex >= styleBegin && currentIndex < styleEnd) {
            if (CoreUtils.val(styleTag.getBold())) {
                currChunk.setFont(fontArialBold);
            }
            // NOTE(review): if both bold and italic are set, the italic font replaces the bold one
            if (CoreUtils.val(styleTag.getItalic())) {
                currChunk.setFont(fontArialItalic);
            }
            if (CoreUtils.val(styleTag.getStrikethrough())) {
                // line drawn above the text baseline
                currChunk.setUnderline(0.2f, 3f);
            }
            if (CoreUtils.val(styleTag.getUnderlined())) {
                // line drawn below the text baseline
                currChunk.setUnderline(0.2f, -3f);
            }
        }
    }
    if (highlightTags) {
        Set<Entry<CustomTag, String>> entrySet = ExportUtils.getAllTagsForShapeElement(currShape).entrySet();
        // does not depend on the loop variable, so look it up once
        Set<String> wantedTags = cache.getOnlySelectedTagnames(CustomTagFactory.getRegisteredTagNames());
        // k-th underline below the text; increased while the current tag overlaps a previous one
        int k = 1;
        int tagId = 0;
        int[] prevLength = new int[entrySet.size()];
        int[] prevOffset = new int[entrySet.size()];
        for (Map.Entry<CustomTag, String> currEntry : entrySet) {
            String tagName = currEntry.getKey().getTagName();
            if (!wantedTags.contains(tagName)) {
                continue;
            }
            String color = CustomTagFactory.getTagColor(tagName);
            int currLength = currEntry.getKey().getLength();
            int currOffset = wordOffset + currEntry.getKey().getOffset();
            // exclusive end, like every other range check in this method; an inclusive end
            // would also underline the first character after the tag
            if (color != null && currentIndex >= currOffset && currentIndex < (currOffset + currLength)) {
                /*
                 * if the current tag overlaps one of the previous tags
                 * -> increase the distance of the line under the textline
                 */
                if (isOverlaped(prevOffset, prevLength, currOffset, currLength)) {
                    k++;
                } else {
                    k = 1;
                }
                currChunk.setUnderline(new BaseColor(Color.decode(color).getRGB()), 0.8f, 0.0f, -2f * k, 0.0f, PdfContentByte.LINE_CAP_BUTT);
            }
            prevOffset[tagId] = currOffset;
            prevLength[tagId] = currLength;
            tagId++;
        }
    }
    return currChunk;
}
Also used : Entry(java.util.Map.Entry) BaseColor(com.itextpdf.text.BaseColor) TextStyleTag(eu.transkribus.core.model.beans.customtags.TextStyleTag) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) Chunk(com.itextpdf.text.Chunk) Map(java.util.Map) HashMap(java.util.HashMap) Point(java.awt.Point)

Aggregations

CustomTag (eu.transkribus.core.model.beans.customtags.CustomTag)19 CustomTagList (eu.transkribus.core.model.beans.customtags.CustomTagList)9 TextStyleTag (eu.transkribus.core.model.beans.customtags.TextStyleTag)5 IOException (java.io.IOException)5 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)4 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)4 Point (java.awt.Point)4 ArrayList (java.util.ArrayList)4 AbbrevTag (eu.transkribus.core.model.beans.customtags.AbbrevTag)3 HashMap (java.util.HashMap)3 Map (java.util.Map)3 Entry (java.util.Map.Entry)3 RtfText (com.tutego.jrtf.RtfText)2 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)2 TrpPage (eu.transkribus.core.model.beans.TrpPage)2 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)2 CommentTag (eu.transkribus.core.model.beans.customtags.CommentTag)2 GapTag (eu.transkribus.core.model.beans.customtags.GapTag)2 TrpBaselineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType)2 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)2