Search in sources:

Example 6 with TrpTextLineType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType in project TranskribusCore by Transkribus.

the class TrpPdfDocument method addTextFromTextRegion.

/**
 * Adds the text of all lines of the given text region to the PDF content.
 *
 * <p>The lines are first sorted with a {@code TrpElementReadingOrderComparator}. Each line is then
 * written either word by word (when the line has words and either carries no line-level text or
 * {@code useWordLevel} is set) or as a single string for the whole line. When {@code doBlackening}
 * is set, line-level text carrying blackening tags is masked before it is written. When
 * {@code highlightTags} is set, {@code highlightTagsForShape} is invoked for the words or the line.
 *
 * @param tr text region whose lines are exported; its line list is sorted in place
 * @param cb PDF content, passed through to {@code addString}
 * @param cutoffLeft passed through to {@code addString}
 * @param cutoffTop passed through to {@code addString}
 * @param bf font, passed through to {@code addString}
 * @param cache export cache, passed through to {@code highlightTagsForShape}
 * @throws IOException propagated from the helpers called here (e.g. {@code highlightTagsForShape})
 */
private void addTextFromTextRegion(final TextRegionType tr, final PdfContentByte cb, int cutoffLeft, int cutoffTop, BaseFont bf, ExportCache cache) throws IOException {
    List<TextLineType> lines = tr.getTextLine();
    if (lines == null || lines.isEmpty()) {
        return;
    }
    // sort according to reading order; this sorts the list returned by tr.getTextLine() in place
    Collections.sort(lines, new TrpElementReadingOrderComparator<TextLineType>(true));
    // Declared outside the loop on purpose: a line without a baseline reuses the value computed
    // for the last line that had one (0 if there was none yet).
    double baseLineMeanY = 0;
    for (TextLineType lt : lines) {
        TrpTextLineType l = (TrpTextLineType) lt;
        TrpBaselineType baseline = (TrpBaselineType) l.getBaseline();
        // rotation derived from the baseline; 0 when the line has no baseline
        double rotation = (baseline != null ? computeRotation(baseline) : 0);
        if (baseline != null) {
            // vertical centre of the line's bounding box (max Y minus half of the box height)
            java.awt.Rectangle baseLineRect = l.getBoundingBox();
            baseLineMeanY = baseLineRect.getMaxY() - ((baseLineRect.getMaxY() - baseLineRect.getMinY()) / 2);
        }
        // Decides between word-level and line-level output; evaluated once and reused for the
        // highlighting step below so both steps always agree.
        boolean wordLevel = (l.getUnicodeText().isEmpty() || useWordLevel) && !l.getWord().isEmpty();
        boolean rtl = false;
        if (wordLevel) {
            for (WordType wt : l.getWord()) {
                TrpWordType w = (TrpWordType) wt;
                String text = w.getUnicodeText();
                // words without text content are skipped
                if (!text.isEmpty()) {
                    java.awt.Rectangle boundRect = w.getBoundingBox();
                    rtl = textIsRTL(text.trim());
                    addString(boundRect, baseLineMeanY, text, cb, cutoffLeft, cutoffTop, bf, rotation, rtl);
                }
            }
        } else if (!l.getUnicodeText().isEmpty()) {
            String lineTextTmp = l.getUnicodeText();
            // surrounding rectangle of this line
            java.awt.Rectangle boundRect = l.getBoundingBox();
            Set<Entry<CustomTag, String>> blackSet = ExportUtils.getAllTagsOfThisTypeForShapeElement(l, RegionTypeUtil.BLACKENING_REGION.toLowerCase()).entrySet();
            if (doBlackening && blackSet.size() > 0) {
                for (Map.Entry<CustomTag, String> currEntry : blackSet) {
                    if (!currEntry.getKey().isIndexed()) {
                        // non-indexed tag: mask every character of the line
                        // (the regex "." leaves line terminators untouched)
                        lineTextTmp = lineTextTmp.replaceAll(".", "*");
                    } else {
                        // indexed tag: masking is delegated to blackenString
                        lineTextTmp = blackenString(currEntry, lineTextTmp);
                    }
                }
            }
            rtl = textIsRTL(lineTextTmp.trim());
            addString(boundRect, baseLineMeanY, lineTextTmp, cb, cutoffLeft, cutoffTop, bf, rotation, rtl);
        }
        if (highlightTags) {
            if (wordLevel) {
                // NOTE(review): rtl here is the direction of the last non-empty word of the line
                // and is applied to every word - confirm this is intended.
                for (WordType wt : l.getWord()) {
                    TrpWordType w = (TrpWordType) wt;
                    highlightTagsForShape(w, rtl, cache);
                }
            } else {
                highlightTagsForShape(l, rtl, cache);
            }
        }
    }
}
Also used : Rectangle(java.awt.Rectangle) Set(java.util.Set) HashSet(java.util.HashSet) Rectangle(java.awt.Rectangle) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpBaselineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType) Entry(java.util.Map.Entry) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)

Example 7 with TrpTextLineType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType in project TranskribusCore by Transkribus.

the class TrpPdfDocument method highlightTagsForShape.

/**
 * Registers highlight lines for the selected custom tags of a line or word shape.
 *
 * <p>The baseline is taken from the shape itself when it is a {@code TrpTextLineType}, or from
 * its parent line when it is a {@code TrpWordType}; for any other shape, or when no baseline is
 * set, no highlight line is calculated. For every tag whose name is among the tag names selected
 * in the cache, the number of overlaps with previously handled tags is obtained from
 * {@code getAmountOfOverlaps} and used to scale the vertical shift of the highlight line.
 *
 * @param shape the line or word whose tags are highlighted
 * @param rtl passed through to {@code calculateTagLines}
 * @param cache provides the set of selected tag names
 * @throws IOException declared by the signature; not thrown directly by the visible statements
 */
private void highlightTagsForShape(ITrpShapeType shape, boolean rtl, ExportCache cache) throws IOException {
    Set<Entry<CustomTag, String>> tagEntries = ExportUtils.getAllTagsForShapeElement(shape).entrySet();
    Set<String> selectedTagNames = cache.getOnlySelectedTagnames(CustomTagFactory.getRegisteredTagNames());
    // offset/length of every tag handled so far, consulted by getAmountOfOverlaps
    int[] seenLengths = new int[tagEntries.size()];
    int[] seenOffsets = new int[tagEntries.size()];

    BaselineType baseline = null;
    if (shape instanceof TrpTextLineType) {
        baseline = ((TrpTextLineType) shape).getBaseline();
    } else if (shape instanceof TrpWordType) {
        // a word has no baseline of its own here; the one of its parent line is used
        TrpTextLineType parentLine = (TrpTextLineType) ((TrpWordType) shape).getParentShape();
        baseline = parentLine.getBaseline();
    }

    // stays true unless the baseline has at least two points and its first point has a
    // smaller y than its last point
    boolean falling = true;
    try {
        List<Point> baselinePoints = (baseline == null) ? null : PointStrUtils.parsePoints(baseline.getPoints());
        if (baselinePoints != null) {
            int pointCount = baselinePoints.size();
            if (pointCount >= 2 && baselinePoints.get(0).y < baselinePoints.get(pointCount - 1).y) {
                falling = false;
            }
        }
    } catch (Exception e) {
        // best effort: a failure while reading the points keeps the current value of 'falling'
        e.printStackTrace();
    }

    int slot = 0;
    for (Map.Entry<CustomTag, String> entry : tagEntries) {
        CustomTag tag = entry.getKey();
        if (!selectedTagNames.contains(tag.getTagName())) {
            continue;
        }
        String color = CustomTagFactory.getTagColor(tag.getTagName());
        int length = tag.getLength();
        int offset = tag.getOffset();
        // the overlap count with the tags handled before pushes the highlight line further away
        int overlaps = getAmountOfOverlaps(seenOffsets, seenLengths, offset, length);
        seenOffsets[slot] = offset;
        seenLengths[slot] = length;
        slot++;
        float yShift = (lineMeanHeight / 6) * overlaps;
        if (baseline == null) {
            continue;
        }
        // the bounding box of the baseline is handed to calculateTagLines for positioning
        java.awt.Rectangle baseLineRect = ((TrpBaselineType) baseline).getBoundingBox();
        calculateTagLines(baseLineRect, shape, tag.getContainedText(), offset, length, color, yShift, falling, rtl);
    }
}
Also used : Rectangle(java.awt.Rectangle) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) Point(java.awt.Point) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) Point(java.awt.Point) URISyntaxException(java.net.URISyntaxException) JAXBException(javax.xml.bind.JAXBException) FileNotFoundException(java.io.FileNotFoundException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) DocumentException(com.itextpdf.text.DocumentException) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpBaselineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType) Entry(java.util.Map.Entry) BaselineType(eu.transkribus.core.model.beans.pagecontent.BaselineType) TrpBaselineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType) Map(java.util.Map) HashMap(java.util.HashMap)

Example 8 with TrpTextLineType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType in project TranskribusCore by Transkribus.

the class TrpPdfDocument method isOnlyRegionInThisRow.

/**
 * Tells whether {@code regionToCompare} shares its row with no other text-bearing text region.
 *
 * <p>Another region counts as being in the same row when it is a {@code TextRegionType} with a
 * different id, has at least one line with non-empty text, and either its vertical centre lies
 * strictly inside the vertical extent of {@code regionToCompare} or the vertical centre of
 * {@code regionToCompare} lies strictly inside its vertical extent. Regions of other types are
 * currently not considered.
 *
 * @param regions all regions to check against
 * @param regionToCompare the region whose row is examined
 * @return {@code true} if {@code regions} has exactly one element or no other region shares the
 *         row; {@code false} otherwise
 */
private boolean isOnlyRegionInThisRow(List<TrpRegionType> regions, TextRegionType regionToCompare) {
    java.awt.Rectangle compareBlock = regionToCompare.getBoundingBox();
    float compareMinY = (float) compareBlock.getMinY();
    float compareMaxY = (float) compareBlock.getMaxY();
    float compareMeanY = compareMinY + (compareMaxY - compareMinY) / 2;
    if (regions.size() == 1) {
        return true;
    }
    for (RegionType r : regions) {
        // TODO add paths for tables etc.
        // Ids are compared by value: a reference comparison (!=) would treat two equal ids
        // held in different objects as different regions.
        if (!(r instanceof TextRegionType) || java.util.Objects.equals(r.getId(), regionToCompare.getId())) {
            continue;
        }
        TextRegionType tr = (TextRegionType) r;
        // regions without lines, or with only empty lines, are ignored
        boolean textFound = false;
        for (TextLineType tlt : tr.getTextLine()) {
            TrpTextLineType l = (TrpTextLineType) tlt;
            if (!l.getUnicodeText().isEmpty()) {
                textFound = true;
                break;
            }
        }
        if (!textFound) {
            continue;
        }
        java.awt.Rectangle block = tr.getBoundingBox();
        float minY = (float) block.getMinY();
        float maxY = (float) block.getMaxY();
        float meanY = minY + (maxY - minY) / 2;
        boolean otherCentreInsideCompared = meanY > compareMinY && meanY < compareMaxY;
        boolean comparedCentreInsideOther = compareMeanY > minY && compareMeanY < maxY;
        if (otherCentreInsideCompared || comparedCentreInsideOther) {
            return false;
        }
    }
    return true;
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) Rectangle(java.awt.Rectangle) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) UnknownRegionType(eu.transkribus.core.model.beans.pagecontent.UnknownRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)

Example 9 with TrpTextLineType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType in project TranskribusCore by Transkribus.

the class CustomTagListTest method testCommonTag.

/**
 * Adds a bold text style tag at (0, 10) and an italic one at (3, 3) to a line containing
 * "Hello world!", expects three tags after merging, and compares the common indexed text style
 * tag for the range (2, 6) with a bold tag at offset 2 and length 6.
 */
@Test
public void testCommonTag() {
    TrpTextLineType line = new TrpTextLineType(new TrpTextRegionType(new TrpPageType()));
    line.setUnicodeText("Hello world!", null);
    CustomTagList tl = new CustomTagList(line);
    TextStyleTag ts1 = new TextStyleTag(0, 10);
    ts1.setBold(true);
    tl.addOrMergeTag(ts1, null);
    logger.debug("ts1 = " + tl);
    TextStyleTag ts2 = new TextStyleTag(3, 3);
    ts2.setItalic(true);
    tl.addOrMergeTag(ts2, "italic");
    logger.debug("ts2 = " + tl);
    Assert.assertEquals("Nr. of merged elements must be 3", 3, tl.getTags().size());
    TextStyleTag common = tl.getCommonIndexedCustomTag(TextStyleTag.TAG_NAME, 2, 6);
    TextStyleTag check = new TextStyleTag();
    check.setBold(true);
    check.setOffset(2);
    check.setLength(6);
    logger.debug("common = " + common);
    // log the expected tag, not the common tag a second time
    logger.debug("check = " + check);
    // fail with a readable message instead of a NullPointerException on the next line
    Assert.assertNotNull("No common text style tag found for range (2, 6)", common);
    Assert.assertTrue("Common tag " + common + " does not match expected tag " + check, common.equalsEffectiveValues(check, true));
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextStyleTag(eu.transkribus.core.model.beans.customtags.TextStyleTag) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) Test(org.junit.Test)

Example 10 with TrpTextLineType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType in project TranskribusCore by Transkribus.

the class CustomTagListTest method testMultipleRandomIndexedAddOrMergeTag.

// @Ignore
/**
 * Adds 1000 randomly generated tags (indexed structure tags, text style tags and non-indexed
 * tags) to a line containing "Hello world!". Offsets and lengths are drawn from a range that
 * exceeds the text length, so some indexed tags do not fit into the text. For every tag the test
 * checks that an {@code IndexOutOfBoundsException} is thrown exactly when the tag is not inside
 * the whole-text range, and runs {@code checkIntegrity} on the list afterwards.
 */
@Test
public void testMultipleRandomIndexedAddOrMergeTag() {
    TrpTextLineType line = new TrpTextLineType(new TrpTextRegionType(new TrpPageType()));
    line.setUnicodeText("Hello world!", null);
    CustomTagList tl = new CustomTagList(line);
    int textLength = tl.getTextLength();
    // reference tag spanning the whole text, used to classify each random tag as inside or not
    CustomTag wholeRangeTag = new CustomTag("test", 0, textLength);
    String[] nonIndexedTags = new String[] { "a_non_indexed", "b_ni", "c_balbla_non_indexed" };
    // + overlap to test exceptions when index out of bounds!
    int rangeOfTags = textLength + 5;
    final int N = (int) 1e3;
    for (int i = 0; i < N; ++i) {
        int o = rand.nextInt(rangeOfTags);
        // length is at least 1 and offset + length never exceeds rangeOfTags
        int l = rand.nextInt(rangeOfTags - o) + 1;
        CustomTag ct = null;
        // pick one of the three tag kinds below
        int d = rand.nextInt(3);
        if (d == 0) {
            // structure tag
            ct = new CustomTag("a_test_indexed", o, l);
        } else if (d == 1) {
            // text style tag
            ct = new TextStyleTag(o, l);
            ((TextStyleTag) ct).setBold(rand.nextBoolean());
            ((TextStyleTag) ct).setItalic(rand.nextBoolean());
            ((TextStyleTag) ct).setMonospace(rand.nextBoolean());
        } else {
            // non-indexed tag; the bound follows the array so entries can be added or removed safely
            ct = new CustomTag(nonIndexedTags[rand.nextInt(nonIndexedTags.length)]);
        }
        logger.trace("i=" + i + "/" + N);
        logger.trace("adding custom tag: " + ct);
        logger.trace("list before = " + tl);
        try {
            tl.addOrMergeTag(ct, null);
            Assert.assertTrue("Indexed CustomTag was not inside but no exception thrown: " + ct, !ct.isIndexed() || wholeRangeTag.getOverlapType(ct) == OverlapType.INSIDE);
        } catch (IndexOutOfBoundsException ie) {
            Assert.assertTrue("CustomTag was inside but exception thrown: " + ct, wholeRangeTag.getOverlapType(ct) != OverlapType.INSIDE);
            logger.trace("Exception for tag not inside: " + ct);
        }
        logger.trace("list after = " + tl);
        checkIntegrity(tl);
    }
    logger.info("list = " + tl);
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextStyleTag(eu.transkribus.core.model.beans.customtags.TextStyleTag) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) Test(org.junit.Test)

Aggregations

TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)22 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)16 TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)12 TrpWordType (eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)11 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)9 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)8 Rectangle (java.awt.Rectangle)7 TextStyleTag (eu.transkribus.core.model.beans.customtags.TextStyleTag)5 IOException (java.io.IOException)5 CustomTag (eu.transkribus.core.model.beans.customtags.CustomTag)4 CustomTagList (eu.transkribus.core.model.beans.customtags.CustomTagList)4 TrpBaselineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType)4 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)3 RegionType (eu.transkribus.core.model.beans.pagecontent.RegionType)3 TrpRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)3 TrpTableRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType)3 ArrayList (java.util.ArrayList)3 Test (org.junit.Test)3 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)2 TrpPage (eu.transkribus.core.model.beans.TrpPage)2