Search in sources :

Example 1 with TrpBaselineType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType in project TranskribusCore by Transkribus.

the class TrpPdfDocument method addUniformTextFromTextRegion.

/**
 * Writes the lines of a text region as "uniform" text: every line of the region gets the same
 * line height and leading, and consecutive lines are stacked below the first one.
 *
 * <p>Outline of what the code below does:
 * <ol>
 * <li>sorts the region's line list in place by reading order,</li>
 * <li>derives a line gap from the extent spanned by the first and last line divided by the
 *     number of lines, and stores 2/3 of it in the field {@code lineMeanHeight} and 1/3 in the
 *     field {@code leading} (for a single line the previous mean height is reused if non-zero),</li>
 * <li>for every line that has a baseline bounding box: determines the start position, builds one
 *     chunk per character via {@code formatText}, collects the chunks into a {@link Phrase}
 *     (in reverse order for right-to-left text) and hands it to {@code addUniformString}.</li>
 * </ol>
 *
 * <p>Side effects visible here: the list returned by {@code tr.getTextLine()} is re-sorted, and the
 * fields {@code lineMeanHeight}, {@code leading}, {@code prevLineMeanHeight} and {@code wordOffset}
 * are written.
 *
 * @param tr the text region whose lines are written
 * @param cb passed through to {@code addUniformString}
 * @param cutoffLeft passed through to {@code addUniformString}
 * @param cutoffTop passed through to {@code addUniformString}
 * @param bf passed through to {@code addUniformString}
 * @param lineStartX default x position at which each line starts unless one of the indentation
 *        rules below overrides it
 * @param cache passed through to {@code formatText}
 * @throws IOException declared by this method; which callee raises it is not visible here
 * @throws DocumentException declared by this method; which callee raises it is not visible here
 */
private void addUniformTextFromTextRegion(final TextRegionType tr, final PdfContentByte cb, int cutoffLeft, int cutoffTop, BaseFont bf, float lineStartX, ExportCache cache) throws IOException, DocumentException {
    List<TextLineType> lines = tr.getTextLine();
    if (lines != null && !lines.isEmpty()) {
        // counts the lines that passed the baseline check; 0 means "first printed line"
        int i = 0;
        // running y position, advanced by lineMeanHeight + leading for every printed line
        float lineStartY = 0;
        // sort according to reading order
        Collections.sort(lines, new TrpElementReadingOrderComparator<TextLineType>(true));
        double minY = 0;
        double maxY = 0;
        // get min and max values of the region (from its first and last line) for the later
        // calculation of the text line height
        int maxIdx = lines.size() - 1;
        java.awt.Rectangle firstLineRect = ((TrpTextLineType) lines.get(0)).getBoundingBox();
        java.awt.Rectangle lastLineRect = ((TrpTextLineType) lines.get(maxIdx)).getBoundingBox();
        // NOTE(review): the baselines are passed on without a null check here, while the
        // per-line code further down guards computeRotation with "baseline != null" —
        // confirm that computeRotation tolerates a null baseline.
        double firstLineRotation = computeRotation((TrpBaselineType) lines.get(0).getBaseline());
        double lastLineRotation = computeRotation((TrpBaselineType) lines.get(maxIdx).getBaseline());
        boolean isVerticalRegion = false;
        // use X coords to compute the total line gap
        if (firstLineRotation == 90 && lastLineRotation == 90) {
            // the reading order is not clear if the text is vertical -> could be right to left or vice versa,
            // so take the overall extent of both lines; despite their names minY/maxY hold x values here
            double tmpMinX1 = firstLineRect.getMinX();
            double tmpMinX2 = lastLineRect.getMinX();
            double tmpMaxX1 = firstLineRect.getMaxX();
            double tmpMaxX2 = lastLineRect.getMaxX();
            minY = Math.min(tmpMinX1, tmpMinX2);
            maxY = Math.max(tmpMaxX1, tmpMaxX2);
            isVerticalRegion = true;
        } else {
            minY = firstLineRect.getMinY();
            maxY = lastLineRect.getMaxY();
        }
        /*
         * An earlier idea was to clamp minY to the first twelfth of the page when the region starts
         * very close to the top. It was dropped because page numbers and similar information are
         * often located in that area.
         */
        // space available per line
        double lineGap = (maxY - minY) / lines.size();
        // use default values if only one line and no previous line mean height computed
        if (lines.size() == 1) {
            lineMeanHeight = (prevLineMeanHeight != 0 ? prevLineMeanHeight : lineMeanHeight);
        } else if (lines.size() > 1) {
            // two thirds of the gap for the text itself, one third as leading
            lineMeanHeight = (float) (2 * (lineGap / 3));
            leading = (int) (lineGap / 3);
            prevLineMeanHeight = lineMeanHeight;
        }
        for (TextLineType lt : lines) {
            wordOffset = 0;
            TrpTextLineType l = (TrpTextLineType) lt;
            TrpBaselineType baseline = (TrpBaselineType) l.getBaseline();
            java.awt.Rectangle lineRect = l.getBoundingBox();
            java.awt.Rectangle baseLineRect = baseline == null ? null : baseline.getBoundingBox();
            // lines without a baseline (or without a baseline bounding box) are not printed at all
            if (baseLineRect == null) {
                logger.debug("Baseline is null - ignore this line");
                continue;
            }
            float tmpLineStartX = lineStartX;
            float regionStartMinX = (float) tr.getBoundingBox().getMinX();
            double regionWidth = tr.getBoundingBox().getWidth();
            // first line
            if (i == 0) {
                lineStartY = (float) (minY + lineMeanHeight);
                /*
                 * if the first line of a text region is indented by more than a quarter of the
                 * region width then take this into account in the printed text
                 */
                if (lineRect.getMinX() > regionStartMinX) {
                    if (lineRect.getMinX() - regionStartMinX > regionWidth / 4) {
                        tmpLineStartX = (float) baseLineRect.getMinX();
                    }
                }
            } else // for subsequent lines
            {
                // same indentation rule as for the first line
                if (lineRect.getMinX() > regionStartMinX) {
                    if (lineRect.getMinX() - regionStartMinX > regionWidth / 4) {
                        tmpLineStartX = (float) baseLineRect.getMinX();
                    }
                }
                // move one line further relative to the previously printed line
                lineStartY = lineStartY + lineMeanHeight + leading;
            }
            // The baseline starts clearly right of the region's left edge (by more than
            // twelfthPoints[1][0] — presumably the width of one twelfth of the page, TODO confirm):
            // look for another region lying in front of this line and start the text behind it.
            // NOTE(review): baseLineRect cannot be null here because of the "continue" above.
            if (baseLineRect != null && regionStartMinX < baseLineRect.getMinX() && (baseLineRect.getMinX() - regionStartMinX) > twelfthPoints[1][0]) {
                for (TrpTextRegionType region : tr.getPage().getTextRegions(false)) {
                    if (!region.getId().equals(tr.getId())) {
                        double regionMinX = region.getBoundingBox().getMinX();
                        double regionMaxX = region.getBoundingBox().getMaxX();
                        double regionMinY = region.getBoundingBox().getMinY();
                        double regionMaxY = region.getBoundingBox().getMaxY();
                        // horizontal center of the other region
                        double meanX = regionMinX + (regionMaxX - regionMinX) / 2;
                        // another region before the line: its center lies between this region's left edge
                        // and the baseline start, and the top of the baseline box lies within its vertical extent
                        if (meanX > regionStartMinX && meanX < baseLineRect.getMinX() && baseLineRect.getMinY() < regionMaxY && baseLineRect.getMinY() > regionMinY) {
                            tmpLineStartX = (float) regionMaxX + lineMeanHeight;
                            logger.debug("region " + region.getId() + " overlaps this other region " + tr.getId());
                            break;
                        }
                    }
                }
            }
            i++;
            /*
             * Word level output does not seem useful for uniform output;
             * better to emit whole lines only.
             */
            /*
             * make chunks out of the line text
             * so it is possible to have different fonts, underlines and other text styles in one line
             *
             * possible text styles are: fontFamily, serif, monospace, fontSize, kerning, textColour,
             * bgColour, reverseVideo, bold, italic, underlined, subscript, superscript, strikethrough,
             * smallCaps, letterSpaced
             */
            List<Chunk> chunkList = new ArrayList<Chunk>();
            /*
             * if the line text is empty (or word level is requested) -> use the words of this line as line text
             * otherwise take the text in the line
             */
            List<TextStyleTag> styleTags = new ArrayList<TextStyleTag>();
            // plain text of the line, used below only for the right-to-left detection
            String shapeText = "";
            if (l.getUnicodeText().isEmpty() || useWordLevel) {
                List<WordType> words = l.getWord();
                int chunkIndex = 0;
                for (WordType wt : words) {
                    TrpWordType w = (TrpWordType) wt;
                    String wordText = "";
                    // add a space chunk before every word except the first one (also before words without text)
                    if (chunkIndex > 0) {
                        chunkList.add(chunkIndex, new Chunk(" "));
                        chunkIndex++;
                    }
                    if (!w.getUnicodeText().isEmpty()) {
                        // remember all style tags for text formatting later on
                        styleTags.addAll(w.getTextStyleTags());
                        if (!shapeText.equals("")) {
                            shapeText = shapeText.concat(" ");
                        }
                        wordText = wordText.concat(w.getUnicodeText());
                        shapeText = shapeText.concat(w.getUnicodeText());
                        // one chunk per character; j is the character position within the word
                        for (int j = 0; j < wordText.length(); ++j) {
                            String currentCharacter = wordText.substring(j, j + 1);
                            chunkList.add(chunkIndex, formatText(currentCharacter, styleTags, j, w, cache));
                            chunkIndex++;
                        }
                        // the style tags only apply to the word they came from
                        styleTags.clear();
                    }
                }
            } else if (!l.getUnicodeText().isEmpty()) {
                String lineText = l.getUnicodeText();
                shapeText = lineText;
                styleTags.addAll(l.getTextStyleTags());
                // one chunk per character; j is the character position within the line
                for (int j = 0; j < lineText.length(); ++j) {
                    String currentCharacter = lineText.substring(j, j + 1);
                    chunkList.add(j, formatText(currentCharacter, styleTags, j, l, cache));
                }
            } else // empty shape
            {
                // NOTE(review): this branch looks unreachable — an empty line text already takes
                // the first branch. A line with neither text nor words therefore ends up with an
                // empty phrase instead of being skipped; confirm whether that is intended.
                logger.debug("empty shape ");
                continue;
            }
            Phrase phrase = new Phrase();
            // trim is important to get the 'real' first char for rtl definition
            boolean rtl = textIsRTL(shapeText.trim());
            if (rtl) {
                logger.debug("&&&&&&&& STRING IS RTL : ");
            }
            // right-to-left text: append the chunks in reverse order;
            // otherwise all chunks are appended at once in the first iteration and the loop ends
            for (int j = chunkList.size() - 1; j >= 0; j--) {
                if (rtl) {
                    phrase.add(chunkList.get(j));
                } else {
                    phrase.addAll(chunkList);
                    break;
                }
            }
            /*
             * No rotation for single lines in an overall horizontal text region
             * Reason: Vertical line uses too much space - calculated for horizontal
             */
            double rotation = 0;
            if (isVerticalRegion) {
                rotation = (baseline != null ? computeRotation(baseline) : 0);
                if (rotation != 0) {
                    /*
                     * if we rotate e.g. 90 degrees then we should use the actual x location of the line
                     * so vertical text must be treated differently than horizontal text
                     */
                    // NOTE(review): baseLineRect is never null at this point, so the else branch is dead
                    if (baseLineRect != null) {
                        if (rtl) {
                            tmpLineStartX = (float) baseLineRect.getMaxX();
                        } else {
                            tmpLineStartX = (float) baseLineRect.getMinX();
                        }
                        lineStartY = (float) baseLineRect.getMaxY();
                    } else if (lineRect != null) {
                        tmpLineStartX = lineRect.x;
                        lineStartY = (float) lineRect.getMaxY();
                    }
                }
            }
            // for rtl export: right end and width of the line
            float lineEndX = 0;
            float width = 0;
            if (baseLineRect != null) {
                lineEndX = (float) baseLineRect.getMaxX();
                width = (float) baseLineRect.getWidth();
                // deriving tmpLineStartX from lineEndX here would give every line its own start
                // instead of a combined start for all lines in a region, so it is not done
            } else if (lineRect != null) {
                lineEndX = lineRect.x + lineRect.width;
                width = (float) lineRect.getWidth();
            }
            // mainly for very small regions at the very left of a page:
            // make sure the end of the line lies right of its start
            if (tmpLineStartX > lineEndX) {
                lineEndX = tmpLineStartX + width;
            }
            // add the uniform string (= line); tags are already handled while the chunks are
            // formatted, there is no separate highlighting step anymore
            addUniformString(tr.getBoundingBox(), lineMeanHeight, tmpLineStartX, lineStartY, lineEndX, phrase, cb, cutoffLeft, cutoffTop, bf, twelfthPoints[1][0], false, null, rotation, rtl);
        }
    }
}
Also used : Rectangle(java.awt.Rectangle) ArrayList(java.util.ArrayList) Phrase(com.itextpdf.text.Phrase) Chunk(com.itextpdf.text.Chunk) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) Point(java.awt.Point) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpBaselineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType) TextStyleTag(eu.transkribus.core.model.beans.customtags.TextStyleTag) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)

Example 2 with TrpBaselineType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType in project TranskribusCore by Transkribus.

the class TrpPdfDocument method addTextFromTextRegion.

/**
 * Writes the text of every line of a text region via {@code addString}.
 *
 * <p>The region's line list is first sorted in place by reading order. For each line:
 * <ul>
 * <li>the rotation is computed from the baseline (0 if there is no baseline);</li>
 * <li>the vertical center of the line's bounding box is computed if the line has a baseline;
 *     a line without baseline reuses the value of the preceding line (0 for the first line);</li>
 * <li>the text is written word by word if the line has words and either has no text of its own
 *     or {@code useWordLevel} is set; otherwise a non-empty line text is written as a whole,
 *     blackened according to the line's blackening tags when {@code doBlackening} is set;</li>
 * <li>if {@code highlightTags} is set, {@code highlightTagsForShape} is called for each word
 *     (word level) or for the line.</li>
 * </ul>
 *
 * <p>NOTE(review): on the word-level path no blackening is applied, even when
 * {@code doBlackening} is set — confirm that this is intended.
 *
 * @param tr the text region whose lines are written
 * @param cb passed through to {@code addString}
 * @param cutoffLeft passed through to {@code addString}
 * @param cutoffTop passed through to {@code addString}
 * @param bf passed through to {@code addString}
 * @param cache passed through to {@code highlightTagsForShape}
 * @throws IOException declared by this method; which callee raises it is not visible here
 */
private void addTextFromTextRegion(final TextRegionType tr, final PdfContentByte cb, int cutoffLeft, int cutoffTop, BaseFont bf, ExportCache cache) throws IOException {
    List<TextLineType> lines = tr.getTextLine();
    if (lines == null || lines.isEmpty()) {
        return;
    }
    // sort according to reading order
    Collections.sort(lines, new TrpElementReadingOrderComparator<TextLineType>(true));
    // deliberately declared outside the loop: a line without baseline keeps the previous value
    double baseLineMeanY = 0;
    for (TextLineType lt : lines) {
        TrpTextLineType l = (TrpTextLineType) lt;
        TrpBaselineType baseline = (TrpBaselineType) l.getBaseline();
        double rotation = (baseline != null ? computeRotation(baseline) : 0);
        if (baseline != null) {
            // vertical center of the line's bounding box: lowest point moved up by half the height
            java.awt.Rectangle lineRect = l.getBoundingBox();
            baseLineMeanY = lineRect.getMaxY() - ((lineRect.getMaxY() - lineRect.getMinY()) / 2);
        }
        String lineText = l.getUnicodeText();
        // decided once and used for both writing and highlighting
        boolean wordLevel = (lineText.isEmpty() || useWordLevel) && !l.getWord().isEmpty();
        boolean rtl = false;
        if (wordLevel) {
            for (WordType wt : l.getWord()) {
                TrpWordType w = (TrpWordType) wt;
                String text = w.getUnicodeText();
                // words without text content are skipped
                if (!text.isEmpty()) {
                    java.awt.Rectangle boundRect = w.getBoundingBox();
                    rtl = textIsRTL(text.trim());
                    addString(boundRect, baseLineMeanY, text, cb, cutoffLeft, cutoffTop, bf, rotation, rtl);
                }
            }
        } else if (!lineText.isEmpty()) {
            String outputText = lineText;
            // get surrounding rectangle coords of this line
            java.awt.Rectangle boundRect = l.getBoundingBox();
            Set<Entry<CustomTag, String>> blackSet = ExportUtils.getAllTagsOfThisTypeForShapeElement(l, RegionTypeUtil.BLACKENING_REGION.toLowerCase()).entrySet();
            if (doBlackening && !blackSet.isEmpty()) {
                // for all blackening regions replace text with ****
                for (Entry<CustomTag, String> currEntry : blackSet) {
                    if (!currEntry.getKey().isIndexed()) {
                        // tag without index: blacken the complete line
                        outputText = outputText.replaceAll(".", "*");
                    } else {
                        outputText = blackenString(currEntry, outputText);
                    }
                }
            }
            rtl = textIsRTL(outputText.trim());
            addString(boundRect, baseLineMeanY, outputText, cb, cutoffLeft, cutoffTop, bf, rotation, rtl);
        }
        if (highlightTags) {
            if (wordLevel) {
                // NOTE(review): rtl is the value determined for the last non-empty word of the line
                for (WordType wt : l.getWord()) {
                    highlightTagsForShape((TrpWordType) wt, rtl, cache);
                }
            } else {
                highlightTagsForShape(l, rtl, cache);
            }
        }
    }
}
Also used : Rectangle(java.awt.Rectangle) Set(java.util.Set) HashSet(java.util.HashSet) Rectangle(java.awt.Rectangle) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpBaselineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType) Entry(java.util.Map.Entry) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)

Example 3 with TrpBaselineType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType in project TranskribusCore by Transkribus.

the class TrpPdfDocument method highlightTagsForShape.

/**
 * Registers tag lines for all selected tags of a shape (text line or word).
 *
 * <p>Only tags whose name is contained in the selection returned by the export cache are
 * handled. For each of them a vertical shift is derived from the number of overlaps with the
 * tags handled before (as reported by {@code getAmountOfOverlaps}), and
 * {@code calculateTagLines} is called with the bounding box of the baseline. Without a
 * baseline nothing is registered. For a word the baseline of its parent line is used.
 *
 * @param shape the line or word whose tags are processed
 * @param rtl passed through to {@code calculateTagLines}
 * @param cache supplies the set of selected tag names
 * @throws IOException declared by this method; which callee raises it is not visible here
 */
private void highlightTagsForShape(ITrpShapeType shape, boolean rtl, ExportCache cache) throws IOException {
    Set<Entry<CustomTag, String>> tagEntries = ExportUtils.getAllTagsForShapeElement(shape).entrySet();
    Set<String> selectedTagNames = cache.getOnlySelectedTagnames(CustomTagFactory.getRegisteredTagNames());
    // offset/length of every tag handled so far, in handling order
    int[] seenLengths = new int[tagEntries.size()];
    int[] seenOffsets = new int[tagEntries.size()];

    // a word has no baseline of its own -> take the one of the line it belongs to
    BaselineType baseline = null;
    if (shape instanceof TrpTextLineType) {
        baseline = ((TrpTextLineType) shape).getBaseline();
    } else if (shape instanceof TrpWordType) {
        TrpTextLineType parentLine = (TrpTextLineType) ((TrpWordType) shape).getParentShape();
        baseline = parentLine.getBaseline();
    }

    // stays true unless the first baseline point has a smaller y than the last one
    boolean falling = true;
    try {
        List<Point> baselinePoints = (baseline != null) ? PointStrUtils.parsePoints(baseline.getPoints()) : null;
        if (baselinePoints != null) {
            int pointCount = baselinePoints.size();
            if (pointCount >= 2) {
                Point first = baselinePoints.get(0);
                Point last = baselinePoints.get(pointCount - 1);
                if (first.y < last.y) {
                    falling = false;
                }
            }
        }
    } catch (Exception e) {
        // best effort: on any failure the default (falling) is kept
        e.printStackTrace();
    }

    int handledTags = 0;
    for (Entry<CustomTag, String> entry : tagEntries) {
        CustomTag tag = entry.getKey();
        String tagName = tag.getTagName();
        if (!selectedTagNames.contains(tagName)) {
            continue;
        }
        String color = CustomTagFactory.getTagColor(tagName);
        int length = tag.getLength();
        int offset = tag.getOffset();
        // the more previously handled tags this one overlaps, the larger the shift of its line
        int overlaps = getAmountOfOverlaps(seenOffsets, seenLengths, offset, length);
        seenOffsets[handledTags] = offset;
        seenLengths[handledTags] = length;
        handledTags++;
        float yShift = (lineMeanHeight / 6) * overlaps;
        if (baseline == null) {
            // no baseline -> no position available for the tag line
            continue;
        }
        // remember where to draw the line (presumably collected in a list by calculateTagLines)
        java.awt.Rectangle baseLineRect = ((TrpBaselineType) baseline).getBoundingBox();
        calculateTagLines(baseLineRect, shape, tag.getContainedText(), offset, length, color, yShift, falling, rtl);
    }
}
Also used : Rectangle(java.awt.Rectangle) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) Point(java.awt.Point) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) Point(java.awt.Point) URISyntaxException(java.net.URISyntaxException) JAXBException(javax.xml.bind.JAXBException) FileNotFoundException(java.io.FileNotFoundException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) DocumentException(com.itextpdf.text.DocumentException) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpBaselineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType) Entry(java.util.Map.Entry) BaselineType(eu.transkribus.core.model.beans.pagecontent.BaselineType) TrpBaselineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType) Map(java.util.Map) HashMap(java.util.HashMap)

Example 4 with TrpBaselineType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType in project TranskribusCore by Transkribus.

the class TrpPdfDocument method getAverageBeginningOfBaselines.

/**
 * Calculates the x position at which the lines of a text region should be aligned.
 *
 * <p>The result is the average left edge (bounding-box min x) of all baselines that start
 * within the first tenth of the region's width, measured from the region's left edge. Lines
 * starting further right are ignored so that they do not shift the alignment position. Lines
 * without a baseline (or without a baseline bounding box) are skipped.
 *
 * @param tr the text region whose lines are inspected
 * @return the average start x of the qualifying baselines, or the left edge of the region's
 *         bounding box if no line qualifies
 */
private float getAverageBeginningOfBaselines(TextRegionType tr) {
    double regionMinX = tr.getBoundingBox().getMinX();
    double regionWidth = tr.getBoundingBox().getWidth();
    // Baseline boxes and the region box share one coordinate system (the fallback below returns
    // the region's min x), so the "first tenth" limit has to be offset by the region's left edge.
    double startLimit = regionMinX + regionWidth / 10;
    float sumOfStarts = 0;
    int nrOfLines = 0;
    for (TextLineType l : tr.getTextLine()) {
        TrpBaselineType bl = (TrpBaselineType) l.getBaseline();
        java.awt.Rectangle baselineRect = (bl == null ? null : bl.getBoundingBox());
        if (baselineRect == null) {
            // nothing to measure for a line without baseline
            continue;
        }
        double baselineMinX = baselineRect.getMinX();
        if (baselineMinX <= startLimit) {
            sumOfStarts += baselineMinX;
            nrOfLines++;
        }
    }
    if (nrOfLines > 0) {
        return sumOfStarts / nrOfLines;
    }
    return (float) regionMinX;
}
Also used : TrpBaselineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) Point(java.awt.Point)

Example 5 with TrpBaselineType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType in project TranskribusCore by Transkribus.

the class KlosterTeiToPageParser method parsePage.

/**
 * Converts one page of the TEI document into a PAGE XML structure with a single text region.
 * <p>
 * Starting at the sibling following {@code pbNode}, every {@code lb} element up to the next
 * {@code pb} node becomes one text line. The line's bounding box is read from the
 * {@code xmlns:x}, {@code xmlns:y}, {@code xmlns:w} and {@code xmlns:h} attributes, its text
 * from the node directly following the {@code lb} element, and its baseline is a horizontal
 * segment placed at 70% of the line height. The region's coordinates are the union of all
 * line boxes, or the whole page if no line was found.
 *
 * @param pbNode the {@code pb} element that starts the page; must carry the {@code facs},
 *               {@code n}, {@code xmlns:w} and {@code xmlns:h} attributes
 * @param save   if {@code true} and at least one line was found, the page XML is written to
 *               {@code PAGE_DIR} and the image is copied from {@code DIR} to {@code DST_DIR}
 * @throws IOException           if writing the XML file or copying the image fails
 * @throws JAXBException         if marshalling the page fails
 * @throws NumberFormatException if a numeric attribute is missing or not an integer
 */
static void parsePage(Node pbNode, boolean save) throws IOException, JAXBException {
    Element pb = (Element) pbNode;
    String imgFn = pb.getAttribute("facs");
    int pageN = Integer.parseInt(pb.getAttribute("n"));
    int pageHeight = Integer.parseInt(pb.getAttribute("xmlns:h"));
    int pageWidth = Integer.parseInt(pb.getAttribute("xmlns:w"));
    // pass the parsed image file name (the original passed the literal "imgfn")
    PcGtsType page = PageXmlUtils.createEmptyPcGtsType(imgFn, pageWidth, pageHeight);
    TrpTextRegionType region = new TrpTextRegionType();
    region.setId("region_1");
    System.out.println("page data: imgFn = " + imgFn + " n = " + pageN + " pageWidth = " + pageWidth + " pageHeight = " + pageHeight);
    // running union of all line bounding boxes
    int minX = Integer.MAX_VALUE, minY = Integer.MAX_VALUE;
    int maxX = Integer.MIN_VALUE, maxY = Integer.MIN_VALUE;
    int lineCount = 0;
    for (Node sibling = pbNode.getNextSibling(); sibling != null; sibling = sibling.getNextSibling()) {
        if (sibling.getNodeName().equals("pb")) {
            // the next page begins here
            break;
        }
        if (sibling.getNodeType() != Node.ELEMENT_NODE || !sibling.getNodeName().equals("lb")) {
            continue;
        }
        Element lb = (Element) sibling;
        int n = Integer.parseInt(lb.getAttribute("n"));
        int x = Integer.parseInt(lb.getAttribute("xmlns:x"));
        int y = Integer.parseInt(lb.getAttribute("xmlns:y"));
        int w = Integer.parseInt(lb.getAttribute("xmlns:w"));
        int h = Integer.parseInt(lb.getAttribute("xmlns:h"));
        minX = Math.min(minX, x);
        minY = Math.min(minY, y);
        maxX = Math.max(maxX, x + w);
        maxY = Math.max(maxY, y + h);
        // the line text is the node right after the lb element; an lb that is the last
        // sibling has no such node and yields an empty line instead of failing
        Node textNode = sibling.getNextSibling();
        String txt = textNode != null ? textNode.getTextContent() : "";
        txt = StringUtils.stripEnd(txt, " \r\n");
        System.out.format("line: n = %d, txt = %s, coords = [%d,%d,%d,%d]\n", n, txt, x, y, w, h);
        TrpTextLineType line = new TrpTextLineType();
        line.setCoords(bbToCoords(x, y, w, h));
        TextEquivType te = new TextEquivType();
        te.setUnicode(txt);
        line.setTextEquiv(te);
        line.setId("line_" + (++lineCount));
        // create baseline: horizontal segment at 70% of the line height
        TrpBaselineType bl = new TrpBaselineType();
        int yBl = (int) (y + 0.7 * h);
        bl.setPoints(x + "," + yBl + " " + (x + w) + "," + yBl);
        line.setBaseline(bl);
        region.getTextLine().add(line);
    }
    boolean hasLines = !region.getTextLine().isEmpty();
    if (hasLines) {
        region.setCoords(bbToCoords(minX, minY, maxX - minX, maxY - minY));
    } else {
        // no lines on this page: let the region cover the whole page
        region.setCoords(bbToCoords(0, 0, pageWidth, pageHeight));
    }
    page.getPage().getTextRegionOrImageRegionOrLineDrawingRegion().add(region);
    if (save && hasLines) {
        File xmlFile = new File(PAGE_DIR + FilenameUtils.getBaseName(imgFn) + ".xml");
        PageXmlUtils.marshalToFile(page, xmlFile);
        FileUtils.copyFile(new File(DIR + imgFn), new File(DST_DIR + imgFn));
        System.out.println("written page to: " + xmlFile.getAbsolutePath());
    }
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpBaselineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType) TextEquivType(eu.transkribus.core.model.beans.pagecontent.TextEquivType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType) File(java.io.File)

Aggregations

TrpBaselineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType)5 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)5 TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)3 TrpWordType (eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)3 Point (java.awt.Point)3 Rectangle (java.awt.Rectangle)3 CustomTag (eu.transkribus.core.model.beans.customtags.CustomTag)2 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)2 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)2 Entry (java.util.Map.Entry)2 Chunk (com.itextpdf.text.Chunk)1 DocumentException (com.itextpdf.text.DocumentException)1 Phrase (com.itextpdf.text.Phrase)1 TextStyleTag (eu.transkribus.core.model.beans.customtags.TextStyleTag)1 BaselineType (eu.transkribus.core.model.beans.pagecontent.BaselineType)1 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)1 TextEquivType (eu.transkribus.core.model.beans.pagecontent.TextEquivType)1 File (java.io.File)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1