use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType in project TranskribusCore by Transkribus.
the class TrpPdfDocument method addTextFromTextRegion.
/**
 * Writes the text of all lines of a text region via {@code addString} and, if the
 * {@code highlightTags} flag is set, registers tag highlights for each line or word.
 * <p>
 * The region's line list is sorted in place by reading order first. For every line the
 * text is emitted either word by word (when the line has words and either has no line
 * text or {@code useWordLevel} is set) or as one string for the whole line. On the line
 * level, text covered by blackening tags is masked when {@code doBlackening} is set.
 * <p>
 * {@code useWordLevel}, {@code doBlackening} and {@code highlightTags} are read from the
 * enclosing class (declared outside this block).
 *
 * @param tr         text region whose lines are written; nothing happens if it has no lines
 * @param cb         PDF content target, passed through to {@code addString}
 * @param cutoffLeft passed through to {@code addString}
 * @param cutoffTop  passed through to {@code addString}
 * @param bf         font, passed through to {@code addString}
 * @param cache      export cache, passed through to {@code highlightTagsForShape}
 * @throws IOException propagated from the helpers called here
 */
private void addTextFromTextRegion(final TextRegionType tr, final PdfContentByte cb, int cutoffLeft, int cutoffTop, BaseFont bf, ExportCache cache) throws IOException {
    List<TextLineType> lines = tr.getTextLine();
    if (lines == null || lines.isEmpty()) {
        return;
    }

    // sort according to reading order (this sorts the region's own list in place)
    Collections.sort(lines, new TrpElementReadingOrderComparator<TextLineType>(true));

    // Deliberately declared outside the loop: a line without a baseline reuses the
    // value computed for the most recent line that had one (0 if there was none yet).
    double baseLineMeanY = 0;

    for (TextLineType lt : lines) {
        TrpTextLineType l = (TrpTextLineType) lt;

        // rotation is only computed when a baseline exists, otherwise the text is not rotated
        TrpBaselineType baseline = (TrpBaselineType) l.getBaseline();
        double rotation = (baseline != null ? computeRotation(baseline) : 0);

        if (baseline != null) {
            // vertical centre of the bounding box: lowest y minus half of the box height
            // NOTE(review): the rectangle comes from the line (l.getBoundingBox()), not from
            // the baseline, although the variable name suggests the baseline - confirm intent.
            java.awt.Rectangle baseLineRect = l.getBoundingBox();
            baseLineMeanY = baseLineRect.getMaxY() - ((baseLineRect.getMaxY() - baseLineRect.getMinY()) / 2);
        }

        boolean rtl = false;
        // word level is used when the line has words and either no line text or useWordLevel is set
        boolean wordLevel = (l.getUnicodeText().isEmpty() || useWordLevel) && !l.getWord().isEmpty();

        if (wordLevel) {
            for (WordType wt : l.getWord()) {
                TrpWordType w = (TrpWordType) wt;
                if (!w.getUnicodeText().isEmpty()) {
                    java.awt.Rectangle boundRect = w.getBoundingBox();
                    String text = w.getUnicodeText();
                    // rtl keeps the value of the last non-empty word; it is reused for highlighting below
                    rtl = textIsRTL(text.trim());
                    addString(boundRect, baseLineMeanY, text, cb, cutoffLeft, cutoffTop, bf, rotation, rtl);
                }
            }
        } else if (!l.getUnicodeText().isEmpty()) {
            String lineTextTmp = l.getUnicodeText();
            // get surrounding rectangle coords of this line
            java.awt.Rectangle boundRect = l.getBoundingBox();
            Set<Entry<CustomTag, String>> blackSet = ExportUtils.getAllTagsOfThisTypeForShapeElement(l, RegionTypeUtil.BLACKENING_REGION.toLowerCase()).entrySet();
            if (doBlackening && blackSet.size() > 0) {
                for (Map.Entry<CustomTag, String> currEntry : blackSet) {
                    if (!currEntry.getKey().isIndexed()) {
                        // non-indexed blackening tag: mask the whole line
                        // (regex "." matches every character except line terminators)
                        lineTextTmp = lineTextTmp.replaceAll(".", "*");
                    } else {
                        // indexed blackening tag: masking is delegated to blackenString
                        lineTextTmp = blackenString(currEntry, lineTextTmp);
                    }
                }
            }
            rtl = textIsRTL(lineTextTmp.trim());
            addString(boundRect, baseLineMeanY, lineTextTmp, cb, cutoffLeft, cutoffTop, bf, rotation, rtl);
        }

        if (highlightTags) {
            if (wordLevel) {
                for (WordType wt : l.getWord()) {
                    TrpWordType w = (TrpWordType) wt;
                    highlightTagsForShape(w, rtl, cache);
                }
            } else {
                highlightTagsForShape(l, rtl, cache);
            }
        }
    }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType in project TranskribusCore by Transkribus.
the class TrpPdfDocument method highlightTagsForShape.
/**
 * Hands every selected tag of a line or word to {@code calculateTagLines}.
 * <p>
 * The baseline is taken from the shape itself when it is a {@code TrpTextLineType}, or
 * from the parent line when it is a {@code TrpWordType}; for any other shape it stays
 * {@code null} and no tag lines are calculated. Each selected tag gets a vertical shift of
 * {@code (lineMeanHeight / 6)} multiplied by the result of {@code getAmountOfOverlaps}
 * for that tag against the tags handled before it.
 *
 * @param shape line or word whose tags are processed
 * @param rtl   passed through to {@code calculateTagLines}
 * @param cache supplies the set of selected tag names
 * @throws IOException propagated from the helpers called here
 */
private void highlightTagsForShape(ITrpShapeType shape, boolean rtl, ExportCache cache) throws IOException {
    Set<Entry<CustomTag, String>> tagEntries = ExportUtils.getAllTagsForShapeElement(shape).entrySet();
    Set<String> selectedTagNames = cache.getOnlySelectedTagnames(CustomTagFactory.getRegisteredTagNames());

    // offsets/lengths of the selected tags handled so far, filled front to back
    int[] seenLengths = new int[tagEntries.size()];
    int[] seenOffsets = new int[tagEntries.size()];

    BaselineType baseline = null;
    if (shape instanceof TrpTextLineType) {
        baseline = ((TrpTextLineType) shape).getBaseline();
    } else if (shape instanceof TrpWordType) {
        TrpTextLineType parentLine = (TrpTextLineType) ((TrpWordType) shape).getParentShape();
        baseline = parentLine.getBaseline();
    }

    // stays true unless the baseline has at least two points and the first point's y
    // is smaller than the last point's y; also stays true if parsing fails
    boolean falling = true;
    try {
        if (baseline != null) {
            List<Point> baselinePoints = PointStrUtils.parsePoints(baseline.getPoints());
            if (baselinePoints != null) {
                int pointCount = baselinePoints.size();
                if (pointCount >= 2 && baselinePoints.get(0).y < baselinePoints.get(pointCount - 1).y) {
                    falling = false;
                }
            }
        }
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    int slot = 0;
    for (Map.Entry<CustomTag, String> entry : tagEntries) {
        CustomTag tag = entry.getKey();
        if (!selectedTagNames.contains(tag.getTagName())) {
            continue;
        }

        String color = CustomTagFactory.getTagColor(tag.getTagName());
        int tagLength = tag.getLength();
        int tagOffset = tag.getOffset();

        // the overlap count is taken before the current tag is recorded in the arrays
        int overlaps = getAmountOfOverlaps(seenOffsets, seenLengths, tagOffset, tagLength);
        seenOffsets[slot] = tagOffset;
        seenLengths[slot] = tagLength;
        slot++;

        float yShift = (lineMeanHeight / 6) * overlaps;

        if (baseline == null) {
            continue;
        }
        java.awt.Rectangle baselineBounds = ((TrpBaselineType) baseline).getBoundingBox();
        calculateTagLines(baselineBounds, shape, tag.getContainedText(), tagOffset, tagLength, color, yShift, falling, rtl);
    }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType in project TranskribusCore by Transkribus.
the class TrpPdfDocument method isOnlyRegionInThisRow.
/**
 * Tells whether no other text-bearing text region shares the vertical band of the given region.
 * <p>
 * Returns {@code true} immediately when the list holds exactly one region. Otherwise every
 * other {@code TextRegionType} that has at least one line with non-empty text is compared:
 * as soon as its vertical centre lies strictly inside the y-range of {@code regionToCompare},
 * or the vertical centre of {@code regionToCompare} lies strictly inside its y-range,
 * the result is {@code false}. Only y-coordinates are considered.
 *
 * @param regions         regions to compare against
 * @param regionToCompare region being tested; list entries with an equal id are skipped
 * @return {@code true} if no such overlapping region was found
 */
private boolean isOnlyRegionInThisRow(List<TrpRegionType> regions, TextRegionType regionToCompare) {
    java.awt.Rectangle compareBlock = regionToCompare.getBoundingBox();
    float compareMinY = (float) compareBlock.getMinY();
    float compareMaxY = (float) compareBlock.getMaxY();
    float compareMeanY = compareMinY + (compareMaxY - compareMinY) / 2;

    if (regions.size() == 1) {
        return true;
    }

    for (RegionType r : regions) {
        // TODO add paths for tables etc.
        if (!(r instanceof TextRegionType)) {
            continue;
        }
        // Compare ids by value: '!=' on objects only tests reference identity, so an equal
        // id held in a different object would not be recognised as the same region.
        if (java.util.Objects.equals(r.getId(), regionToCompare.getId())) {
            continue;
        }
        TextRegionType tr = (TextRegionType) r;

        // regions without lines, or with only empty lines, are ignored
        boolean textFound = false;
        for (TextLineType tlt : tr.getTextLine()) {
            TrpTextLineType l = (TrpTextLineType) tlt;
            if (!l.getUnicodeText().isEmpty()) {
                textFound = true;
                break;
            }
        }
        if (!textFound) {
            continue;
        }

        java.awt.Rectangle block = tr.getBoundingBox();
        float minY = (float) block.getMinY();
        float maxY = (float) block.getMaxY();
        float meanY = minY + (maxY - minY) / 2;

        boolean otherCentreInsideCompared = meanY > compareMinY && meanY < compareMaxY;
        boolean comparedCentreInsideOther = compareMeanY > minY && compareMeanY < maxY;
        if (otherCentreInsideCompared || comparedCentreInsideOther) {
            return false;
        }
    }
    return true;
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType in project TranskribusCore by Transkribus.
the class CustomTagListTest method testCommonTag.
/**
 * Adds a bold style tag (0, 10) and an italic style tag (3, 3) to the line "Hello world!",
 * expects the tag list to hold three tags afterwards, and checks that the common text style
 * tag for range (2, 6) has the same effective values as a bold-only tag with offset 2 and length 6.
 */
@Test
public void testCommonTag() {
    TrpTextLineType line = new TrpTextLineType(new TrpTextRegionType(new TrpPageType()));
    line.setUnicodeText("Hello world!", null);
    CustomTagList tl = new CustomTagList(line);

    TextStyleTag ts1 = new TextStyleTag(0, 10);
    ts1.setBold(true);
    tl.addOrMergeTag(ts1, null);
    logger.debug("ts1 = " + tl);

    TextStyleTag ts2 = new TextStyleTag(3, 3);
    ts2.setItalic(true);
    tl.addOrMergeTag(ts2, "italic");
    logger.debug("ts2 = " + tl);

    Assert.assertEquals("Nr. of merged elements must be 3", 3, tl.getTags().size());

    TextStyleTag common = tl.getCommonIndexedCustomTag(TextStyleTag.TAG_NAME, 2, 6);
    TextStyleTag check = new TextStyleTag();
    check.setBold(true);
    check.setOffset(2);
    check.setLength(6);
    logger.debug("common = " + common);
    // previously logged 'common' a second time instead of the expected tag
    logger.debug("check = " + check);

    // fail with a readable assertion instead of a NullPointerException on the next line
    Assert.assertNotNull("No common tag returned for range (2, 6)", common);
    Assert.assertTrue("Common tag " + common + " does not match expected " + check, common.equalsEffectiveValues(check, true));
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType in project TranskribusCore by Transkribus.
the class CustomTagListTest method testMultipleRandomIndexedAddOrMergeTag.
/**
 * Adds 1000 randomly generated tags to a tag list built on the line "Hello world!".
 * <p>
 * Each round draws an offset in {@code [0, textLength + 5)} and a length such that the tag
 * may reach past the end of the text, then creates either an indexed structure tag, a text
 * style tag with random bold/italic/monospace flags, or a non-indexed tag. An
 * {@code IndexOutOfBoundsException} is expected exactly for tags that are not inside the
 * full text range; after every round {@code checkIntegrity} is run on the list.
 */
// @Ignore
@Test
public void testMultipleRandomIndexedAddOrMergeTag() {
    TrpTextLineType textLine = new TrpTextLineType(new TrpTextRegionType(new TrpPageType()));
    textLine.setUnicodeText("Hello world!", null);
    CustomTagList tagList = new CustomTagList(textLine);

    int textLength = tagList.getTextLength();
    CustomTag fullRange = new CustomTag("test", 0, textLength);
    String[] plainTagNames = { "a_non_indexed", "b_ni", "c_balbla_non_indexed" };

    // + overlap to test exceptions when index out of bounds!
    int offsetBound = textLength + 5;
    final int rounds = (int) 1e3;

    for (int round = 0; round < rounds; ++round) {
        int offset = rand.nextInt(offsetBound);
        int length = rand.nextInt(offsetBound - offset) + 1;

        CustomTag candidate;
        switch (rand.nextInt(3)) {
        case 0: {
            // structure tag
            candidate = new CustomTag("a_test_indexed", offset, length);
            break;
        }
        case 1: {
            // text style tag
            TextStyleTag styled = new TextStyleTag(offset, length);
            styled.setBold(rand.nextBoolean());
            styled.setItalic(rand.nextBoolean());
            styled.setMonospace(rand.nextBoolean());
            candidate = styled;
            break;
        }
        default: {
            // non-indexed tag
            candidate = new CustomTag(plainTagNames[rand.nextInt(3)]);
            break;
        }
        }

        logger.trace("i=" + round + "/" + rounds);
        logger.trace("adding custom tag: " + candidate);
        logger.trace("list before = " + tagList);
        try {
            tagList.addOrMergeTag(candidate, null);
            // no exception: the tag must be non-indexed or lie inside the full text range
            Assert.assertTrue("Indexed CustomTag was not inside but no exception thrown: " + candidate, !candidate.isIndexed() || fullRange.getOverlapType(candidate) == OverlapType.INSIDE);
        } catch (IndexOutOfBoundsException ie) {
            // exception: the tag must not lie inside the full text range
            Assert.assertTrue("CustomTag was inside but exception thrown: " + candidate, fullRange.getOverlapType(candidate) != OverlapType.INSIDE);
            logger.trace("Exception for tag not inside: " + candidate);
        }
        logger.trace("list after = " + tagList);
        checkIntegrity(tagList);
    }
    logger.info("list = " + tagList);
}
Aggregations