use of eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType in project TranskribusCore by Transkribus.
the class TrpPdfDocument method highlightTagsForShape.
/**
 * Registers an underline for every selected custom tag found on a text line or word.
 * <p>
 * The baseline of the line (or, for a word, of its parent line) determines where the
 * underline is placed. Shapes for which no baseline can be resolved are skipped, because
 * nothing can be drawn for them.
 *
 * @param shape the text line or word whose tags are highlighted; other shape types are ignored
 * @param rtl   passed through to {@code calculateTagLines}; {@code true} for right-to-left text
 * @param cache supplies the tag names that were selected for the export
 * @throws IOException declared for callers; the statements in this method do not throw it directly
 */
private void highlightTagsForShape(ITrpShapeType shape, boolean rtl, ExportCache cache) throws IOException {
    // Resolve the baseline first: without it no underline can be positioned at all.
    BaselineType baseline = null;
    if (shape instanceof TrpTextLineType) {
        baseline = ((TrpTextLineType) shape).getBaseline();
    } else if (shape instanceof TrpWordType) {
        // A word without a (line) parent previously caused a NullPointerException /
        // ClassCastException here; such words are now simply skipped.
        ITrpShapeType parent = ((TrpWordType) shape).getParentShape();
        if (parent instanceof TrpTextLineType) {
            baseline = ((TrpTextLineType) parent).getBaseline();
        }
    }
    if (baseline == null) {
        return;
    }

    // The baseline counts as "falling" unless its first point has a smaller y than its last point.
    boolean falling = true;
    try {
        List<Point> ptsList = PointStrUtils.parsePoints(baseline.getPoints());
        if (ptsList != null) {
            int size = ptsList.size();
            if (size >= 2 && ptsList.get(0).y < ptsList.get(size - 1).y) {
                falling = false;
            }
        }
    } catch (Exception e) {
        // Best effort: an unparsable point string must not abort the export,
        // the default orientation (falling) is used instead.
        e.printStackTrace();
    }

    Set<Entry<CustomTag, String>> entrySet = ExportUtils.getAllTagsForShapeElement(shape).entrySet();
    Set<String> wantedTags = cache.getOnlySelectedTagnames(CustomTagFactory.getRegisteredTagNames());

    // Offsets and lengths of the tags handled so far; used to detect overlapping tags.
    int[] prevLength = new int[entrySet.size()];
    int[] prevOffset = new int[entrySet.size()];
    int tagId = 0;

    for (Map.Entry<CustomTag, String> currEntry : entrySet) {
        CustomTag tag = currEntry.getKey();
        if (!wantedTags.contains(tag.getTagName())) {
            continue;
        }
        String color = CustomTagFactory.getTagColor(tag.getTagName());
        int currLength = tag.getLength();
        int currOffset = tag.getOffset();

        // If the current tag overlaps previous tags, the underline is moved further away
        // from the text line: the shift is a multiple of one sixth of the mean line height.
        int k = getAmountOfOverlaps(prevOffset, prevLength, currOffset, currLength);
        prevOffset[tagId] = currOffset;
        prevLength[tagId] = currLength;
        tagId++;
        float yShift = (lineMeanHeight / 6) * k;

        // Remember where to draw the line; calculateTagLines stores it for later rendering.
        java.awt.Rectangle baseLineRect = ((TrpBaselineType) baseline).getBoundingBox();
        calculateTagLines(baseLineRect, shape, tag.getContainedText(), currOffset, currLength, color, yShift, falling, rtl);
    }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType in project TranskribusCore by Transkribus.
the class TrpPdfDocument method calculateTagLines.
/**
 * Computes the underline segment of one tag and appends it, together with its colour,
 * to {@code lineAndColorList}.
 * <p>
 * The horizontal position of the tag is interpolated from its character offset and length
 * relative to the text of the shape. The vertical position follows the slope of the
 * baseline bounding box. Both ends of the tag are scaled by the width of the same rectangle
 * (the word box for words, the baseline box otherwise); the start was previously scaled by
 * the baseline width even for words, which misplaced the start of word-level underlines.
 *
 * @param baseLineRect bounding box of the baseline the underline is aligned to
 * @param shape        the line or word carrying the tag; its text defines the character ratios
 * @param tagText      text covered by the tag (not used by the computation)
 * @param offset       character offset of the tag within the shape text
 * @param length       number of characters covered by the tag
 * @param color        colour stored alongside the computed line
 * @param yShift       vertical shift added to both end points of the line
 * @param falling      selects which end of the line receives the start height and which the end height
 * @param rtl          {@code true} to measure the tag position from the right edge of the shape
 */
private void calculateTagLines(java.awt.Rectangle baseLineRect, ITrpShapeType shape, String tagText, int offset, int length, String color, float yShift, boolean falling, boolean rtl) {
    String lineText = shape.getUnicodeText();
    if (lineText == null || lineText.isEmpty()) {
        // Without text the tag position cannot be expressed as a fraction of the shape width;
        // dividing by a zero length would only yield NaN/Infinity coordinates.
        return;
    }

    java.awt.Rectangle shapeRect = (shape instanceof TrpWordType)
            ? ((TrpWordType) shape).getBoundingBox()
            : baseLineRect;

    float shapeMinX = (float) shapeRect.getMinX();
    float shapeMaxX = (float) shapeRect.getMaxX();
    float maxY = (float) baseLineRect.getMaxY();

    // Slope of the baseline box diagonal (height over width). This is what
    // tan(atan(height / width)) evaluates to; a zero-width box is treated as flat.
    double slope = baseLineRect.width == 0 ? 0d : (double) baseLineRect.height / baseLineRect.width;

    // Relation of tag start / tag end to the entire text length.
    float textLength = lineText.length();
    float ratioOfTagStart = offset / textLength;
    float ratioOfTagEnd = (offset + length) / textLength;

    float tagStartX;
    float tagEndX;
    if (!rtl) {
        tagStartX = shapeMinX + (ratioOfTagStart * shapeRect.width);
        tagEndX = shapeMinX + (ratioOfTagEnd * shapeRect.width);
    } else {
        tagStartX = shapeMaxX - (ratioOfTagStart * shapeRect.width);
        tagEndX = shapeMaxX - (ratioOfTagEnd * shapeRect.width);
    }

    // A tag that starts exactly at the text origin (left edge for LTR, right edge for RTL)
    // keeps a start height of zero.
    boolean startsAtTextOrigin = rtl ? tagStartX == shapeMaxX : tagStartX == shapeMinX;
    float tagStartHeight = startsAtTextOrigin ? 0 : (float) (slope * (tagStartX - shapeMinX));
    float tagEndHeight = (float) (slope * (tagEndX - shapeMinX));

    float tagStartY;
    float tagEndY;
    if (falling) {
        tagStartY = maxY - tagStartHeight;
        tagEndY = maxY - tagEndHeight;
    } else {
        tagStartY = maxY - tagEndHeight;
        tagEndY = maxY - tagStartHeight;
    }

    Line2D line = new Line2D.Double(tagStartX, tagStartY + yShift, tagEndX, tagEndY + yShift);
    java.util.Map.Entry<Line2D, String> pair = new java.util.AbstractMap.SimpleEntry<>(line, color);
    lineAndColorList.add(pair);
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType in project TranskribusCore by Transkribus.
the class TrpXlsxBuilder method writeXlsxForDoc.
/**
 * Writes the custom tags of a document into an Excel (xlsx) file.
 * <p>
 * For every exported page the transcript is taken from the cache, or built from the page's
 * current transcript if the cache has none. The tags of each line (or of each word when
 * {@code wordBased} is set) are handed to {@code writeTagsForShapeElement}. Afterwards the
 * columns of every sheet are auto-sized and the workbook is written to {@code exportFile}.
 *
 * @param doc         the document to export
 * @param wordBased   {@code true} to export tags per word, {@code false} to export them per line
 * @param exportFile  target file of the workbook
 * @param pageIndices zero-based indices of the pages to export, or {@code null} for all pages
 * @param monitor     optional progress monitor; may be {@code null}
 * @param cache       export cache providing the tag map, the selected tag names and cached transcripts
 * @throws IllegalArgumentException if {@code cache} is {@code null}
 * @throws NoTagsException          if the cache holds no custom tags for the document
 * @throws InterruptedException     if the monitor reports cancellation
 * @throws IOException              if the workbook contains no sheet or cannot be written
 * @throws Exception                for failures of the invoked transcript and tag-writing helpers
 */
public static void writeXlsxForDoc(TrpDoc doc, boolean wordBased, File exportFile, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws NoTagsException, Exception {
    if (cache == null) {
        throw new IllegalArgumentException("ExportCache must not be null.");
    }
    if (cache.getCustomTagMapForDoc().isEmpty()) {
        logger.info("No tags to store -> Xlsx export cancelled");
        throw new NoTagsException("No tags available to store into Xlsx");
    }

    List<TrpPage> pages = doc.getPages();
    String exportPath = exportFile.getPath();
    Set<String> selectedTags = cache.getOnlySelectedTagnames(ExportUtils.getOnlyWantedTagnames(CustomTagFactory.getRegisteredTagNames()));

    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to Excel", totalPages);
    }

    wb = new XSSFWorkbook();

    // Number of pages processed so far (differs from i when only selected pages are exported).
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export was canceled by user");
            }
            monitor.subTask("Processing page " + (c + 1));
        }

        TrpPage page = pages.get(i);

        // Try to get a previously loaded JAXB transcript; build it only if the cache has none.
        JAXBPageTranscript tr = cache.getPageTranscriptAtIndex(i);
        if (tr == null) {
            TrpTranscriptMetadata md = page.getCurrentTranscript();
            tr = new JAXBPageTranscript(md);
            tr.build();
        }
        TrpPageType trpPage = tr.getPage();

        logger.debug("writing xlsx for page " + (i + 1) + "/" + doc.getNPages());

        List<TrpTextRegionType> textRegions = trpPage.getTextRegions(true);
        for (int j = 0; j < textRegions.size(); ++j) {
            TrpTextRegionType r = textRegions.get(j);
            List<TextLineType> lines = r.getTextLine();
            for (int k = 0; k < lines.size(); ++k) {
                TrpTextLineType trpL = (TrpTextLineType) lines.get(k);
                if (wordBased) {
                    List<WordType> words = trpL.getWord();
                    for (int l = 0; l < words.size(); ++l) {
                        TrpWordType w = (TrpWordType) words.get(l);
                        writeTagsForShapeElement(w, trpL.getUnicodeText(), String.valueOf(doc.getId()), String.valueOf(page.getPageNr()), r.getId(), trpL.getId(), w.getId(), selectedTags);
                    }
                } else {
                    writeTagsForShapeElement(trpL, trpL.getUnicodeText(), String.valueOf(doc.getId()), String.valueOf(page.getPageNr()), r.getId(), trpL.getId(), "", selectedTags);
                }
            }
        }

        ++c;
        if (monitor != null) {
            // NOTE(review): the running total is reported here; if the monitor expects the
            // increment since the last call instead, this should be worked(1) - confirm.
            monitor.worked(c);
        }
    }

    // Auto-size the columns of every sheet, based on the number of cells in its first (header) row.
    for (int i = 0; i < wb.getNumberOfSheets(); i++) {
        Iterator<Row> rowIterator = wb.getSheetAt(i).rowIterator();
        if (rowIterator.hasNext()) {
            Row headerRow = rowIterator.next();
            int numberOfCells = headerRow.getPhysicalNumberOfCells();
            for (int j = 0; j < numberOfCells; j++) {
                wb.getSheetAt(i).autoSizeColumn(j);
            }
        }
    }

    // No sheet means no tags at all: fail before creating an output file.
    if (wb.getNumberOfSheets() == 0) {
        throw new IOException("Sorry - No tags available for export");
    }

    // try-with-resources closes the stream even if writing the workbook fails.
    try (FileOutputStream fOut = new FileOutputStream(exportPath)) {
        wb.write(fOut);
    }
    logger.info("wrote xlsx to: " + exportPath);
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType in project TranskribusCore by Transkribus.
the class TrpTeiStringBuilder method writeZonesForTextRegion.
/**
 * Writes the zone elements of one text region, nesting line and word zones as configured
 * in {@code pars}.
 * <p>
 * A zone is written closed right away when nothing will be nested inside it; otherwise it
 * is left open and closed with {@code closeElement} after its children have been written.
 *
 * @param sb     builder receiving the output
 * @param r      the text region whose zones are written
 * @param pageNr page number, appended to {@code FACS_ID_PREFIX} to form the facsimile id
 */
void writeZonesForTextRegion(SebisStringBuilder sb, TextRegionType r, int pageNr) {
    final String facsimileId = FACS_ID_PREFIX + pageNr;
    final boolean hasNestedZones = pars.lineZones || pars.wordZones;

    if (pars.regionZones) {
        // The region zone is self-contained only if neither lines nor words follow.
        writeZoneForShape(sb, (TrpTextRegionType) r, facsimileId, !hasNestedZones);
    }

    if (hasNestedZones) {
        for (TextLineType rawLine : r.getTextLine()) {
            TrpTextLineType line = (TrpTextLineType) rawLine;

            if (pars.lineZones) {
                // Leave the line zone open when word zones are nested inside it.
                writeZoneForShape(sb, line, facsimileId, !pars.wordZones);
            }
            if (!pars.wordZones) {
                continue;
            }

            for (WordType rawWord : line.getWord()) {
                TrpWordType word = (TrpWordType) rawWord;
                writeZoneForShape(sb, word, facsimileId, true);
            }
            if (pars.lineZones) {
                closeElement(sb, "zone");
            }
        }

        if (pars.regionZones) {
            closeElement(sb, "zone");
        }
    }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType in project TranskribusCore by Transkribus.
the class TrpTeiStringBuilder method writeTextForTextRegion.
/**
 * Writes the text of one text region, either line by line or, when
 * {@code commonPars.isWriteTextOnWordLevel()} is set, word by word inside each line.
 * <p>
 * Regions without any text line are skipped with a warning.
 *
 * @param sb     builder receiving the output
 * @param r      the text region to write
 * @param pageNr page number, appended to {@code FACS_ID_PREFIX} to form the facsimile id
 */
void writeTextForTextRegion(SebisStringBuilder sb, TextRegionType r, int pageNr) {
    if (r.getTextLine().isEmpty()) {
        logger.warn("skipping empty region: " + r.getId());
        return;
    }

    final String facsimileId = FACS_ID_PREFIX + pageNr;
    writeTextRegion(sb, r, facsimileId);

    for (TextLineType rawLine : r.getTextLine()) {
        TrpTextLineType line = (TrpTextLineType) rawLine;

        if (commonPars.isWriteTextOnWordLevel()) {
            // Word level: the line start/end markers wrap the individually written words.
            String lineOpening = getLineOrWordStart(line, facsimileId);
            sb.incIndent();
            sb.addLine(lineOpening);
            for (WordType rawWord : line.getWord()) {
                writeLineOrWord(sb, (TrpWordType) rawWord, facsimileId);
            }
            String lineClosing = getLineOrWordEnd(line, facsimileId);
            sb.addLine(lineClosing);
            sb.decIndent();
        } else {
            writeLineOrWord(sb, line, facsimileId);
        }
    }

    closeTextRegion(sb);
}
Aggregations