use of eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType in project TranskribusCore by Transkribus.
the class PageXmlDaoTest method main.
/**
 * Manual smoke test: builds a {@code JAXBPageTranscript} for the first transcript of the
 * first page of a fake document, assigns consecutive numeric ids to the text regions
 * returned by the transcript's page and finally prints the id of every text region found
 * in the region list of the page element.
 * <p>
 * Missing data (page XML, page element or region list) is reported on stdout and ends the
 * JVM with exit code 0.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    TrpDoc fakeDoc = FakeDocProvider.create(false);
    TrpTranscriptMetadata transcriptMd = fakeDoc.getPages().get(0).getTranscripts().get(0);
    try {
        JAXBPageTranscript pageTranscript = new JAXBPageTranscript(transcriptMd);
        pageTranscript.build();
        // JAXBPageTranscript transcript = TrpPageTranscriptBuilder.build(md);
        // get Source Document as String
        // DOMSource domSource = new DOMSource(transcript.getSourceDoc());
        // StringWriter writer = new StringWriter();
        // StreamResult result = new StreamResult(writer);
        // TransformerFactory tf = TransformerFactory.newInstance();
        // Transformer transformer = tf.newTransformer();
        // transformer.transform(domSource, result);
        // System.out.println("XML IN String format is: \n" + writer.toString());

        // sanity checks on the JAXB object tree, from the root down to the region list
        PcGtsType pcGts = pageTranscript.getPageData();
        if (pcGts == null) {
            System.out.println("page XML is null");
            System.exit(0);
        }
        PageType pageElement = pcGts.getPage();
        System.out.println(pcGts.getMetadata());
        if (pageElement == null) {
            System.out.println("pagetype element is null");
            System.exit(0);
        }
        if (pageElement.getTextRegionOrImageRegionOrLineDrawingRegion() == null) {
            System.out.println("Region list is null");
            System.exit(0);
        }

        // renumber the text regions with consecutive ids starting at 0
        int nextId = 0;
        for (TextRegionType textRegion : pageTranscript.getPage().getTextRegions(true)) {
            textRegion.setId(String.valueOf(nextId));
            nextId++;
        }

        // print the (new) ids as seen through the page element's region list
        for (RegionType region : pageElement.getTextRegionOrImageRegionOrLineDrawingRegion()) {
            if (!(region instanceof TextRegionType)) {
                continue;
            }
            System.out.println(((TextRegionType) region).getId());
        }
    } catch (IllegalArgumentException | IOException e) {
        e.printStackTrace();
    }
    // try {
    // PrimaPageTranscript ppt = PageXmlDao.getPrimaPageTranscript(md);
    // Page page = ppt.getPageData();
    //
    // System.out.println(page.getImageFilename());
    // Region r = page.getLayout().getRegion("tempReg357564684568544579089");
    // System.out.println(r.getType().getName());
    // // System.out.println(page.getLayout().getParentChildRelation(r.getType(), r.getId().toString()).getRelationType());
    //
    // IdRegister idr = r.getIdRegister();
    // // idr.
    // System.out.println(idr);
    // } catch (IllegalArgumentException e) {
    //
    // e.printStackTrace();
    // } catch (MalformedURLException e) {
    //
    // e.printStackTrace();
    // } catch (UnsupportedFormatVersionException e) {
    //
    // e.printStackTrace();
    // }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType in project TranskribusCore by Transkribus.
the class TrpPdfDocument method addPage.
/**
 * Adds one page of the document to the PDF: the page image (optionally with blackened
 * regions) together with its text, and optionally an additional plain-text page.
 * <p>
 * The image is loaded from {@code imgUrl}; if that yields a {@link FileNotFoundException}
 * the same URL with "view" replaced by "orig" in its file part is tried instead. The image
 * is re-encoded as JPEG, scaled according to its resolution (72 PDF points per inch) and
 * handed to {@code addTextAndImage}. As a side effect the fields {@code imgOnly},
 * {@code extraTextPage}, {@code scaleFactorX}/{@code scaleFactorY}, {@code twelfthPoints}
 * and {@code lineMeanHeight} are updated for the new page.
 *
 * @param imgUrl location of the page image
 * @param doc document used for the title page; the title page is only added when this is
 *            non-null and {@code createTitle} is set
 * @param pc PAGE content of the page
 * @param addAdditionalPlainTextPage whether a separate text page is appended; this only
 *            happens if the page contains at least one text region
 * @param imageOnly whether the text layer is omitted
 * @param md image metadata; if non-null its resolution overrides the one stored in the image
 * @param doBlackening whether unknown regions marked as blackening are painted black
 * @param cache export cache passed through to the text writing methods
 * @throws IOException if the image cannot be read, decoded or re-encoded as JPEG
 */
@SuppressWarnings("unused")
public void addPage(URL imgUrl, TrpDoc doc, PcGtsType pc, boolean addAdditionalPlainTextPage, boolean imageOnly, FimgStoreImgMd md, boolean doBlackening, ExportCache cache) throws MalformedURLException, IOException, DocumentException, JAXBException, URISyntaxException {
    imgOnly = imageOnly;
    extraTextPage = addAdditionalPlainTextPage;
    // FIXME use this only on cropped (printspace) images!!
    java.awt.Rectangle printspace = null;
    // if(pc.getPage() != null && pc.getPage().getPrintSpace() != null){
    // java.awt.Polygon psPoly = PageXmlUtils.buildPolygon(pc.getPage().getPrintSpace().getCoords());
    // printspace = psPoly.getBounds();
    // }
    BufferedImage imgBuffer = null;
    try (InputStream input = imgUrl.openStream()) {
        imgBuffer = ImageIO.read(input);
    } catch (FileNotFoundException e) {
        logger.error("File was not found at url " + imgUrl);
        URL origUrl = new URL(imgUrl.getProtocol(), imgUrl.getHost(), imgUrl.getFile().replace("view", "orig"));
        logger.debug("try orig file location " + origUrl);
        try (InputStream input = origUrl.openStream()) {
            imgBuffer = ImageIO.read(input);
        }
    }
    // ImageIO.read returns null (instead of throwing) when no registered reader can decode the stream
    if (imgBuffer == null) {
        throw new IOException("Could not decode image from url " + imgUrl);
    }
    int nrOfTextRegions = 0;
    Graphics2D graph = imgBuffer.createGraphics();
    try {
        graph.setColor(Color.BLACK);
        List<TrpRegionType> regions = pc.getPage().getTextRegionOrImageRegionOrLineDrawingRegion();
        // regions should be sorted after their reading order at this point - so no need to resort
        // Collections.sort(regions, new TrpElementCoordinatesComparator<RegionType>());
        for (RegionType r : regions) {
            // used later to decide if new page is necessary if there is at least one text region
            if (r instanceof TextRegionType) {
                nrOfTextRegions++;
            } else if (r instanceof UnknownRegionType && doBlackening) {
                UnknownRegionType urt = (UnknownRegionType) r;
                if (RegionTypeUtil.isBlackening((ITrpShapeType) r)) {
                    // Rectangle blackRect = (Rectangle) PageXmlUtils.buildPolygon(urt.getCoords().getPoints()).getBounds();
                    Rectangle blackRect = urt.getBoundingBox();
                    graph.fillRect((int) blackRect.getMinX(), (int) blackRect.getMinY(), (int) blackRect.getWidth(), (int) blackRect.getHeight());
                }
            }
        }
    } finally {
        // release the graphics context even if reading the regions fails
        graph.dispose();
    }
    byte[] imageBytes;
    try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
        // ImageIO.write reports a missing writer via its return value, not via an exception
        if (!ImageIO.write(imgBuffer, "JPEG", baos)) {
            throw new IOException("Could not encode image as JPEG: " + imgUrl);
        }
        imageBytes = baos.toByteArray();
    }
    Image img = Image.getInstance(imageBytes);
    imgBuffer.flush();
    imgBuffer = null;
    /*
     * take resolution from metadata of image store, values in img are not always set
     */
    if (md != null) {
        double resolutionX = (float) md.getXResolution();
        double resolutionY = (float) md.getYResolution();
        // logger.debug("Dpi: " + md.getXResolution());
        img.setDpi((int) resolutionX, (int) resolutionY);
    }
    // else{
    //
    // Image img = Image.getInstance(imgUrl);
    // }
    int cutoffLeft = 0;
    int cutoffTop = 0;
    if (printspace == null) {
        /*
         * 1 dot per cm = 2.54 dpi
         * img.getPlainWidth() = horizontal size in pixel
         * img.getPlainHeight() = vertical size in pixel
         * img.getDpiX() = resolution in x direction
         * size in cm: img.getPlainWidth() / (img.getDpiX() / 2.54)
         */
        // logger.debug("Horizontal size in cm: img.getPlainWidth() / (img.getDpiX()/2,54): " + img.getPlainWidth() / (img.getDpiX()/2.54));
        // logger.debug("Vertical size in cm: img.getPlainHeight() / (img.getDpiY()/2,54): " + img.getPlainHeight() / (img.getDpiY()/2.54));
        setPageSize(img);
    } else {
        int width = (int) printspace.getWidth();
        int height = (int) printspace.getHeight();
        setPageSize(new com.itextpdf.text.Rectangle(width, height));
        cutoffLeft = printspace.x;
        cutoffTop = printspace.y;
    }
    float xSize;
    float ySize;
    /*
     * calculate size of image with respect to Dpi of the image and the default points of PDF which is 72
     * PDF also uses the same basic measurement unit as PostScript: 72 points == 1 inch
     * images without a usable resolution (<= 72 dpi) are treated as 300 dpi
     */
    if (img.getDpiX() > 72f) {
        xSize = (float) (img.getPlainWidth() / img.getDpiX() * 72);
        ySize = (float) (img.getPlainHeight() / img.getDpiY() * 72);
        scaleFactorX = scaleFactorY = (float) (72f / img.getDpiX());
    } else {
        xSize = (float) (img.getPlainWidth() / 300 * 72);
        ySize = (float) (img.getPlainHeight() / 300 * 72);
        scaleFactorX = scaleFactorY = 72f / 300;
    }
    /*
     * construct the grid for the added page
     */
    for (int i = 0; i <= 12; i++) {
        twelfthPoints[i][0] = i * (img.getPlainWidth() / 12);
        twelfthPoints[i][1] = i * (img.getPlainHeight() / 12);
    }
    // TODO use scaleToFit instead?
    img.scaleAbsolute(xSize, ySize);
    img.setAbsolutePosition(0, 0);
    /*
     * calculate physical size of image in inch and assign text size dependent on these values
     */
    if (img.getScaledWidth() / 72f < 9 && img.getScaledHeight() / 72f < 12) {
        lineMeanHeight = 12 / scaleFactorY;
    } else {
        lineMeanHeight = 17 / scaleFactorY;
    }
    if (doc != null && createTitle) {
        addTitlePage(doc);
        // logger.debug("page number " + getPageNumber());
        // parity check: the former "% 1" was always 0, so this branch could never be taken
        if (getPageNumber() % 2 != 0) {
            logger.debug("odd page number -> add one new page");
            document.newPage();
            // necessary that an empty page can be created
            writer.setPageEmpty(false);
        }
    }
    document.newPage();
    addTextAndImage(pc, cutoffLeft, cutoffTop, img, imageOnly, cache);
    if (addAdditionalPlainTextPage && nrOfTextRegions > 0) {
        logger.debug("add uniform text");
        document.newPage();
        addUniformText(pc, cutoffLeft, cutoffTop, cache);
    }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType in project TranskribusCore by Transkribus.
the class TrpPdfDocument method addTextAndImage.
/**
 * Writes the content of one page onto the direct content below the page: first (unless
 * {@code imageOnly} is set) the text of all table and text regions into the OCR layer,
 * then the image into the image layer and, if {@code highlightTags} is set, the tag
 * highlights collected in {@code lineAndColorList}.
 *
 * @param pc PAGE content providing the regions
 * @param cutoffLeft horizontal offset passed on to the text and highlight writers
 * @param cutoffTop vertical offset passed on to the text and highlight writers
 * @param img image to place on the page
 * @param imageOnly if true, no text is written
 * @param cache export cache passed on to the text writers
 */
private void addTextAndImage(PcGtsType pc, int cutoffLeft, int cutoffTop, Image img, boolean imageOnly, ExportCache cache) throws DocumentException, IOException {
    lineAndColorList.clear();

    PdfContentByte canvas = writer.getDirectContentUnder();
    canvas.setColorFill(BaseColor.BLACK);
    canvas.setColorStroke(BaseColor.BLACK);
    // BaseFont bf = BaseFont.createFont(BaseFont.TIMES_ROMAN, "UTF-8", BaseFont.NOT_EMBEDDED);

    boolean withText = !imageOnly;
    if (withText) {
        canvas.beginLayer(ocrLayer);
        canvas.setFontAndSize(bfArial, 32);

        List<TrpRegionType> pageRegions = pc.getPage().getTextRegionOrImageRegionOrLineDrawingRegion();
        // Sort by reading order: it is considered more trustworthy at this point, and the
        // coordinate based sorting is not transitive and occasionally fails with
        // "Comparison violates its general contract".
        Collections.sort(pageRegions, new TrpElementReadingOrderComparator<RegionType>(true));

        for (RegionType region : pageRegions) {
            // TODO add paths for tables etc.
            if (region instanceof TrpTableRegionType) {
                // tables are checked first and never fall through to the text region branch
                exportTable(region, canvas, cutoffLeft, cutoffTop, false, cache);
                continue;
            }
            if (region instanceof TextRegionType) {
                TextRegionType textRegion = (TextRegionType) region;
                addTextFromTextRegion(textRegion, canvas, cutoffLeft, cutoffTop, bfArial, cache);
            }
        }
        // scale after calculating lineMeanHeightForAllRegions
        // lineMeanHeight = lineMeanHeight/scaleFactorX;
        canvas.endLayer();
    }

    // the image goes into its own layer
    canvas.beginLayer(imgLayer);
    canvas.addImage(img);
    canvas.endLayer();

    // draw tag lines
    if (highlightTags) {
        highlightAllTagsOnImg(lineAndColorList, canvas, cutoffLeft, cutoffTop);
    }
    // addTocLinks(doc, page,cutoffTop);
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType in project TranskribusCore by Transkribus.
the class TrpPdfDocument method hasSmallerColumn.
/**
 * Checks whether there is at least one other text region containing text to the left of
 * the given one, i.e. a region whose right edge lies left of the horizontal centre of
 * {@code regionToCompare} and which overlaps it vertically (the vertical centre of one
 * region lies strictly inside the vertical extent of the other).
 * <p>
 * As a side effect {@code smallerRegionMaxX} is reset to 0 and then set to the largest
 * right edge of all matching regions.
 * <p>
 * Original note: a text region that is completely contained in the other one should not
 * have an effect.
 *
 * @param regions all regions of the page
 * @param regionToCompare the region for which columns further left are searched
 * @return true if at least one such region was found; always false if {@code regions}
 *         holds exactly one element
 */
private boolean hasSmallerColumn(List<TrpRegionType> regions, TextRegionType regionToCompare) throws DocumentException, IOException {
    // java.awt.Rectangle compareBlock = PageXmlUtils.buildPolygon(regionToCompare.getCoords().getPoints()).getBounds();
    java.awt.Rectangle compareBlock = regionToCompare.getBoundingBox();
    float compareMinX = (float) compareBlock.getMinX();
    float compareMinY = (float) compareBlock.getMinY();
    float compareMaxX = (float) compareBlock.getMaxX();
    float compareMaxY = (float) compareBlock.getMaxY();
    float compareMeanX = compareMinX + (compareMaxX - compareMinX) / 2;
    float compareMeanY = compareMinY + (compareMaxY - compareMinY) / 2;
    boolean foundSmallerColumn = false;
    smallerRegionMaxX = 0;
    if (regions.size() == 1) {
        return false;
    }
    for (RegionType r : regions) {
        // TODO add paths for tables etc.
        if (!(r instanceof TextRegionType)) {
            continue;
        }
        // skip the region itself; ids are compared by value (null-safe), not by reference
        if (java.util.Objects.equals(r.getId(), regionToCompare.getId())) {
            continue;
        }
        TextRegionType tr = (TextRegionType) r;
        // regions without lines or with only empty lines can be ignored
        boolean textFound = false;
        for (TextLineType tlt : tr.getTextLine()) {
            TrpTextLineType l = (TrpTextLineType) tlt;
            if (!l.getUnicodeText().isEmpty()) {
                textFound = true;
                break;
            }
        }
        if (!textFound) {
            continue;
        }
        // logger.debug("tr id " + tr.getId());
        // java.awt.Rectangle block = PageXmlUtils.buildPolygon(tr.getCoords().getPoints()).getBounds();
        java.awt.Rectangle block = tr.getBoundingBox();
        float maxX = (float) block.getMaxX();
        float minY = (float) block.getMinY();
        float maxY = (float) block.getMaxY();
        float meanY = minY + (maxY - minY) / 2;
        boolean overlapsVertically = (meanY > compareMinY && meanY < compareMaxY) || (compareMeanY > minY && compareMeanY < maxY);
        if (overlapsVertically && maxX < compareMeanX) {
            // to find the biggest maxX if there are several smaller columns
            if (maxX > smallerRegionMaxX) {
                smallerRegionMaxX = maxX;
            }
            foundSmallerColumn = true;
        }
    }
    return foundSmallerColumn;
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType in project TranskribusCore by Transkribus.
the class TrpTeiStringBuilder method setContent.
/**
 * Builds the facsimile and text parts of the TEI output for the given pages and appends
 * both (facsimile first, then text) to {@code sbTotal}.
 * <p>
 * If {@code pageIndices} is set, only the pages whose index it contains are processed.
 * For every processed page a page break is written and the regions are handled in reading
 * order: text regions contribute text (and zones, if {@code pars.hasZones()}), all other
 * regions only contribute a zone.
 *
 * @param pages all pages of the document
 * @throws JAXBException declared for the page XML lookup
 * @throws InterruptedException if the monitor reports that the export was canceled
 */
@Override
protected void setContent(List<TrpPage> pages) throws JAXBException, InterruptedException {
    SebisStringBuilder facsimileOut = new SebisStringBuilder();
    SebisStringBuilder textOut = new SebisStringBuilder();

    textOut.incIndent();
    textOut.addLine("<text>");
    textOut.incIndent();
    textOut.addLine("<body>");

    int totalPages;
    if (pageIndices == null) {
        totalPages = pages.size();
    } else {
        totalPages = pageIndices.size();
    }
    if (monitor != null) {
        monitor.beginTask("Creating TEI", totalPages);
    }

    int processed = 0;
    for (int pageIdx = 0; pageIdx < pages.size(); ++pageIdx) {
        // honour the optional page selection
        if (pageIndices != null && !pageIndices.contains(pageIdx)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export was canceled by user");
            }
            monitor.subTask("Processing page " + (processed + 1));
        }

        TrpPage page = pages.get(pageIdx);
        logger.debug("1Processing page " + page.getPageNr() + ": " + page.getUrl() + " - XML=" + page.getCurrentTranscript().getUrl());

        // check buffer for transcript or unmarshal the page XML
        PcGtsType pc = this.getPcGtsTypeForPage(page);

        // one facsimile element per page, appended after the header
        if (pars.hasZones()) {
            openFacsimileElement(facsimileOut, page, pc);
        }
        // one page-break element per page as child of the body element
        writePageBreak(textOut, page, pc);

        // append all regions in reading order
        List<TrpRegionType> pageRegions = pc.getPage().getTextRegionOrImageRegionOrLineDrawingRegion();
        Collections.sort(pageRegions, new TrpElementReadingOrderComparator<RegionType>(true));
        for (TrpRegionType region : pageRegions) {
            if (region instanceof TextRegionType) {
                TrpTextRegionType textRegion = (TrpTextRegionType) region;
                if (pars.hasZones()) {
                    writeZonesForTextRegion(facsimileOut, textRegion, page.getPageNr());
                }
                writeTextForTextRegion(textOut, textRegion, page.getPageNr());
                continue;
            }
            // any other region type only gets a zone
            if (pars.hasZones()) {
                String facsId = FACS_ID_PREFIX + page.getPageNr();
                writeZoneForShape(facsimileOut, region, facsId, true);
            }
        }

        if (pars.hasZones()) {
            closeFacsimilieElement(facsimileOut);
        }

        ++processed;
        if (monitor != null) {
            monitor.worked(processed);
        }
    }

    textOut.addLine("</body>");
    textOut.decIndent();
    textOut.addLine("</text>");
    textOut.decIndent();

    sbTotal.sb.append(facsimileOut.toString());
    sbTotal.sb.append(textOut.toString());
}
Aggregations