Search in sources :

Example 6 with ITrpShapeType

use of eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType in project TranskribusCore by Transkribus.

the class TrpPageUnmarshalListener method syncTags.

/**
 * Synchronizes the custom tags of an unmarshalled shape with the tag registry.
 * <p>
 * Objects that are not an {@link ITrpShapeType} are ignored. For shapes, the
 * custom attribute is re-applied through its setter, and every tag found in the
 * shape's custom tag list is then offered to the {@link CustomTagFactory}
 * registry. A tag that fails to register is logged and skipped, so the
 * remaining tags are still processed.
 *
 * @param target the object to inspect
 */
private void syncTags(Object target) {
    if (target instanceof ITrpShapeType) {
        final ITrpShapeType shape = (ITrpShapeType) target;
        // JAXB does not invoke setters, so push the custom value through the setter by hand
        shape.setCustom(shape.getCustom());
        if (shape.getCustomTagList() == null) {
            return;
        }
        // offer every tag (known or new) to the registry; attributes are not merged
        for (CustomTag tag : shape.getCustomTagList().getTags()) {
            try {
                CustomTagFactory.addToRegistry(tag, null, false);
            } catch (NoSuchMethodException | SecurityException | IllegalAccessException | InvocationTargetException e) {
                logger.error("Could not register the tag: " + tag.getCssStr() + ", reason: " + e.getMessage(), e);
            }
        }
    }
}
Also used : CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) ITrpShapeType(eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Example 7 with ITrpShapeType

use of eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType in project TranskribusCore by Transkribus.

the class TrpPdfDocument method addPage.

/**
 * Adds one page to the PDF document: the page image (optionally with blackened
 * regions) plus its text layer, and optionally a following plain-text page.
 * <p>
 * The image is loaded from {@code imgUrl}; if that location does not exist, the
 * same path with {@code "view"} replaced by {@code "orig"} is tried. When
 * {@code doBlackening} is set, every {@link UnknownRegionType} flagged as
 * blackening is painted black on the image before it is JPEG-encoded and placed
 * on the page. The page size, the scale factors, the twelfth-grid and the mean
 * line height used by this instance are recomputed from the image.
 *
 * @param imgUrl location of the page image
 * @param doc document used for the title page; no title page is added if {@code null}
 * @param pc page content holding regions and text
 * @param addAdditionalPlainTextPage whether to append a page with uniform plain text
 *        (only done if the page has at least one text region)
 * @param imageOnly passed on to the text/image rendering step
 * @param md image metadata supplying the resolution; may be {@code null}
 * @param doBlackening whether blackening regions are painted onto the image
 * @param cache export cache passed on to the rendering steps
 * @throws MalformedURLException if the fallback image URL cannot be built
 * @throws IOException if the image cannot be read, decoded or re-encoded
 * @throws DocumentException declared for the PDF operations invoked here
 * @throws JAXBException declared for the rendering steps invoked here
 * @throws URISyntaxException declared for the rendering steps invoked here
 */
@SuppressWarnings("unused")
public void addPage(URL imgUrl, TrpDoc doc, PcGtsType pc, boolean addAdditionalPlainTextPage, boolean imageOnly, FimgStoreImgMd md, boolean doBlackening, ExportCache cache) throws MalformedURLException, IOException, DocumentException, JAXBException, URISyntaxException {
    imgOnly = imageOnly;
    extraTextPage = addAdditionalPlainTextPage;
    // FIXME use this only on cropped (printspace) images!!
    // The printspace is currently never computed, so the else-branch below is inactive.
    java.awt.Rectangle printspace = null;
    // if(pc.getPage() != null && pc.getPage().getPrintSpace() != null){
    // java.awt.Polygon psPoly = PageXmlUtils.buildPolygon(pc.getPage().getPrintSpace().getCoords());
    // printspace = psPoly.getBounds();
    // }
    BufferedImage imgBuffer = null;
    try (InputStream input = imgUrl.openStream()) {
        imgBuffer = ImageIO.read(input);
    } catch (FileNotFoundException e) {
        logger.error("File was not found at url " + imgUrl);
        URL origUrl = new URL(imgUrl.getProtocol(), imgUrl.getHost(), imgUrl.getFile().replace("view", "orig"));
        logger.debug("try orig file location " + origUrl);
        try (InputStream input = origUrl.openStream()) {
            imgBuffer = ImageIO.read(input);
        }
    }
    // ImageIO.read yields null when no registered reader can decode the stream;
    // fail with a descriptive IOException instead of a NullPointerException below.
    if (imgBuffer == null) {
        throw new IOException("Could not decode image from url " + imgUrl);
    }
    Graphics2D graph = imgBuffer.createGraphics();
    graph.setColor(Color.BLACK);
    List<TrpRegionType> regions = pc.getPage().getTextRegionOrImageRegionOrLineDrawingRegion();
    // regions should be sorted after their reading order at this point - so no need to resort
    // Collections.sort(regions, new TrpElementCoordinatesComparator<RegionType>());
    int nrOfTextRegions = 0;
    for (RegionType r : regions) {
        // used later to decide if new page is necessary if there is at least one text region
        if (r instanceof TextRegionType) {
            nrOfTextRegions++;
        } else if (r instanceof UnknownRegionType && doBlackening) {
            UnknownRegionType urt = (UnknownRegionType) r;
            ITrpShapeType trpShape = (ITrpShapeType) r;
            boolean isBlackening = RegionTypeUtil.isBlackening(trpShape);
            if (isBlackening) {
                // paint the bounding box of the region black on the page image
                // Rectangle blackRect = (Rectangle) PageXmlUtils.buildPolygon(urt.getCoords().getPoints()).getBounds();
                Rectangle blackRect = urt.getBoundingBox();
                graph.fillRect((int) blackRect.getMinX(), (int) blackRect.getMinY(), (int) blackRect.getWidth(), (int) blackRect.getHeight());
            }
        }
    }
    graph.dispose();
    // re-encode the (possibly modified) image as JPEG for embedding into the PDF
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ImageIO.write(imgBuffer, "JPEG", baos);
    byte[] imageBytes = baos.toByteArray();
    Image img = Image.getInstance(imageBytes);
    baos.close();
    imgBuffer.flush();
    imgBuffer = null;
    /*
		 * take resolution from metadata of image store, values in img are not always set
		 */
    if (md != null) {
        double resolutionX = (float) md.getXResolution();
        double resolutionY = (float) md.getYResolution();
        // logger.debug("Dpi: " + md.getXResolution());
        img.setDpi((int) resolutionX, (int) resolutionY);
    }
    // else{
    // 
    // Image img = Image.getInstance(imgUrl);
    // }
    int cutoffLeft = 0;
    int cutoffTop = 0;
    if (printspace == null) {
        /*
			 * 1 dot per cm = 2.54 dpi
			 * img.getPlainWidth() = horizontal size in pixels
			 * img.getPlainHeight() = vertical size in pixels
			 * img.getDpiX() = resolution in x direction
			 * horizontal size in cm: img.getPlainWidth() / (img.getDpiX() / 2.54)
			 */
        // logger.debug("Horizontal size in cm: img.getPlainWidth() / (img.getDpiX()/2,54): " + img.getPlainWidth() / (img.getDpiX()/2.54));
        // logger.debug("Vertical size in cm: img.getPlainHeight() / (img.getDpiY()/2,54): " + img.getPlainHeight() / (img.getDpiY()/2.54));
        setPageSize(img);
    } else {
        int width = (int) printspace.getWidth();
        int height = (int) printspace.getHeight();
        setPageSize(new com.itextpdf.text.Rectangle(width, height));
        cutoffLeft = printspace.x;
        cutoffTop = printspace.y;
    }
    float xSize;
    float ySize;
    /*
		 * calculate size of image with respect to Dpi of the image and the default points of PDF which is 72
		 * PDF also uses the same basic measurement unit as PostScript: 72 points == 1 inch
		 */
    if (img.getDpiX() > 72f) {
        xSize = (float) (img.getPlainWidth() / img.getDpiX() * 72);
        ySize = (float) (img.getPlainHeight() / img.getDpiY() * 72);
        scaleFactorX = scaleFactorY = (float) (72f / img.getDpiX());
    } else {
        // no usable resolution available: fall back to 300 dpi
        xSize = (float) (img.getPlainWidth() / 300 * 72);
        ySize = (float) (img.getPlainHeight() / 300 * 72);
        scaleFactorX = scaleFactorY = 72f / 300;
    }
    /*
		 * construct the grid for the added page
		 */
    for (int i = 0; i <= 12; i++) {
        twelfthPoints[i][0] = i * (img.getPlainWidth() / 12);
        twelfthPoints[i][1] = i * (img.getPlainHeight() / 12);
    }
    // TODO use scaleToFit instead?
    img.scaleAbsolute(xSize, ySize);
    img.setAbsolutePosition(0, 0);
    /*
		 * calculate physical size of image in inch and assign text size dependent on these values
		 */
    if (img.getScaledWidth() / 72f < 9 && img.getScaledHeight() / 72f < 12) {
        lineMeanHeight = 12 / scaleFactorY;
    } else {
        lineMeanHeight = 17 / scaleFactorY;
    }
    if (doc != null && createTitle) {
        addTitlePage(doc);
        // logger.debug("page number " + getPageNumber());
        // parity check must be modulo 2; modulo 1 is always 0 and made this branch unreachable
        if (getPageNumber() % 2 != 0) {
            logger.debug("odd page number -> add one new page");
            document.newPage();
            // necessary that an empty page can be created
            writer.setPageEmpty(false);
        }
    }
    document.newPage();
    addTextAndImage(pc, cutoffLeft, cutoffTop, img, imageOnly, cache);
    if (addAdditionalPlainTextPage) {
        if (nrOfTextRegions > 0) {
            logger.debug("add uniform text");
            document.newPage();
            addUniformText(pc, cutoffLeft, cutoffTop, cache);
        }
    }
}
Also used : Rectangle(java.awt.Rectangle) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) UnknownRegionType(eu.transkribus.core.model.beans.pagecontent.UnknownRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) InputStream(java.io.InputStream) FileNotFoundException(java.io.FileNotFoundException) Rectangle(java.awt.Rectangle) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BufferedImage(java.awt.image.BufferedImage) Image(com.itextpdf.text.Image) BufferedImage(java.awt.image.BufferedImage) URL(java.net.URL) Point(java.awt.Point) ITrpShapeType(eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType) Graphics2D(java.awt.Graphics2D) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) UnknownRegionType(eu.transkribus.core.model.beans.pagecontent.UnknownRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)

Example 8 with ITrpShapeType

use of eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType in project TranskribusCore by Transkribus.

the class TrpTxtBuilder method writeTxtForSinglePage.

/**
 * Collects the text of one page and appends it to the given text file.
 * <p>
 * The page's regions are sorted in place by reading order. Table regions are
 * skipped (no plain-text handling for tables yet); for text regions either the
 * text of every word (if {@code wordBased} is set and the line has words) or the
 * non-empty text of the line is collected. After each text region that has at
 * least one line, a line separator entry is added. The collected entries are
 * written as UTF-8 lines, creating the file if needed and appending otherwise.
 * I/O failures are logged and not propagated.
 *
 * @param file target text file
 * @param trpPage page whose text is exported
 * @param wordBased whether to export word by word where words are available
 * @param preserveLineBreaks currently not evaluated by this method
 */
private static void writeTxtForSinglePage(File file, TrpPageType trpPage, boolean wordBased, boolean preserveLineBreaks) {
    // TrpTableRegionType is contained in the regions too
    List<TrpRegionType> regions = trpPage.getRegions();
    Collections.sort(regions, new TrpElementReadingOrderComparator<RegionType>(true));
    List<String> content = new ArrayList<String>();
    for (int j = 0; j < regions.size(); ++j) {
        TrpRegionType r = regions.get(j);
        if (r instanceof TrpTableRegionType) {
            /*
				 * TODO: for simple txt export: how to handle tables
				 */
            continue;
        } else if (r instanceof TrpTextRegionType) {
            TrpTextRegionType tr = (TrpTextRegionType) r;
            List<TextLineType> lines = tr.getTextLine();
            for (int i = 0; i < lines.size(); ++i) {
                TrpTextLineType trpL = (TrpTextLineType) lines.get(i);
                String textOfCurrLine = trpL.getUnicodeText();
                if (wordBased && trpL.getWord().size() > 0) {
                    for (WordType word : trpL.getWord()) {
                        content.add(((ITrpShapeType) word).getUnicodeText());
                    }
                } else if (textOfCurrLine != null && !textOfCurrLine.isEmpty()) {
                    // compare by content: a reference comparison against "" does not
                    // reliably filter empty strings and lets null through
                    content.add(textOfCurrLine);
                }
            }
            if (lines.size() > 0) {
                // separate regions from each other in the output
                content.add(System.lineSeparator());
            }
        }
    }
    try {
        logger.debug("path " + Paths.get(file.getAbsolutePath()));
        Files.write(Paths.get(file.getAbsolutePath()), content, utf8, StandardOpenOption.CREATE, StandardOpenOption.APPEND);
    } catch (IOException e) {
        // best effort: report the failure with its cause instead of printing to stderr
        logger.error("Could not write text to file " + file.getAbsolutePath() + ": " + e.getMessage(), e);
    }
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ITrpShapeType(eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) ArrayList(java.util.ArrayList) List(java.util.List)

Example 9 with ITrpShapeType

use of eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType in project TranskribusCore by Transkribus.

the class CustomTagUtil method createReadingOrderOrderedGroup.

/**
 * Builds an ordered group that references the given shapes by their reading order.
 * <p>
 * The group receives an id of the form {@code "ro_" + CoreUtils.uniqueCurrentTimeMS()}
 * and, if given, the caption. Each shape that has a reading order contributes one
 * indexed region reference carrying that reading order as its index; shapes
 * without a reading order are left out.
 *
 * @param shapes shapes to reference
 * @param caption caption of the group; not set if {@code null}
 * @return the newly created group
 */
public static OrderedGroupType createReadingOrderOrderedGroup(List<? extends ITrpShapeType> shapes, String caption) {
    final OrderedGroupType orderedGroup = new OrderedGroupType();
    if (caption != null) {
        orderedGroup.setCaption(caption);
    }
    orderedGroup.setId("ro_" + CoreUtils.uniqueCurrentTimeMS());
    for (ITrpShapeType shape : shapes) {
        if (shape.getReadingOrder() == null) {
            // shapes without a reading order are not part of the group
            continue;
        }
        final RegionRefIndexedType ref = new RegionRefIndexedType();
        ref.setRegionRef(shape);
        ref.setIndex(shape.getReadingOrder());
        orderedGroup.getRegionRefIndexedOrOrderedGroupIndexedOrUnorderedGroupIndexed().add(ref);
    }
    return orderedGroup;
}
Also used : RegionRefIndexedType(eu.transkribus.core.model.beans.pagecontent.RegionRefIndexedType) OrderedGroupType(eu.transkribus.core.model.beans.pagecontent.OrderedGroupType) ITrpShapeType(eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType)

Aggregations

ITrpShapeType (eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType)9 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)3 RegionType (eu.transkribus.core.model.beans.pagecontent.RegionType)2 TrpRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)2 TrpTableRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType)2 Image (com.itextpdf.text.Image)1 CustomTag (eu.transkribus.core.model.beans.customtags.CustomTag)1 OrderedGroupType (eu.transkribus.core.model.beans.pagecontent.OrderedGroupType)1 RegionRefIndexedType (eu.transkribus.core.model.beans.pagecontent.RegionRefIndexedType)1 TextRegionType (eu.transkribus.core.model.beans.pagecontent.TextRegionType)1 TextTypeSimpleType (eu.transkribus.core.model.beans.pagecontent.TextTypeSimpleType)1 UnknownRegionType (eu.transkribus.core.model.beans.pagecontent.UnknownRegionType)1 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)1 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)1 TrpStructureChangedEvent (eu.transkribus.core.model.beans.pagecontent_trp.observable.TrpObserveEvent.TrpStructureChangedEvent)1 TrpTextStyleChangedEvent (eu.transkribus.core.model.beans.pagecontent_trp.observable.TrpObserveEvent.TrpTextStyleChangedEvent)1 Graphics2D (java.awt.Graphics2D)1 Point (java.awt.Point)1 Rectangle (java.awt.Rectangle)1 BufferedImage (java.awt.image.BufferedImage)1