Search in sources :

Example 31 with PcGtsType

use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.

the class PageXmlUtils method unmarshal.

/**
 * Unmarshals a {@link PcGtsType} from the given input stream.
 *
 * <p>The stream is handed to an unmarshaller obtained from {@code createUnmarshaller()}.
 * The result is expected to be a {@link JAXBElement} wrapping the {@link PcGtsType};
 * its value is passed to {@code onPostConstruct(...)} before being returned.
 *
 * @param is the stream to read the XML from
 * @return the unmarshalled object, after {@code onPostConstruct(...)} has been applied to it
 * @throws JAXBException declared for failures while creating the unmarshaller or unmarshalling
 */
public static PcGtsType unmarshal(InputStream is) throws JAXBException {
    final Unmarshaller unmarshaller = createUnmarshaller();
    // The unmarshaller returns a plain Object; the cast to the typed wrapper cannot be checked at runtime.
    @SuppressWarnings("unchecked")
    final JAXBElement<PcGtsType> rootElement = (JAXBElement<PcGtsType>) unmarshaller.unmarshal(is);
    final PcGtsType result = rootElement.getValue();
    onPostConstruct(result);
    return result;
}
Also used : JAXBElement(javax.xml.bind.JAXBElement) Unmarshaller(javax.xml.bind.Unmarshaller) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType)

Example 32 with PcGtsType

use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.

the class PageXmlUtils method createPcGtsTypeFromAbbyy.

/**
 * Builds a {@link PcGtsType} from an ABBYY XML file by applying the {@code ABBY_TO_PAGE_XSLT}
 * stylesheet and then setting the image filename on the resulting page.
 *
 * @param abbyyXml the ABBYY XML file to transform
 * @param imgFileName the image filename to set on the resulting page
 * @param preserveOcrTxtStyles value passed to the stylesheet as {@code TEXT_STYLE_XSL_PARAM_NAME}
 * @param preserveOcrFontFamily value passed to the stylesheet as {@code FONT_FAM_XSL_PARAM_NAME}
 * @param replaceBadChars if {@code true}, the result is additionally passed through
 *        {@code FinereaderUtils.replaceBadChars(...)}
 * @return the transformed (and optionally post-processed) page object
 * @throws TransformerException declared by the underlying transformation
 * @throws SAXException declared by the underlying transformation
 * @throws IOException declared by the underlying transformation
 * @throws ParserConfigurationException declared by the underlying transformation
 * @throws JAXBException declared by the underlying transformation
 */
public static PcGtsType createPcGtsTypeFromAbbyy(File abbyyXml, final String imgFileName, boolean preserveOcrTxtStyles, boolean preserveOcrFontFamily, boolean replaceBadChars) throws TransformerException, SAXException, IOException, ParserConfigurationException, JAXBException {
    // Stylesheet parameters controlling text style and font family preservation.
    // Boolean.valueOf replaces the deprecated Boolean(boolean) constructor.
    final Map<String, Object> params = new HashMap<>();
    params.put(TEXT_STYLE_XSL_PARAM_NAME, Boolean.valueOf(preserveOcrTxtStyles));
    params.put(FONT_FAM_XSL_PARAM_NAME, Boolean.valueOf(preserveOcrFontFamily));
    // Transform into an object (rather than straight to a file) so that imgFileName
    // can be set here, as it is not available in the ABBYY XML.
    PcGtsType pc = JaxbUtils.transformToObject(abbyyXml, ABBY_TO_PAGE_XSLT, params, PcGtsType.class);
    pc.getPage().setImageFilename(imgFileName);
    if (replaceBadChars) {
        pc = FinereaderUtils.replaceBadChars(pc);
    }
    return pc;
}
Also used : PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType)

Example 33 with PcGtsType

use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.

the class PageXmlUtils method createPcGtsTypeFromAlto.

/**
 * Builds a {@link PcGtsType} from an ALTO XML file by applying the {@code ALTO_TO_PAGE_XSLT}
 * stylesheet and then setting the image filename on the resulting page.
 *
 * @param altoXml the ALTO XML file to transform
 * @param imgFileName the image filename to set on the resulting page
 * @param preserveOcrTxtStyles value passed to the stylesheet as {@code TEXT_STYLE_XSL_PARAM_NAME}
 * @param preserveOcrFontFamily value passed to the stylesheet as {@code FONT_FAM_XSL_PARAM_NAME}
 * @param replaceBadChars if {@code true}, the result is additionally passed through
 *        {@code FinereaderUtils.replaceBadChars(...)}
 * @return the transformed (and optionally post-processed) page object
 * @throws TransformerException declared by the underlying transformation
 * @throws SAXException declared by the underlying transformation
 * @throws IOException declared by the underlying transformation
 * @throws ParserConfigurationException declared by the underlying transformation
 * @throws JAXBException declared by the underlying transformation
 */
public static PcGtsType createPcGtsTypeFromAlto(File altoXml, String imgFileName, boolean preserveOcrTxtStyles, boolean preserveOcrFontFamily, boolean replaceBadChars) throws TransformerException, SAXException, IOException, ParserConfigurationException, JAXBException {
    // Stylesheet parameters controlling text style and font family preservation.
    // Boolean.valueOf replaces the deprecated Boolean(boolean) constructor.
    final Map<String, Object> params = new HashMap<>();
    params.put(TEXT_STYLE_XSL_PARAM_NAME, Boolean.valueOf(preserveOcrTxtStyles));
    params.put(FONT_FAM_XSL_PARAM_NAME, Boolean.valueOf(preserveOcrFontFamily));
    // Transform into an object (rather than straight to a file) so that imgFileName
    // can be set explicitly on the resulting page.
    PcGtsType pc = JaxbUtils.transformToObject(altoXml, ALTO_TO_PAGE_XSLT, params, PcGtsType.class);
    pc.getPage().setImageFilename(imgFileName);
    if (replaceBadChars) {
        pc = FinereaderUtils.replaceBadChars(pc);
    }
    return pc;
}
Also used : PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType)

Example 34 with PcGtsType

use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.

the class ScalePageCoordinatesToImageDimension method fixAltoMmToPx.

/**
 * Rescales the current transcript of every page in the document so that its coordinates
 * match the dimensions reported by the page itself.
 *
 * <p>For each page, the transcript file is unmarshalled, per-axis scale factors are derived
 * from the ratio between the page's width/height and the image width/height stored in the
 * transcript, an affine transformation with those factors is applied, and the result is
 * written back to the same file.
 *
 * @param doc the document whose page transcripts are rewritten in place
 * @throws IOException declared for failures while reading or writing the transcript file
 * @throws JAXBException declared for failures while (un)marshalling the transcript
 */
private static void fixAltoMmToPx(final TrpDoc doc) throws IOException, JAXBException {
    for (final TrpPage page : doc.getPages()) {
        // Dimensions as reported by the page
        final double imgWidth = page.getWidth();
        final double imgHeight = page.getHeight();

        final File transcriptFile = FileUtils.toFile(page.getCurrentTranscript().getUrl());
        final PcGtsType pageXml = PageXmlUtils.unmarshal(transcriptFile);

        // Dimensions as stored in the transcript XML
        final double altoWidth = pageXml.getPage().getImageWidth();
        final double altoHeight = pageXml.getPage().getImageHeight();
        final String dimensionsMsg = "Img: " + imgWidth + "x" + imgHeight + " | ALTO: " + altoWidth + "x" + altoHeight;
        logger.info(dimensionsMsg);

        // Arithmetic is kept exactly as is so the resulting factors are bit-for-bit unchanged.
        final double scaleX = (imgWidth / (altoWidth / 100f)) / 100f;
        final double scaleY = (imgHeight / (altoHeight / 100f)) / 100f;
        logger.info("Scale factor X: " + scaleX);
        logger.info("Scale factor Y: " + scaleY);

        // Scaling only: the remaining arguments are all zero.
        TrpPageTypeUtils.applyAffineTransformation(pageXml.getPage(), 0, 0, scaleX, scaleY, 0);
        PageXmlUtils.marshalToFile(pageXml, transcriptFile);
    }
}
Also used : TrpPage(eu.transkribus.core.model.beans.TrpPage) File(java.io.File) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType)

Example 35 with PcGtsType

use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.

the class DocStatisticsBuilder method compute.

/**
 * Computes aggregated transcript statistics over all pages of a document.
 *
 * <p>For each page, the current transcript is unmarshalled from its URL, per-page statistics
 * are extracted and added to the running total. A progress message is logged and sent to
 * observers before each page is processed.
 *
 * @param doc the document to compute statistics for; must not be {@code null}
 * @return the statistics accumulated over all pages
 * @throws IllegalArgumentException if {@code doc} is {@code null}
 * @throws JAXBException declared for failures while unmarshalling a page transcript
 */
public TrpTranscriptStatistics compute(TrpDoc doc) throws JAXBException {
    if (doc == null) {
        throw new IllegalArgumentException("TrpDoc is null!");
    }
    TrpTranscriptStatistics stats = new TrpTranscriptStatistics();
    List<TrpPage> pages = doc.getPages();
    for (TrpPage p : pages) {
        final String msg = "Computing stats: page " + p.getPageNr() + "/" + pages.size();
        logger.debug(msg);
        // The changed flag has to be set BEFORE notifying: java.util.Observable only
        // dispatches when the flag is set and clears it afterwards. With the calls in the
        // opposite order the first page's message is dropped and the flag stays set after
        // the loop.
        // NOTE(review): assumes setChanged()/notifyObservers() are inherited from
        // java.util.Observable - confirm against the class declaration.
        setChanged();
        notifyObservers(msg);
        URL xmlUrl = p.getCurrentTranscript().getUrl();
        PcGtsType pc = PageXmlUtils.unmarshal(xmlUrl);
        TrpTranscriptStatistics pageStats = PageXmlUtils.extractStats(pc);
        stats.add(pageStats);
    }
    return stats;
}
Also used : TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptStatistics(eu.transkribus.core.model.beans.TrpTranscriptStatistics) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType) URL(java.net.URL)

Aggregations

PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType) 36, File (java.io.File) 16, IOException (java.io.IOException) 16, JAXBException (javax.xml.bind.JAXBException) 11, TrpPage (eu.transkribus.core.model.beans.TrpPage) 8, TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) 6, URL (java.net.URL) 6, JAXBElement (javax.xml.bind.JAXBElement) 6, Unmarshaller (javax.xml.bind.Unmarshaller) 6, TextRegionType (eu.transkribus.core.model.beans.pagecontent.TextRegionType) 5, FileNotFoundException (java.io.FileNotFoundException) 5, ParserConfigurationException (javax.xml.parsers.ParserConfigurationException) 5, TransformerException (javax.xml.transform.TransformerException) 5, SAXException (org.xml.sax.SAXException) 5, TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata) 4, TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) 4, TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) 4, Dimension (java.awt.Dimension) 4, FimgStoreImgMd (org.dea.fimgstoreclient.beans.FimgStoreImgMd) 4, XmlFormat (eu.transkribus.core.io.formats.XmlFormat) 3