use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.
the class PageXmlUtils method unmarshal.
/**
 * Unmarshals the XML supplied by the given stream into a {@code PcGtsType}.
 *
 * <p>The stream is passed to an unmarshaller obtained from {@code createUnmarshaller()}. The
 * result is treated as a {@code JAXBElement} wrapping a {@code PcGtsType}; the wrapped value
 * is handed to {@code onPostConstruct} before being returned.
 *
 * @param is stream providing the XML to unmarshal
 * @return the value unwrapped from the unmarshalled root element
 * @throws JAXBException propagated from the calls made while unmarshalling
 */
public static PcGtsType unmarshal(InputStream is) throws JAXBException {
    // unmarshal() returns Object, so the generic cast below cannot be verified at runtime.
    @SuppressWarnings("unchecked")
    final JAXBElement<PcGtsType> rootElement = (JAXBElement<PcGtsType>) createUnmarshaller().unmarshal(is);
    final PcGtsType result = rootElement.getValue();
    onPostConstruct(result);
    return result;
}
use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.
the class PageXmlUtils method createPcGtsTypeFromAbbyy.
/**
 * Builds a {@code PcGtsType} from an ABBYY XML file by transforming it with the
 * {@code ABBY_TO_PAGE_XSLT} stylesheet.
 *
 * <p>The transformation result goes straight into an object (rather than a file) so that the
 * image file name, which is not available in the ABBYY XML, can be set on the page afterwards.
 *
 * @param abbyyXml the ABBYY XML file to transform
 * @param imgFileName image file name to store on the resulting page
 * @param preserveOcrTxtStyles value passed to the stylesheet as {@code TEXT_STYLE_XSL_PARAM_NAME}
 * @param preserveOcrFontFamily value passed to the stylesheet as {@code FONT_FAM_XSL_PARAM_NAME}
 * @param replaceBadChars if {@code true}, the result is additionally passed through
 *        {@code FinereaderUtils.replaceBadChars}
 * @return the transformed (and optionally character-cleaned) page object
 * @throws TransformerException declared by the transformation step
 * @throws SAXException declared by the transformation step
 * @throws IOException declared by the transformation step
 * @throws ParserConfigurationException declared by the transformation step
 * @throws JAXBException declared by the transformation step
 */
public static PcGtsType createPcGtsTypeFromAbbyy(File abbyyXml, final String imgFileName, boolean preserveOcrTxtStyles, boolean preserveOcrFontFamily, boolean replaceBadChars) throws TransformerException, SAXException, IOException, ParserConfigurationException, JAXBException {
    // Stylesheet parameters for text style and font family preservation.
    // Boolean.valueOf replaces the deprecated Boolean(boolean) constructor.
    final Map<String, Object> params = new HashMap<>();
    params.put(TEXT_STYLE_XSL_PARAM_NAME, Boolean.valueOf(preserveOcrTxtStyles));
    params.put(FONT_FAM_XSL_PARAM_NAME, Boolean.valueOf(preserveOcrFontFamily));

    // Transform into an object and set imgFileName, as it is not available in the ABBYY XML.
    PcGtsType pc = JaxbUtils.transformToObject(abbyyXml, ABBY_TO_PAGE_XSLT, params, PcGtsType.class);
    pc.getPage().setImageFilename(imgFileName);

    if (replaceBadChars) {
        pc = FinereaderUtils.replaceBadChars(pc);
    }
    return pc;
}
use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.
the class PageXmlUtils method createPcGtsTypeFromAlto.
/**
 * Builds a {@code PcGtsType} from an ALTO XML file by transforming it with the
 * {@code ALTO_TO_PAGE_XSLT} stylesheet.
 *
 * <p>The transformation result goes straight into an object (rather than a file) so that the
 * image file name can be set on the page afterwards.
 *
 * @param altoXml the ALTO XML file to transform
 * @param imgFileName image file name to store on the resulting page
 * @param preserveOcrTxtStyles value passed to the stylesheet as {@code TEXT_STYLE_XSL_PARAM_NAME}
 * @param preserveOcrFontFamily value passed to the stylesheet as {@code FONT_FAM_XSL_PARAM_NAME}
 * @param replaceBadChars if {@code true}, the result is additionally passed through
 *        {@code FinereaderUtils.replaceBadChars}
 * @return the transformed (and optionally character-cleaned) page object
 * @throws TransformerException declared by the transformation step
 * @throws SAXException declared by the transformation step
 * @throws IOException declared by the transformation step
 * @throws ParserConfigurationException declared by the transformation step
 * @throws JAXBException declared by the transformation step
 */
public static PcGtsType createPcGtsTypeFromAlto(File altoXml, String imgFileName, boolean preserveOcrTxtStyles, boolean preserveOcrFontFamily, boolean replaceBadChars) throws TransformerException, SAXException, IOException, ParserConfigurationException, JAXBException {
    // Stylesheet parameters for text style and font family preservation.
    // Boolean.valueOf replaces the deprecated Boolean(boolean) constructor.
    final Map<String, Object> params = new HashMap<>();
    params.put(TEXT_STYLE_XSL_PARAM_NAME, Boolean.valueOf(preserveOcrTxtStyles));
    params.put(FONT_FAM_XSL_PARAM_NAME, Boolean.valueOf(preserveOcrFontFamily));

    // Transform into an object and set imgFileName on the resulting page.
    PcGtsType pc = JaxbUtils.transformToObject(altoXml, ALTO_TO_PAGE_XSLT, params, PcGtsType.class);
    pc.getPage().setImageFilename(imgFileName);

    if (replaceBadChars) {
        pc = FinereaderUtils.replaceBadChars(pc);
    }
    return pc;
}
use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.
the class ScalePageCoordinatesToImageDimension method fixAltoMmToPx.
/**
 * For every page of the document, rescales the page's current transcript so that it matches
 * the page's image dimensions, and writes the result back to the same transcript file.
 *
 * <p>Per page, the scale factors are derived from the ratio between the page's width/height
 * and the image width/height recorded in the transcript XML. They are then passed to
 * {@code TrpPageTypeUtils.applyAffineTransformation}; all other numeric arguments of that
 * call are zero.
 *
 * <p>NOTE(review): judging by the method name, the recorded dimensions are presumably in
 * millimetres and the image dimensions in pixels; this is not verifiable from this method.
 *
 * @param doc document whose page transcripts are rewritten in place
 * @throws IOException declared for the file access involved
 * @throws JAXBException declared for unmarshalling/marshalling the transcript
 */
private static void fixAltoMmToPx(final TrpDoc doc) throws IOException, JAXBException {
    for (TrpPage page : doc.getPages()) {
        final double pageWidth = page.getWidth();
        final double pageHeight = page.getHeight();

        final File transcriptFile = FileUtils.toFile(page.getCurrentTranscript().getUrl());
        final PcGtsType transcript = PageXmlUtils.unmarshal(transcriptFile);

        final double recordedWidth = transcript.getPage().getImageWidth();
        final double recordedHeight = transcript.getPage().getImageHeight();
        logger.info("Img: " + pageWidth + "x" + pageHeight + " | ALTO: " + recordedWidth + "x" + recordedHeight);

        // Ratio of page size to recorded size, computed via an intermediate percentage step.
        final double scaleX = (pageWidth / (recordedWidth / 100f)) / 100f;
        final double scaleY = (pageHeight / (recordedHeight / 100f)) / 100f;
        logger.info("Scale factor X: " + scaleX);
        logger.info("Scale factor Y: " + scaleY);

        TrpPageTypeUtils.applyAffineTransformation(transcript.getPage(), 0, 0, scaleX, scaleY, 0);
        PageXmlUtils.marshalToFile(transcript, transcriptFile);
    }
}
use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.
the class DocStatisticsBuilder method compute.
/**
 * Computes aggregated transcript statistics over all pages of a document.
 *
 * <p>For each page, the current transcript is unmarshalled from its URL, per-page statistics
 * are extracted with {@code PageXmlUtils.extractStats} and added to the running total.
 * Before each page is processed, a progress message is logged and sent to the observers.
 *
 * @param doc the document to analyse; must not be {@code null}
 * @return the statistics accumulated over all pages
 * @throws IllegalArgumentException if {@code doc} is {@code null}
 * @throws JAXBException declared for unmarshalling a page transcript
 */
public TrpTranscriptStatistics compute(TrpDoc doc) throws JAXBException {
    if (doc == null) {
        throw new IllegalArgumentException("TrpDoc is null!");
    }
    TrpTranscriptStatistics stats = new TrpTranscriptStatistics();
    List<TrpPage> pages = doc.getPages();
    for (TrpPage p : pages) {
        final String msg = "Computing stats: page " + p.getPageNr() + "/" + pages.size();
        logger.debug(msg);
        // Assuming these are the java.util.Observable methods: notifyObservers() only
        // delivers when the changed flag is set, so setChanged() has to come first.
        // The previous order dropped the first page's message and left the flag set on return.
        setChanged();
        notifyObservers(msg);
        URL xmlUrl = p.getCurrentTranscript().getUrl();
        PcGtsType pc = PageXmlUtils.unmarshal(xmlUrl);
        TrpTranscriptStatistics pageStats = PageXmlUtils.extractStats(pc);
        stats.add(pageStats);
    }
    return stats;
}
Aggregations