use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.
the class ScalePageCoordinatesToImageDimension method heldenbuch300to600dpi.
/**
 * One-off conversion helper: reads every PAGE XML file selected by the
 * {@code ExtensionFileFilter("xml", true, false)} from the hard-coded input
 * folder, applies an affine transformation with the arguments
 * {@code (0, 0, 2, 2, 0)} to its page element and writes the result into the
 * hard-coded output folder.
 *
 * @throws JAXBException propagated from (un)marshalling the PAGE XML
 * @throws IOException if the output folder cannot be created, the input
 *         folder cannot be listed, or reading/writing a file fails
 */
private static void heldenbuch300to600dpi() throws JAXBException, IOException {
    File input = new File("/tmp/Ambraser_Heldenbuch/Ambraser_Heldenbuch/page");
    File output = new File("/tmp/Ambraser_Heldenbuch/page_edited");
    // mkdirs() also returns false if another process created the folder in the
    // meantime, hence the second isDirectory() check before giving up.
    if (!output.isDirectory() && !output.mkdirs() && !output.isDirectory()) {
        throw new IOException("Could not create output directory: " + output.getAbsolutePath());
    }
    File[] files = input.listFiles(new ExtensionFileFilter("xml", true, false));
    // listFiles() yields null (not an empty array) when the path is not a
    // readable directory; fail with a clear message instead of an NPE below.
    if (files == null) {
        throw new IOException("Could not list input directory: " + input.getAbsolutePath());
    }
    for (File f : files) {
        final String sourceName = f.getName();
        System.out.println("Processing file: " + sourceName);
        PcGtsType pc = PageXmlUtils.unmarshal(f);
        // NOTE(review): arguments are presumably (tx, ty, sx, sy, rotation), i.e. a
        // pure scaling by factor 2 for 300 -> 600 dpi - confirm against TrpPageTypeUtils.
        TrpPageTypeUtils.applyAffineTransformation(pc.getPage(), 0, 0, 2, 2, 0);
        final String filename;
        if (sourceName.contains("_")) {
            // Keeps only the segment between the first and second underscore.
            // NOTE(review): names with more than one '_' lose everything after the
            // second one (including the extension) - verify this is intended.
            filename = sourceName.split("_")[1];
        } else {
            filename = sourceName;
        }
        System.out.println("Writing file: " + filename);
        PageXmlUtils.marshalToFile(pc, new File(output.getAbsolutePath() + File.separator + filename));
    }
}
use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.
the class ScalePageCoordinatesToImageDimension method heldenbuch600to300dpi.
/**
 * One-off conversion helper: reads every PAGE XML file selected by the
 * {@code ExtensionFileFilter("xml", true, false)} from the hard-coded input
 * folder, applies an affine transformation with the arguments
 * {@code (0, 0, 0.5, 0.5, 0)} to its page element and writes the result into
 * the hard-coded output folder.
 *
 * @throws JAXBException propagated from (un)marshalling the PAGE XML
 * @throws IOException if the output folder cannot be created, the input
 *         folder cannot be listed, or reading/writing a file fails
 */
private static void heldenbuch600to300dpi() throws JAXBException, IOException {
    File input = new File("/tmp/Heldenbuch_600dpi/Heldenbuch_600dpi/page");
    File output = new File("/tmp/Heldenbuch_600dpi/page_edited");
    // mkdirs() also returns false if another process created the folder in the
    // meantime, hence the second isDirectory() check before giving up.
    if (!output.isDirectory() && !output.mkdirs() && !output.isDirectory()) {
        throw new IOException("Could not create output directory: " + output.getAbsolutePath());
    }
    File[] files = input.listFiles(new ExtensionFileFilter("xml", true, false));
    // listFiles() yields null (not an empty array) when the path is not a
    // readable directory; fail with a clear message instead of an NPE below.
    if (files == null) {
        throw new IOException("Could not list input directory: " + input.getAbsolutePath());
    }
    for (File f : files) {
        final String sourceName = f.getName();
        System.out.println("Processing file: " + sourceName);
        PcGtsType pc = PageXmlUtils.unmarshal(f);
        // NOTE(review): arguments are presumably (tx, ty, sx, sy, rotation), i.e. a
        // pure scaling by factor 0.5 for 600 -> 300 dpi - confirm against TrpPageTypeUtils.
        TrpPageTypeUtils.applyAffineTransformation(pc.getPage(), 0, 0, 0.5, 0.5, 0);
        final String filename;
        if (sourceName.contains("_")) {
            // Keeps only the segment between the first and second underscore.
            // NOTE(review): names with more than one '_' lose everything after the
            // second one (including the extension) - verify this is intended.
            filename = sourceName.split("_")[1];
        } else {
            filename = sourceName;
        }
        System.out.println("Writing file: " + filename);
        PageXmlUtils.marshalToFile(pc, new File(output.getAbsolutePath() + File.separator + filename));
    }
}
use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.
the class PageXmlDaoTest method main.
/**
 * Manual smoke test for building a page transcript.
 *
 * Takes the first transcript of the first page of a document created by
 * {@code FakeDocProvider.create(false)}, builds a {@code JAXBPageTranscript}
 * from it and prints the page metadata. The text regions returned by
 * {@code getTextRegions(true)} then get consecutive numeric IDs starting at 0,
 * and the ID of every {@code TextRegionType} in the page's region list is
 * printed.
 *
 * The JVM exits with status 0 if the page data, the page element or the
 * region list is {@code null}. {@code IllegalArgumentException} and
 * {@code IOException} are caught and their stack trace is printed.
 *
 * @param args unused
 */
public static void main(String[] args) {
    TrpDoc fakeDoc = FakeDocProvider.create(false);
    TrpTranscriptMetadata transcriptMd = fakeDoc.getPages().get(0).getTranscripts().get(0);
    try {
        JAXBPageTranscript pageTranscript = new JAXBPageTranscript(transcriptMd);
        pageTranscript.build();

        // Sanity checks on the unmarshalled JAXB structure.
        PcGtsType pcGts = pageTranscript.getPageData();
        if (pcGts == null) {
            System.out.println("page XML is null");
            System.exit(0);
        }
        PageType pageElement = pcGts.getPage();
        System.out.println(pcGts.getMetadata());
        if (pageElement == null) {
            System.out.println("pagetype element is null");
            System.exit(0);
        }
        if (pageElement.getTextRegionOrImageRegionOrLineDrawingRegion() == null) {
            System.out.println("Region list is null");
            System.exit(0);
        }

        // Renumber the text regions: 0, 1, 2, ...
        int nextId = 0;
        for (TextRegionType textRegion : pageTranscript.getPage().getTextRegions(true)) {
            textRegion.setId(String.valueOf(nextId));
            nextId++;
        }

        // Print the IDs as seen through the page's own region list.
        for (RegionType region : pageElement.getTextRegionOrImageRegionOrLineDrawingRegion()) {
            if (!(region instanceof TextRegionType)) {
                continue;
            }
            System.out.println(((TextRegionType) region).getId());
        }
    } catch (IllegalArgumentException | IOException e) {
        e.printStackTrace();
    }
}
use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.
the class ImgUtilsTest method testBorderRemoval.
/**
 * Manual test for {@code ImgUtils.killBorder}: unmarshals a hard-coded PAGE
 * XML file, builds a polygon from the coordinates of its print space and
 * passes that polygon, together with the matching image file and the target
 * path {@code /tmp/output.png}, to {@code killBorder}.
 *
 * @throws IOException propagated from the called utilities
 * @throws JAXBException propagated from unmarshalling the PAGE XML
 */
private static void testBorderRemoval() throws IOException, JAXBException {
    File imageFile = new File("/mnt/dea_scratch/TRP/test/I._ZvS_1902_4.Q/ZS-I-1902-198 (1).jpg");
    File pageXmlFile = new File("/mnt/dea_scratch/TRP/test/I._ZvS_1902_4.Q/page/ZS-I-1902-198 (1).xml");

    PcGtsType pageContent = PageXmlUtils.unmarshal(pageXmlFile);

    // The print space outline is the region handed to killBorder.
    final CoordsType printSpaceCoords = pageContent.getPage().getPrintSpace().getCoords();
    Polygon printSpaceOutline = PageXmlUtils.buildPolygon(printSpaceCoords);

    ImgUtils.killBorder(imageFile, printSpaceOutline, "/tmp/output.png");
}
use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.
the class FEPLocalDocReader method loadFEPDoc.
/**
 * Imports a FEP export from a local folder as a {@link TrpDoc}.
 *
 * The METS file found in {@code path} is unmarshalled and used to set the
 * document title and to derive the physical structure (one map of file
 * groups per page). For every page the ALTO file is converted into a PAGE
 * XML file that is written to the page sub folder
 * ({@code LocalDocConst.PAGE_FILE_SUB_FOLDER}), a {@code TrpPage} is built
 * and the logical structure from the METS is applied to the page file.
 *
 * @param path folder containing the FEP export
 * @param validateMets passed on to {@code unmarshalMets}
 * @param preserveOcrTxtStyles passed on to {@code LocalDocReader.createPageFromAlto2}
 * @param preserveOcrFontFamily passed on to {@code LocalDocReader.createPageFromAlto2}
 * @param replaceBadChars passed on to {@code LocalDocReader.createPageFromAlto2}
 * @param monitor progress monitor handed to {@code ProgressUtils}
 * @return the document with its metadata and pages set
 * @throws IOException if the image or ALTO file of a page is missing
 * @throws Exception anything propagated from the METS/ALTO/PAGE helpers
 */
public static TrpDoc loadFEPDoc(final String path, boolean validateMets, boolean preserveOcrTxtStyles, boolean preserveOcrFontFamily, boolean replaceBadChars, IProgressMonitor monitor) throws Exception {
    final File inputDir = new File(path);
    logger.info("importing FEP document from path: " + path);
    // Total amount of work is unknown until the METS has been parsed.
    ProgressUtils.beginTask(monitor, "Importing a FEP document", -1);
    ProgressUtils.subTask(monitor, "Parsing mets");

    // find and unmarshal the mets file:
    File metsFile = findMetsFile(inputDir);
    Mets mets = unmarshalMets(metsFile, validateMets);

    // create trp-document and set metadata:
    TrpDoc trpDoc = new TrpDoc();
    setTitle(trpDoc, mets);
    trpDoc.getMd().setDesc("Imported from FEP export");
    trpDoc.getMd().setLocalFolder(inputDir);
    File pageDir = new File(inputDir.getAbsolutePath() + "/" + LocalDocConst.PAGE_FILE_SUB_FOLDER);
    File thumbDir = new File(inputDir.getAbsolutePath() + "/" + LocalDocConst.THUMBS_FILE_SUB_FOLDER);

    // parse physical structure:
    List<HashMap<String, File>> physStruct = parsePhysicalStructure(inputDir, mets);
    final int nPages = physStruct.size();
    ProgressUtils.beginTask(monitor, "Importing a FEP document", nPages);

    // create PAGEs:
    List<TrpPage> pages = new ArrayList<TrpPage>(nPages);
    int pageNr = 0;
    for (HashMap<String, File> files : physStruct) {
        // Increment first so that the progress message uses the same 1-based
        // page number as the log output and the error messages below.
        ++pageNr;
        ProgressUtils.subTask(monitor, "Importing page " + pageNr);
        logger.debug("page: " + pageNr + ", nr of files: " + files.size());

        // first, check if image file is there and set some variables;
        // a key mapped to null is treated like a missing key:
        File imgFile = files.get(IMG_GRP);
        if (imgFile == null) {
            throw new IOException("Image file for page " + pageNr + " could not be found!");
        }
        String imgFileBn = FilenameUtils.getBaseName(imgFile.getName());
        File thumbFile = LocalDocReader.getThumbFile(thumbDir, imgFileBn);
        File pageOutFile = new File(pageDir.getAbsolutePath() + "/" + imgFileBn + ".xml");
        FileUtils.forceMkdir(pageOutFile.getParentFile());

        // An ALTO file is mandatory; no empty page file is created as a fallback.
        File altoFile = files.get(ALTO_GRP);
        if (altoFile == null) {
            throw new IOException("ALTO file for image " + pageNr + " could not be found!");
        }
        PcGtsType pc = LocalDocReader.createPageFromAlto2(imgFile.getName(), altoFile, preserveOcrTxtStyles, preserveOcrFontFamily, replaceBadChars);
        pageOutFile = JaxbUtils.marshalToFile(pc, pageOutFile);

        // TODO it is assumed that the image is not corrupt here! Try to read dimension to be sure
        TrpPage page = LocalDocReader.buildPage(inputDir, pageNr, imgFile, pageOutFile, thumbFile, null, null);
        // extract logical structs for this page from mets and apply them to the page:
        applyLogicalStructFromMetsToPageFile(mets, pageNr, pageOutFile);
        pages.add(page);
        // NOTE(review): passes the running page number; if ProgressUtils.worked()
        // expects an increment rather than an absolute value this over-reports - confirm.
        ProgressUtils.worked(monitor, pageNr);
    }
    trpDoc.setPages(pages);
    return trpDoc;
}
Aggregations