Search in sources :

Example 21 with PcGtsType

use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.

the class ScalePageCoordinatesToImageDimension method heldenbuch300to600dpi.

/**
 * Rescales the PAGE XML files found in
 * {@code /tmp/Ambraser_Heldenbuch/Ambraser_Heldenbuch/page} and writes the results to
 * {@code /tmp/Ambraser_Heldenbuch/page_edited}.
 *
 * <p>Each XML file is unmarshalled, passed to
 * {@code TrpPageTypeUtils.applyAffineTransformation(page, 0, 0, 2, 2, 0)} and marshalled
 * into the output directory. NOTE(review): the arguments are presumably
 * (translateX, translateY, scaleX, scaleY, rotation), i.e. a pure 2x upscale as the
 * method name suggests - confirm against TrpPageTypeUtils.
 *
 * @throws JAXBException declared by the original method; presumably raised by the PAGE XML
 *         (un)marshalling helpers
 * @throws IOException if the output directory cannot be created, the input directory
 *         cannot be listed, or reading/writing a file fails
 */
private static void heldenbuch300to600dpi() throws JAXBException, IOException {
    File input = new File("/tmp/Ambraser_Heldenbuch/Ambraser_Heldenbuch/page");
    File output = new File("/tmp/Ambraser_Heldenbuch/page_edited");
    // Fail early instead of silently ignoring a directory that could not be created.
    if (!output.isDirectory() && !output.mkdirs()) {
        throw new IOException("Could not create output directory: " + output.getAbsolutePath());
    }
    File[] files = input.listFiles(new ExtensionFileFilter("xml", true, false));
    // listFiles returns null (not an empty array) when the path is not a readable directory.
    if (files == null) {
        throw new IOException("Could not list input directory: " + input.getAbsolutePath());
    }
    for (File f : files) {
        final String name = f.getName();
        System.out.println("Processing file: " + name);
        PcGtsType pc = PageXmlUtils.unmarshal(f);
        TrpPageTypeUtils.applyAffineTransformation(pc.getPage(), 0, 0, 2, 2, 0);
        // If the name contains an underscore, the second underscore-separated token becomes
        // the output name. NOTE(review): for names with more than one underscore this drops
        // everything after the second token (including the extension) - verify this is intended.
        final String filename = name.contains("_") ? name.split("_")[1] : name;
        System.out.println("Writing file: " + filename);
        PageXmlUtils.marshalToFile(pc, new File(output, filename));
    }
}
Also used : File(java.io.File) ExtensionFileFilter(eu.transkribus.core.io.util.ExtensionFileFilter) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType)

Example 22 with PcGtsType

use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.

the class ScalePageCoordinatesToImageDimension method heldenbuch600to300dpi.

/**
 * Rescales the PAGE XML files found in
 * {@code /tmp/Heldenbuch_600dpi/Heldenbuch_600dpi/page} and writes the results to
 * {@code /tmp/Heldenbuch_600dpi/page_edited}.
 *
 * <p>Each XML file is unmarshalled, passed to
 * {@code TrpPageTypeUtils.applyAffineTransformation(page, 0, 0, 0.5, 0.5, 0)} and marshalled
 * into the output directory. NOTE(review): the arguments are presumably
 * (translateX, translateY, scaleX, scaleY, rotation), i.e. a pure 0.5x downscale as the
 * method name suggests - confirm against TrpPageTypeUtils.
 *
 * @throws JAXBException declared by the original method; presumably raised by the PAGE XML
 *         (un)marshalling helpers
 * @throws IOException if the output directory cannot be created, the input directory
 *         cannot be listed, or reading/writing a file fails
 */
private static void heldenbuch600to300dpi() throws JAXBException, IOException {
    File input = new File("/tmp/Heldenbuch_600dpi/Heldenbuch_600dpi/page");
    File output = new File("/tmp/Heldenbuch_600dpi/page_edited");
    // Fail early instead of silently ignoring a directory that could not be created.
    if (!output.isDirectory() && !output.mkdirs()) {
        throw new IOException("Could not create output directory: " + output.getAbsolutePath());
    }
    File[] files = input.listFiles(new ExtensionFileFilter("xml", true, false));
    // listFiles returns null (not an empty array) when the path is not a readable directory.
    if (files == null) {
        throw new IOException("Could not list input directory: " + input.getAbsolutePath());
    }
    for (File f : files) {
        final String name = f.getName();
        System.out.println("Processing file: " + name);
        PcGtsType pc = PageXmlUtils.unmarshal(f);
        TrpPageTypeUtils.applyAffineTransformation(pc.getPage(), 0, 0, 0.5, 0.5, 0);
        // If the name contains an underscore, the second underscore-separated token becomes
        // the output name. NOTE(review): for names with more than one underscore this drops
        // everything after the second token (including the extension) - verify this is intended.
        final String filename = name.contains("_") ? name.split("_")[1] : name;
        System.out.println("Writing file: " + filename);
        PageXmlUtils.marshalToFile(pc, new File(output, filename));
    }
}
Also used : File(java.io.File) ExtensionFileFilter(eu.transkribus.core.io.util.ExtensionFileFilter) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType)

Example 23 with PcGtsType

use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.

the class PageXmlDaoTest method main.

/**
 * Manual smoke test for building a JAXB page transcript.
 *
 * <p>Takes the first transcript of the first page of a document created by
 * {@code FakeDocProvider.create(false)}, builds it, and prints the page metadata. It then
 * renumbers the ids of all text regions with a running counter starting at 0 and prints the
 * id of every text region found in the page's region list. If the page data, the page
 * element or the region list is null, a message is printed and the JVM exits with status 0.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    TrpDoc fakeDoc = FakeDocProvider.create(false);
    TrpTranscriptMetadata transcriptMd = fakeDoc.getPages().get(0).getTranscripts().get(0);
    try {
        JAXBPageTranscript pageTranscript = new JAXBPageTranscript(transcriptMd);
        pageTranscript.build();
        // Disabled alternative: build via TrpPageTranscriptBuilder and dump the source DOM as a string.
        // JAXBPageTranscript pageTranscript = TrpPageTranscriptBuilder.build(transcriptMd);
        // DOMSource domSource = new DOMSource(pageTranscript.getSourceDoc());
        // StringWriter writer = new StringWriter();
        // StreamResult result = new StreamResult(writer);
        // TransformerFactory tf = TransformerFactory.newInstance();
        // Transformer transformer = tf.newTransformer();
        // transformer.transform(domSource, result);
        // System.out.println("XML IN String format is: \n" + writer.toString());

        // Check the JAXB root element.
        PcGtsType pcGts = pageTranscript.getPageData();
        if (pcGts == null) {
            System.out.println("page XML is null");
            System.exit(0);
        }
        PageType pageElement = pcGts.getPage();
        System.out.println(pcGts.getMetadata());
        if (pageElement == null) {
            System.out.println("pagetype element is null");
            System.exit(0);
        }
        if (pageElement.getTextRegionOrImageRegionOrLineDrawingRegion() == null) {
            System.out.println("Region list is null");
            System.exit(0);
        }

        // Give every text region a running numeric id, starting at 0.
        int nextId = 0;
        for (TextRegionType textRegion : pageTranscript.getPage().getTextRegions(true)) {
            textRegion.setId(String.valueOf(nextId));
            nextId++;
        }

        // Print the ids of the text regions contained in the page's region list.
        for (RegionType region : pageElement.getTextRegionOrImageRegionOrLineDrawingRegion()) {
            if (!(region instanceof TextRegionType)) {
                continue;
            }
            System.out.println(((TextRegionType) region).getId());
        }
    } catch (IllegalArgumentException | IOException e) {
        e.printStackTrace();
    }
    // Disabled check based on PrimaPageTranscript, kept for reference:
    // try {
    // PrimaPageTranscript ppt = PageXmlDao.getPrimaPageTranscript(transcriptMd);
    // Page page = ppt.getPageData();
    //
    // System.out.println(page.getImageFilename());
    // Region r = page.getLayout().getRegion("tempReg357564684568544579089");
    // System.out.println(r.getType().getName());
    // //			System.out.println(page.getLayout().getParentChildRelation(r.getType(), r.getId().toString()).getRelationType());
    //
    // IdRegister idr = r.getIdRegister();
    // //			idr.
    // System.out.println(idr);
    // } catch (IllegalArgumentException e) {
    //
    // e.printStackTrace();
    // } catch (MalformedURLException e) {
    //
    // e.printStackTrace();
    // } catch (UnsupportedFormatVersionException e) {
    //
    // e.printStackTrace();
    // }
}
Also used : TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) IOException(java.io.IOException) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) TrpDoc(eu.transkribus.core.model.beans.TrpDoc) PageType(eu.transkribus.core.model.beans.pagecontent.PageType)

Example 24 with PcGtsType

use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.

the class ImgUtilsTest method testBorderRemoval.

/**
 * Manual check of {@code ImgUtils.killBorder} on a fixed test image.
 *
 * <p>Reads the PAGE XML that belongs to the test image, builds a polygon from the
 * coordinates of the page's print space and hands image, polygon and the output path
 * {@code /tmp/output.png} to {@code ImgUtils.killBorder}.
 *
 * @throws IOException declared by the original method; presumably raised by file access
 * @throws JAXBException declared by the original method; presumably raised by unmarshalling
 */
private static void testBorderRemoval() throws IOException, JAXBException {
    File pageXml = new File("/mnt/dea_scratch/TRP/test/I._ZvS_1902_4.Q/page/ZS-I-1902-198 (1).xml");
    File image = new File("/mnt/dea_scratch/TRP/test/I._ZvS_1902_4.Q/ZS-I-1902-198 (1).jpg");
    // Disabled: open the image directly.
    // BufferedImage baseImage = ImageIO.read(image);

    // Build the print space polygon from the page's print space coordinates.
    Polygon printSpace = PageXmlUtils.buildPolygon(
            PageXmlUtils.unmarshal(pageXml).getPage().getPrintSpace().getCoords());
    File borderless = ImgUtils.killBorder(image, printSpace, "/tmp/output.png");
    // Disabled follow-up steps (binarization and region/line segmentation):
    // File bin = NcsrTools.binarize(borderless, new File("/tmp/bin.tiff"));
    //
    // File reg = NcsrTools.segmentRegions(borderless, bin, new File("/tmp/reg.xml"));
    // File lines = NcsrTools.segmentLines(bin, reg, new File("/tmp/output.xml"));
}
Also used : CoordsType(eu.transkribus.core.model.beans.pagecontent.CoordsType) Polygon(java.awt.Polygon) File(java.io.File) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType)

Example 25 with PcGtsType

use of eu.transkribus.core.model.beans.pagecontent.PcGtsType in project TranskribusCore by Transkribus.

the class FEPLocalDocReader method loadFEPDoc.

/**
 * Imports a FEP export located at {@code path} as a {@link TrpDoc}.
 *
 * <p>The METS file found in the folder is unmarshalled and used to set the document title
 * and to parse the physical structure (one map of files per page). For every page the
 * ALTO file is converted to a PAGE XML file in the page sub folder, a {@code TrpPage} is
 * built, and the logical structure from the METS is applied to the page file.
 *
 * @param path folder containing the FEP export
 * @param validateMets passed on to {@code unmarshalMets}
 * @param preserveOcrTxtStyles passed on to {@code LocalDocReader.createPageFromAlto2}
 * @param preserveOcrFontFamily passed on to {@code LocalDocReader.createPageFromAlto2}
 * @param replaceBadChars passed on to {@code LocalDocReader.createPageFromAlto2}
 * @param monitor progress monitor handed to {@code ProgressUtils}
 * @return the document with its metadata and pages set
 * @throws IOException if a page has no image file or no ALTO file entry
 * @throws Exception any other failure raised by the helpers called here
 */
public static TrpDoc loadFEPDoc(final String path, boolean validateMets, boolean preserveOcrTxtStyles, boolean preserveOcrFontFamily, boolean replaceBadChars, IProgressMonitor monitor) throws Exception {
    final File inputDir = new File(path);
    logger.info("importing FEP document from path: " + path);
    // Total amount of work is not known yet, hence -1; the task is restarted below with nPages.
    ProgressUtils.beginTask(monitor, "Importing a FEP document", -1);
    ProgressUtils.subTask(monitor, "Parsing mets");
    // find mets file:
    File metsFile = findMetsFile(inputDir);
    // unmarshal mets:
    Mets mets = unmarshalMets(metsFile, validateMets);
    // create trp-document and set metadata:
    TrpDoc trpDoc = new TrpDoc();
    setTitle(trpDoc, mets);
    trpDoc.getMd().setDesc("Imported from FEP export");
    trpDoc.getMd().setLocalFolder(inputDir);
    File pageDir = new File(inputDir.getAbsolutePath() + "/" + LocalDocConst.PAGE_FILE_SUB_FOLDER);
    File thumbDir = new File(inputDir.getAbsolutePath() + "/" + LocalDocConst.THUMBS_FILE_SUB_FOLDER);
    // parse physical structure:
    List<HashMap<String, File>> physStruct = parsePhysicalStructure(inputDir, mets);
    final int nPages = physStruct.size();
    ProgressUtils.beginTask(monitor, "Importing a FEP document", nPages);
    // create PAGEs:
    List<TrpPage> pages = new ArrayList<TrpPage>(nPages);
    int pageNr = 0;
    for (HashMap<String, File> files : physStruct) {
        // Increment first so the progress message shows the same 1-based page number
        // as the log output and the error messages below.
        ++pageNr;
        ProgressUtils.subTask(monitor, "Importing page " + pageNr);
        logger.debug("page: " + pageNr + ", nr of files: " + files.size());
        // first, check if image file is there and set some variables:
        if (!files.containsKey(IMG_GRP)) {
            throw new IOException("Image file for page " + pageNr + " could not be found!");
        }
        File imgFile = files.get(IMG_GRP);
        String imgFileBn = FilenameUtils.getBaseName(imgFile.getName());
        File thumbFile = LocalDocReader.getThumbFile(thumbDir, imgFileBn);
        File pageOutFile = new File(pageDir.getAbsolutePath() + "/" + imgFileBn + ".xml");
        FileUtils.forceMkdir(pageOutFile.getParentFile());
        // An ALTO file is mandatory: no empty page file is created as a fallback.
        if (!files.containsKey(ALTO_GRP)) {
            throw new IOException("ALTO file for image " + pageNr + " could not be found!");
        }
        File altoFile = files.get(ALTO_GRP);
        PcGtsType pc = LocalDocReader.createPageFromAlto2(imgFile.getName(), altoFile, preserveOcrTxtStyles, preserveOcrFontFamily, replaceBadChars);
        pageOutFile = JaxbUtils.marshalToFile(pc, pageOutFile);
        // TODO it is assumed that the image is not corrupt here! Try to read dimension to be sure
        TrpPage page = LocalDocReader.buildPage(inputDir, pageNr, imgFile, pageOutFile, thumbFile, null, null);
        // extract logical structs for this page from mets and apply them to the page:
        applyLogicalStructFromMetsToPageFile(mets, pageNr, pageOutFile);
        pages.add(page);
        ProgressUtils.worked(monitor, pageNr);
    }
    trpDoc.setPages(pages);
    return trpDoc;
}
Also used : HashMap(java.util.HashMap) TrpPage(eu.transkribus.core.model.beans.TrpPage) ArrayList(java.util.ArrayList) IOException(java.io.IOException) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType) Mets(eu.transkribus.core.model.beans.mets.Mets) TrpDoc(eu.transkribus.core.model.beans.TrpDoc) File(java.io.File)

Aggregations

PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)36 File (java.io.File)16 IOException (java.io.IOException)16 JAXBException (javax.xml.bind.JAXBException)11 TrpPage (eu.transkribus.core.model.beans.TrpPage)8 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)6 URL (java.net.URL)6 JAXBElement (javax.xml.bind.JAXBElement)6 Unmarshaller (javax.xml.bind.Unmarshaller)6 TextRegionType (eu.transkribus.core.model.beans.pagecontent.TextRegionType)5 FileNotFoundException (java.io.FileNotFoundException)5 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)5 TransformerException (javax.xml.transform.TransformerException)5 SAXException (org.xml.sax.SAXException)5 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)4 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)4 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)4 Dimension (java.awt.Dimension)4 FimgStoreImgMd (org.dea.fimgstoreclient.beans.FimgStoreImgMd)4 XmlFormat (eu.transkribus.core.io.formats.XmlFormat)3