Search in sources :

Example 16 with TrpDoc

use of eu.transkribus.core.model.beans.TrpDoc in project TranskribusCore by Transkribus.

the class TrpMetsBuilderTest method createMets.

/**
 * Loads the local document stored in {@code folder}, builds a METS object for it and
 * marshals that object to a file named {@code mets.xml} inside the same folder.
 *
 * @param folder              directory holding the local document
 * @param printResultOnSysOut when {@code true}, the METS object is additionally marshalled to standard output
 * @throws IOException                if {@code folder} is {@code null} or not a directory, or if propagated by a callee
 * @throws UnsupportedFormatException propagated from the called loader/builder
 * @throws JAXBException              propagated from the called loader/builder/marshaller
 */
public static void createMets(File folder, boolean printResultOnSysOut) throws UnsupportedFormatException, IOException, JAXBException {
    final boolean usableFolder = folder != null && folder.isDirectory();
    if (!usableFolder) {
        throw new IOException("Folder null or no directory!");
    }

    final String folderPath = folder.getAbsolutePath();
    final TrpDoc doc = LocalDocReader.load(folderPath);

    // Boolean flags, in order: export page files (added to the METS file section),
    // export ALTO files, export images.
    final Mets mets = TrpMetsBuilder.buildMets(doc, true, false, true, null);

    final File metsFile = new File(folderPath + "/mets.xml");
    JaxbUtils.marshalToFile(mets, metsFile, TrpDocMetadata.class);

    if (printResultOnSysOut) {
        JaxbUtils.marshalToSysOut(mets, TrpDocMetadata.class);
    }
}
Also used : Mets(eu.transkribus.core.model.beans.mets.Mets) TrpDoc(eu.transkribus.core.model.beans.TrpDoc) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata) IOException(java.io.IOException) File(java.io.File)

Example 17 with TrpDoc

use of eu.transkribus.core.model.beans.TrpDoc in project TranskribusCore by Transkribus.

the class FEPLocalDocReader method loadFEPDoc.

/**
 * Imports a FEP export found under {@code path} and returns it as a {@link TrpDoc}.
 * <p>
 * The METS file of the export is located and unmarshalled, the physical structure is parsed
 * into one file map per page, and for every page a PAGE XML file is created from its ALTO
 * file and written into the page sub folder of the input directory.
 *
 * @param path                  directory of the FEP export
 * @param validateMets          passed through to the METS unmarshalling step
 * @param preserveOcrTxtStyles  passed through to the ALTO-to-PAGE conversion
 * @param preserveOcrFontFamily passed through to the ALTO-to-PAGE conversion
 * @param replaceBadChars       passed through to the ALTO-to-PAGE conversion
 * @param monitor               progress monitor handed to {@code ProgressUtils}
 * @return the document with title, description, local folder and the list of imported pages set
 * @throws IOException if a page has no image file or no ALTO file
 * @throws Exception   anything propagated by the called helpers
 */
public static TrpDoc loadFEPDoc(final String path, boolean validateMets, boolean preserveOcrTxtStyles, boolean preserveOcrFontFamily, boolean replaceBadChars, IProgressMonitor monitor) throws Exception {
    final File inputDir = new File(path);
    logger.info("importing FEP document from path: " + path);
    // total amount of work is unknown until the physical structure has been parsed
    ProgressUtils.beginTask(monitor, "Importing a FEP document", -1);
    ProgressUtils.subTask(monitor, "Parsing mets");
    // find mets file:
    File metsFile = findMetsFile(inputDir);
    // unmarshal mets:
    Mets mets = unmarshalMets(metsFile, validateMets);
    // create trp-document and set metadata:
    TrpDoc trpDoc = new TrpDoc();
    setTitle(trpDoc, mets);
    trpDoc.getMd().setDesc("Imported from FEP export");
    trpDoc.getMd().setLocalFolder(inputDir);
    File pageDir = new File(inputDir.getAbsolutePath() + "/" + LocalDocConst.PAGE_FILE_SUB_FOLDER);
    File thumbDir = new File(inputDir.getAbsolutePath() + "/" + LocalDocConst.THUMBS_FILE_SUB_FOLDER);
    // parse physical structure:
    List<HashMap<String, File>> physStruct = parsePhysicalStructure(inputDir, mets);
    final int nPages = physStruct.size();
    ProgressUtils.beginTask(monitor, "Importing a FEP document", nPages);
    // create PAGEs:
    List<TrpPage> pages = new ArrayList<TrpPage>(nPages);
    int pageNr = 0;
    for (HashMap<String, File> files : physStruct) {
        // Page numbers are 1-based: increment first so the progress label agrees with the
        // log and error messages below (the label previously started at "page 0").
        ++pageNr;
        ProgressUtils.subTask(monitor, "Importing page " + pageNr);
        logger.debug("page: " + pageNr + ", nr of files: " + files.size());
        // first, check if image file is there and set some variables:
        if (!files.containsKey(IMG_GRP)) {
            throw new IOException("Image file for page " + pageNr + " could not be found!");
        }
        File imgFile = files.get(IMG_GRP);
        String imgFileBn = FilenameUtils.getBaseName(imgFile.getName());
        File thumbFile = LocalDocReader.getThumbFile(thumbDir, imgFileBn);
        File pageOutFile = new File(pageDir.getAbsolutePath() + "/" + imgFileBn + ".xml");
        FileUtils.forceMkdir(pageOutFile.getParentFile());
        // A missing ALTO file is an error; deliberately no empty page file is created instead.
        if (!files.containsKey(ALTO_GRP)) {
            throw new IOException("ALTO file for image " + pageNr + " could not be found!");
        }
        File altoFile = files.get(ALTO_GRP);
        PcGtsType pc = LocalDocReader.createPageFromAlto2(imgFile.getName(), altoFile, preserveOcrTxtStyles, preserveOcrFontFamily, replaceBadChars);
        pageOutFile = JaxbUtils.marshalToFile(pc, pageOutFile);
        // TODO it is assumed that the image is not corrupt here! Try to read dimension to be sure
        TrpPage page = LocalDocReader.buildPage(inputDir, pageNr, imgFile, pageOutFile, thumbFile, null, null);
        // extract logical structs for this page from mets and apply them to the page:
        applyLogicalStructFromMetsToPageFile(mets, pageNr, pageOutFile);
        pages.add(page);
        // NOTE(review): the running page number is passed here; confirm that ProgressUtils.worked
        // expects a cumulative value rather than an increment.
        ProgressUtils.worked(monitor, pageNr);
    }
    trpDoc.setPages(pages);
    return trpDoc;
}
Also used : HashMap(java.util.HashMap) TrpPage(eu.transkribus.core.model.beans.TrpPage) ArrayList(java.util.ArrayList) IOException(java.io.IOException) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType) Mets(eu.transkribus.core.model.beans.mets.Mets) TrpDoc(eu.transkribus.core.model.beans.TrpDoc) File(java.io.File)

Example 18 with TrpDoc

use of eu.transkribus.core.model.beans.TrpDoc in project TranskribusCore by Transkribus.

the class TrpDocPacker method unpackDoc.

/**
 * Unzips {@code zipFile}, reads the METS file expected directly inside the target path and
 * loads the document described by it.
 *
 * @param zipFile the zipped document
 * @param path    target path for unzipping; when {@code null} or empty, a path below
 *                {@code TEMP_DIR} named after the zip file's base name is used
 * @return the document loaded from the METS data and the unzipped directory
 * @throws IOException if the METS file does not exist at the expected location or cannot be
 *                     unmarshalled, or if propagated by a callee
 */
public TrpDoc unpackDoc(File zipFile, String path) throws IOException {
    final boolean noTargetGiven = path == null || path.isEmpty();
    final String targetPath = noTargetGiven
            ? TEMP_DIR + File.separator + FilenameUtils.getBaseName(zipFile.getName())
            : path;

    final File unzippedDir = ZipUtils.unzip(zipFile, targetPath);

    // The METS file is assumed to sit directly below the target path.
    final File metsFile = new File(targetPath + File.separator + TrpMetsBuilder.METS_FILE_NAME);
    if (!metsFile.exists()) {
        throw new IOException("No METS file included in zip!");
    }

    final Mets mets;
    try {
        mets = JaxbUtils.unmarshal(metsFile, Mets.class, TrpDocMetadata.class);
    } catch (JAXBException e) {
        throw new IOException("Could not unmarshal METS file!", e);
    }

    return LocalDocReader.load(mets, unzippedDir);
}
Also used : Mets(eu.transkribus.core.model.beans.mets.Mets) JAXBException(javax.xml.bind.JAXBException) TrpDoc(eu.transkribus.core.model.beans.TrpDoc) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata) IOException(java.io.IOException) File(java.io.File)

Example 19 with TrpDoc

use of eu.transkribus.core.model.beans.TrpDoc in project TranskribusCore by Transkribus.

the class TrpRtfBuilder method main.

/**
 * Manual test entry point: loads the document at a hard-coded local path, writes it to
 * {@code test_rtf.rtf} via {@code writeRtfForDoc} and prints "finished" when done.
 *
 * @param args ignored
 * @throws IOException   propagated from loading or writing
 * @throws JAXBException propagated from loading or writing
 */
public static void main(String[] args) throws IOException, JAXBException {
    final String docPath = "/media/dea_scratch/TRP/Schauplatz_small/";
    final TrpDoc doc = LocalDocReader.load(docPath);

    final File rtfTarget = new File("test_rtf.rtf");
    writeRtfForDoc(doc, false, rtfTarget, null, null);

    System.out.println("finished");
}
Also used : TrpDoc(eu.transkribus.core.model.beans.TrpDoc) File(java.io.File)

Example 20 with TrpDoc

use of eu.transkribus.core.model.beans.TrpDoc in project TranskribusCore by Transkribus.

the class TrpTxtBuilder method main.

/**
 * Manual test entry point: loads the document at a hard-coded local path and writes it to
 * {@code TxtExportTest.txt} via {@code writeTxtForDoc}. Failures are printed to standard
 * error; an interruption additionally restores the thread's interrupt status.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    // final String path = "/mnt/dea_scratch/TRP/Bentham_box_002_GT";
    final String path = "X:/TRP/Bentham_box_002_GT";
    try {
        TrpDoc doc = LocalDocReader.load(path);
        writeTxtForDoc(doc, true, false, true, new File("TxtExportTest.txt"), null, null, new ExportCache());
    } catch (JAXBException e) {
        // report and fall through: this is a manual test run
        e.printStackTrace();
    } catch (IOException e) {
        // report and fall through: this is a manual test run
        e.printStackTrace();
    } catch (Docx4JException e) {
        // report and fall through: this is a manual test run
        e.printStackTrace();
    } catch (InterruptedException e) {
        // Catching InterruptedException clears the interrupt flag; set it again so the
        // interruption is not silently lost.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }
}
Also used : ExportCache(eu.transkribus.core.model.builder.ExportCache) TrpDoc(eu.transkribus.core.model.beans.TrpDoc) JAXBException(javax.xml.bind.JAXBException) IOException(java.io.IOException) File(java.io.File) Docx4JException(org.docx4j.openpackaging.exceptions.Docx4JException)

Aggregations

TrpDoc (eu.transkribus.core.model.beans.TrpDoc)21 File (java.io.File)12 IOException (java.io.IOException)11 TrpDocMetadata (eu.transkribus.core.model.beans.TrpDocMetadata)9 TrpPage (eu.transkribus.core.model.beans.TrpPage)6 JAXBException (javax.xml.bind.JAXBException)6 Mets (eu.transkribus.core.model.beans.mets.Mets)5 FileNotFoundException (java.io.FileNotFoundException)4 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)3 CorruptImageException (eu.transkribus.core.exceptions.CorruptImageException)2 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)2 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)2 ExportCache (eu.transkribus.core.model.builder.ExportCache)2 Dimension (java.awt.Dimension)2 URL (java.net.URL)2 ArrayList (java.util.ArrayList)2 TrpDocPacker (eu.transkribus.core.io.TrpDocPacker)1 XmlFormat (eu.transkribus.core.io.formats.XmlFormat)1 Md5SumComputer (eu.transkribus.core.io.util.Md5SumComputer)1 PageUploadDescriptor (eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor)1