Search in sources:

Example 1 with Mets

use of eu.transkribus.core.model.beans.mets.Mets in project TranskribusCore by Transkribus.

the class GoobiMetsImporter method unmarshalMets.

/**
 * Unmarshals the given METS file into a {@link Mets} object and logs how long
 * the unmarshalling took.
 *
 * @param metsFile the METS XML file to read
 * @param validate if {@code true}, the schema obtained from
 *        {@code XmlFormat.METS.getOrCompileSchema()} is set on the unmarshaller
 *        before the file is read
 * @return the unmarshalled METS object
 * @throws IOException declared by this method's signature
 * @throws JAXBException if the unmarshaller cannot be created or unmarshalling fails
 * @throws SAXException declared by this method's signature (schema retrieval is the
 *         likely source - TODO confirm)
 */
private Mets unmarshalMets(File metsFile, boolean validate) throws IOException, JAXBException, SAXException {
    final Unmarshaller unmarshaller = JaxbUtils.createUnmarshaller(Mets.class, TrpDocMetadata.class);
    // the measured time includes obtaining the schema when validation is requested
    final long start = System.currentTimeMillis();
    if (validate) {
        unmarshaller.setSchema(XmlFormat.METS.getOrCompileSchema());
    }
    final Mets result = (Mets) unmarshaller.unmarshal(metsFile);
    logger.debug("time for unmarshalling: " + (System.currentTimeMillis() - start) + ", validated: " + validate);
    logger.debug("unmarshalled mets file");
    return result;
}
Also used : Mets(eu.transkribus.core.model.beans.mets.Mets) Schema(javax.xml.validation.Schema) Unmarshaller(javax.xml.bind.Unmarshaller)

Example 2 with Mets

use of eu.transkribus.core.model.beans.mets.Mets in project TranskribusCore by Transkribus.

the class GoobiMetsImporter method loadDocFromGoobiMets.

/**
 * Creates a {@link TrpDoc} from a Goobi METS file.
 * <p>
 * The METS file is unmarshalled, the document metadata is read from the same
 * file via {@code readModsMetadata}, and the pages are obtained from
 * {@code fetchFiles(localDirPath, mets)}. The pages are taken directly from the
 * METS rather than by re-reading the local folder, because loading from the
 * folder could change the page order when file names differ in length.
 *
 * @param metsFile the Goobi METS file
 * @param localDirPath path of the local folder; it is set as the local folder of
 *        the returned document's metadata and passed on to {@code fetchFiles}
 * @return a new document carrying the metadata read from the METS file and the fetched pages
 * @throws IOException declared by this method's signature
 * @throws SAXException declared by this method's signature
 * @throws JAXBException declared by this method's signature (JAXB unmarshalling of
 *         the METS file is the likely source - TODO confirm)
 */
public TrpDoc loadDocFromGoobiMets(File metsFile, String localDirPath) throws IOException, JAXBException, SAXException {
    final Mets goobiMets = JaxbUtils.unmarshal(metsFile, Mets.class, TrpDocMetadata.class);
    final String absoluteMetsPath = metsFile.getAbsolutePath();
    updateStatus("Reading metadata...");

    // the document metadata is read from the XML document itself, not from the JAXB object above
    final TrpDocMetadata docMd = readModsMetadata(XmlUtils.getDocumentFromFileWOE(absoluteMetsPath));
    logger.debug("the local user home dir = " + localDirPath);
    docMd.setLocalFolder(new File(localDirPath));

    // assemble the document from the metadata and the pages referenced by the METS
    final TrpDoc result = new TrpDoc();
    result.setMd(docMd);
    result.setPages(fetchFiles(localDirPath, goobiMets));
    return result;
}
Also used : Mets(eu.transkribus.core.model.beans.mets.Mets) TrpDoc(eu.transkribus.core.model.beans.TrpDoc) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata) File(java.io.File)

Example 3 with Mets

use of eu.transkribus.core.model.beans.mets.Mets in project TranskribusCore by Transkribus.

the class DocExporter method exportDoc.

/**
 * Export current document with the provided parameters.
 * <p>
 * Depending on the export settings, images, PAGE XML files, ALTO XML files, the
 * document metadata file and a METS file are written below the directory given by
 * {@code pars.getDir()}. The export works on a copy of {@code doc}, because page
 * URLs, keys and transcript lists are altered while exporting. After each exported
 * page the observers are notified with that page's number.
 *
 * @param doc current document
 * @param pars export settings
 * @return directory to which the export files were written
 * @throws IOException if the output path already exists and overwriting is disabled,
 *         if the output directory cannot be created, or if the metadata or METS
 *         file cannot be marshalled
 * @throws IllegalArgumentException declared by this method's signature
 * @throws URISyntaxException declared by this method's signature
 * @throws JAXBException if transcript metadata shall be exported but a transcript
 *         does not contain a metadata element
 * @throws TransformerException declared by this method's signature
 */
public File exportDoc(TrpDoc doc, CommonExportPars pars) throws IOException, IllegalArgumentException, URISyntaxException, JAXBException, TransformerException {
    FimgStoreGetClient getter = null;
    FimgStoreUriBuilder uriBuilder = null;
    // fall back to the original image if no remote image quality is configured
    ImgType imgType = pars.getRemoteImgQuality() == null ? ImgType.orig : pars.getRemoteImgQuality();
    if (doc.isRemoteDoc()) {
        // FIXME fimagestore path should be read from docMd!
        getter = new FimgStoreGetClient("dbis-thure.uibk.ac.at", "f");
        final String scheme = pars.isUseHttps() ? "https" : "http";
        final int port = pars.isUseHttps() ? 443 : 80;
        uriBuilder = new FimgStoreUriBuilder(scheme, getter.getHost(), port, getter.getServerContext());
    }
    // create copy of object, as we alter it here while exporting
    TrpDoc doc2;
    doc2 = new TrpDoc(doc);
    // check and create output directory
    File outputDir = new File(pars.getDir());
    if (!pars.isDoOverwrite() && outputDir.exists()) {
        throw new IOException("File path already exists.");
    }
    // mkdir() also returns false if the directory already exists (overwrite case),
    // so only fail if there is no usable directory afterwards
    if (!outputDir.mkdir() && !outputDir.isDirectory()) {
        throw new IOException("Could not create output directory: " + outputDir.getAbsolutePath());
    }
    // decide where to put the images
    final File imgOutputDir;
    if (pars.isUseOcrMasterDir()) {
        imgOutputDir = new File(outputDir.getAbsolutePath() + File.separatorChar + LocalDocConst.OCR_MASTER_DIR);
        imgOutputDir.mkdir();
    } else {
        imgOutputDir = outputDir;
    }
    File pageOutputDir = null, altoOutputDir = null;
    // check PAGE export settings and create output directory
    String pageDirName = pars.getPageDirName();
    if (pars.isDoExportPageXml() && !StringUtils.isEmpty(pageDirName)) {
        pageOutputDir = new File(outputDir.getAbsolutePath() + File.separatorChar + pageDirName);
        if (pageOutputDir.mkdir()) {
            logger.debug("pageOutputDir created successfully ");
        } else {
            logger.debug("pageOutputDir could not be created!");
        }
    } else {
        // if pageDirName is not set, export the PAGE XMLs to imgOutputDir
        pageOutputDir = imgOutputDir;
    }
    // check Alto export settings and create output directory
    AltoExporter altoEx = new AltoExporter();
    if (pars.isDoExportAltoXml()) {
        altoOutputDir = altoEx.createAltoOuputDir(doc2, outputDir.getAbsolutePath());
    }
    // check and write metadata
    if (doc2.getMd() != null) {
        File fileOut = new File(outputDir.getAbsolutePath() + File.separatorChar + LocalDocConst.METADATA_FILENAME);
        try {
            JaxbUtils.marshalToFile(doc2.getMd(), fileOut);
        } catch (JAXBException e) {
            throw new IOException("Could not marshal metadata to file.", e);
        }
    }
    List<TrpPage> pages = doc2.getPages();
    // a null index set means that all pages are exported
    Set<Integer> pageIndices = pars.getPageIndices(doc.getNPages());
    // do export for all defined pages
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        TrpPage p = pages.get(i);
        File imgFile = null, xmlFile = null, altoFile = null;
        URL imgUrl = p.getUrl();
        final String baseFileName = ExportFilePatternUtils.buildBaseFileName(pars.getFileNamePattern(), p);
        final String imgExt = "." + FilenameUtils.getExtension(p.getImgFileName());
        final String xmlExt = ".xml";
        // gather remote files and export document
        if (doc2.isRemoteDoc()) {
            if (pars.isDoWriteImages()) {
                final String msg = "Downloading " + imgType.toString() + " image for page nr. " + p.getPageNr();
                logger.debug(msg);
                updateStatus(msg);
                final URI imgUri = uriBuilder.getImgUri(p.getKey(), imgType);
                imgFile = getter.saveFile(imgUri, imgOutputDir.getAbsolutePath(), baseFileName + imgExt);
                // from now on the page points to the downloaded local image
                p.setUrl(imgFile.toURI().toURL());
                p.setKey(null);
            }
            if (pars.isDoExportPageXml()) {
                // use the previously stored, chosen transcript version from the cache
                // (formerly: p.getCurrentTranscript())
                TrpTranscriptMetadata transcriptMd;
                JAXBPageTranscript transcript = cache.getPageTranscriptAtIndex(i);
                // set up transcript metadata
                if (transcript == null) {
                    transcriptMd = p.getCurrentTranscript();
                    logger.warn("Have to unmarshall transcript in DocExporter for transcript " + transcriptMd + " - should have been built before using ExportUtils::storePageTranscripts4Export!");
                    transcript = new JAXBPageTranscript(transcriptMd);
                    transcript.build();
                } else {
                    transcriptMd = transcript.getMd();
                }
                URL xmlUrl = transcriptMd.getUrl();
                if (pars.isExportTranscriptMetadata()) {
                    MetadataType md = transcript.getPage().getPcGtsType().getMetadata();
                    if (md == null) {
                        throw new JAXBException("Transcript does not contain a metadata element: " + transcriptMd);
                    }
                    // imgUrl was read before the page URL was possibly replaced above
                    String imgUrlStr = CoreUtils.urlToString(imgUrl);
                    String xmlUrlStr = CoreUtils.urlToString(xmlUrl);
                    String status = transcriptMd.getStatus() == null ? null : transcriptMd.getStatus().toString();
                    TranskribusMetadataType tmd = new TranskribusMetadataType();
                    tmd.setDocId(doc.getId());
                    tmd.setPageId(p.getPageId());
                    tmd.setPageNr(p.getPageNr());
                    tmd.setTsid(transcriptMd.getTsId());
                    tmd.setStatus(status);
                    tmd.setUserId(transcriptMd.getUserId());
                    tmd.setImgUrl(imgUrlStr);
                    tmd.setXmlUrl(xmlUrlStr);
                    tmd.setImageId(p.getImageId());
                    md.setTranskribusMetadata(tmd);
                }
                // write transcript to file
                xmlFile = new File(FilenameUtils.normalizeNoEndSeparator(pageOutputDir.getAbsolutePath()) + File.separator + baseFileName + xmlExt);
                logger.debug("PAGE XMl output file: " + xmlFile.getAbsolutePath());
                transcript.write(xmlFile);
                // make sure (for other exports) that the transcript that is exported is the only one set in the transcripts list of TrpPage
                p.getTranscripts().clear();
                TrpTranscriptMetadata tCopy = new TrpTranscriptMetadata(transcriptMd, p);
                tCopy.setUrl(xmlFile.toURI().toURL());
                p.getTranscripts().add(tCopy);
            }
        } else {
            updateStatus("Copying local files for page nr. " + p.getPageNr());
            // copy local files during export
            if (pars.isDoWriteImages()) {
                imgFile = LocalDocWriter.copyImgFile(p, p.getUrl(), imgOutputDir.getAbsolutePath(), baseFileName + imgExt);
            }
            if (pars.isDoExportPageXml()) {
                xmlFile = LocalDocWriter.copyTranscriptFile(p, pageOutputDir.getAbsolutePath(), baseFileName + xmlExt, cache);
            }
        }
        // export alto:
        if (pars.isDoExportAltoXml()) {
            altoFile = altoEx.exportAltoFile(p, baseFileName + xmlExt, altoOutputDir, pars.isSplitIntoWordsInAltoXml());
        }
        if (imgFile != null) {
            logger.debug("Written image file " + imgFile.getAbsolutePath());
        }
        if (xmlFile != null) {
            logger.debug("Written transcript xml file " + xmlFile.getAbsolutePath());
        } else {
            // include the page number so the warning can be traced to a page
            logger.warn("No transcript was exported for page " + p.getPageNr());
        }
        if (altoFile != null) {
            logger.debug("Written ALTO xml file " + altoFile.getAbsolutePath());
        } else {
            logger.warn("No alto was exported for page " + p.getPageNr());
        }
        setChanged();
        notifyObservers(Integer.valueOf(p.getPageNr()));
    }
    if (pars.isDoWriteMets()) {
        // FIXME loading the exported doc from its new location does not work for export of PAGE XMLs only,
        // therefore the altered copy is used for building the METS
        // set local folder or else TrpMetsBuilder will treat this as remote doc!
        doc2.getMd().setLocalFolder(outputDir);
        // write mets with file pointers to local files
        Mets mets = TrpMetsBuilder.buildMets(doc2, pars.isDoExportPageXml(), pars.isDoExportAltoXml(), pars.isDoWriteImages(), pageIndices);
        File metsFile = new File(outputDir.getAbsolutePath() + File.separator + TrpMetsBuilder.METS_FILE_NAME);
        try {
            JaxbUtils.marshalToFile(mets, metsFile, TrpDocMetadata.class);
        } catch (JAXBException e) {
            throw new IOException("Could not marshal METS to file!", e);
        }
    }
    return outputDir;
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) ImgType(org.dea.fimgstoreclient.beans.ImgType) JAXBException(javax.xml.bind.JAXBException) TranskribusMetadataType(eu.transkribus.core.model.beans.pagecontent.TranskribusMetadataType) TranskribusMetadataType(eu.transkribus.core.model.beans.pagecontent.TranskribusMetadataType) MetadataType(eu.transkribus.core.model.beans.pagecontent.MetadataType) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) IOException(java.io.IOException) URI(java.net.URI) URL(java.net.URL) AltoExporter(eu.transkribus.core.model.builder.alto.AltoExporter) FimgStoreGetClient(org.dea.fimgstoreclient.FimgStoreGetClient) Mets(eu.transkribus.core.model.beans.mets.Mets) TrpDoc(eu.transkribus.core.model.beans.TrpDoc) File(java.io.File) FimgStoreUriBuilder(org.dea.fimgstoreclient.utils.FimgStoreUriBuilder)

Example 4 with Mets

use of eu.transkribus.core.model.beans.mets.Mets in project TranskribusCore by Transkribus.

the class TrpDocPacker method packDocFiles.

/**
 * Zips a local TrpDoc into a file at the given zipFilePath.
 * The process involves computing MD5 sums for all files.
 * A METS file is written into the document's local folder and included in the zip.
 *
 * @param doc the local document to pack; its metadata must have a local folder set
 * @param zipFilePath target path of the zip file; if {@code null} or empty, a
 *        time-stamped file name below {@code TEMP_DIR} is used. A relative path is
 *        resolved against the current working directory when its parent
 *        directory is checked.
 * @return the zip file as returned by {@code ZipUtils.zip}
 * @throws IOException if the document has no local folder or the METS file cannot be created
 * @throws IllegalArgumentException if the parent directory of {@code zipFilePath} does not exist
 */
public File packDocFiles(TrpDoc doc, String zipFilePath) throws IOException {
    File localFolder = doc.getMd().getLocalFolder();
    if (localFolder == null) {
        throw new IOException("Not a local Document!");
    }
    Md5SumComputer md5Comp = new Md5SumComputer();
    md5Comp.addObserver(passthroughObserver);
    doc = md5Comp.computeAndSetMd5Sums(doc);
    if (zipFilePath == null || zipFilePath.isEmpty()) {
        logger.info("No zip file path specified.");
        zipFilePath = TEMP_DIR + File.separator + "TRP_DOC_" + System.currentTimeMillis() + ".zip";
    } else {
        // getParentFile() is null for a bare file name such as "doc.zip";
        // resolving to an absolute path first yields the working directory as parent
        File zipParentDir = new File(zipFilePath).getAbsoluteFile().getParentFile();
        if (zipParentDir == null || !zipParentDir.exists()) {
            throw new IllegalArgumentException(zipFilePath + " refers to a non-existent directory!");
        }
    }
    logger.info("Creating zip file at: " + zipFilePath);
    String metsFilePath = localFolder.getAbsoluteFile() + File.separator + TrpMetsBuilder.METS_FILE_NAME;
    File metsFile = new File(metsFilePath);
    Mets mets;
    logger.info("Creating METS file at: " + metsFilePath);
    // build a mets that points to all files we need
    // 2nd arg: export page files (add to mets filesec), 3rd arg: export alto files, 4th arg: export images
    mets = TrpMetsBuilder.buildMets(doc, true, false, true, null);
    try {
        metsFile = JaxbUtils.marshalToFile(mets, metsFile, TrpDocMetadata.class);
    } catch (JAXBException e) {
        logger.error(e.getMessage(), e);
        throw new IOException("Could not create METS file.", e);
    }
    updateStatus("Built METS file");
    // prepare the list with files to be packed into the ZIP
    List<String> fileList = new LinkedList<>();
    fileList.add(TrpMetsBuilder.METS_FILE_NAME);
    // traverse the METS filesection and add all files to be zipped
    List<FileGrpType> typeGrps = MetsUtil.getMasterFileGrp(mets);
    for (FileGrpType type : typeGrps) {
        // constant-first comparison: a file group without an ID is skipped instead of causing an NPE
        if (TrpMetsBuilder.IMG_GROUP_ID.equals(type.getID()) || TrpMetsBuilder.PAGE_GROUP_ID.equals(type.getID())) {
            List<String> files = getFiles(type);
            fileList.addAll(files);
        }
    }
    updateStatus("Creating ZIP file...");
    File zipFile = ZipUtils.zip(fileList, localFolder.getAbsolutePath(), zipFilePath);
    return zipFile;
}
Also used : FileGrpType(eu.transkribus.core.model.beans.mets.FileGrpType) Md5SumComputer(eu.transkribus.core.io.util.Md5SumComputer) Mets(eu.transkribus.core.model.beans.mets.Mets) JAXBException(javax.xml.bind.JAXBException) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata) IOException(java.io.IOException) File(java.io.File) LinkedList(java.util.LinkedList)

Example 5 with Mets

use of eu.transkribus.core.model.beans.mets.Mets in project TranskribusCore by Transkribus.

the class TrpMetsBuilderTest method createMets.

/**
 * Loads the document found in the given folder, builds a METS for it (PAGE files
 * and images included, ALTO files excluded) and writes it to {@code mets.xml}
 * inside that folder.
 *
 * @param folder the document folder; must be an existing directory
 * @param printResultOnSysOut if {@code true}, the METS is additionally marshalled to standard out
 * @throws UnsupportedFormatException declared by this method's signature
 * @throws IOException if {@code folder} is {@code null} or not a directory
 * @throws JAXBException declared by this method's signature
 */
public static void createMets(File folder, boolean printResultOnSysOut) throws UnsupportedFormatException, IOException, JAXBException {
    if (folder == null) {
        throw new IOException("Folder null or no directory!");
    }
    if (!folder.isDirectory()) {
        throw new IOException("Folder null or no directory!");
    }
    final TrpDoc localDoc = LocalDocReader.load(folder.getAbsolutePath());
    // 2nd arg: export page files (add to mets filesec), 3rd arg: export alto files, 4th arg: export images
    final Mets builtMets = TrpMetsBuilder.buildMets(localDoc, true, false, true, null);
    final File target = new File(folder.getAbsolutePath() + "/mets.xml");
    JaxbUtils.marshalToFile(builtMets, target, TrpDocMetadata.class);
    if (printResultOnSysOut) {
        JaxbUtils.marshalToSysOut(builtMets, TrpDocMetadata.class);
    }
}
Also used : Mets(eu.transkribus.core.model.beans.mets.Mets) TrpDoc(eu.transkribus.core.model.beans.TrpDoc) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata) IOException(java.io.IOException) File(java.io.File)

Aggregations

Mets (eu.transkribus.core.model.beans.mets.Mets)10 File (java.io.File)7 IOException (java.io.IOException)6 TrpDoc (eu.transkribus.core.model.beans.TrpDoc)5 TrpDocMetadata (eu.transkribus.core.model.beans.TrpDocMetadata)5 TrpPage (eu.transkribus.core.model.beans.TrpPage)3 JAXBException (javax.xml.bind.JAXBException)3 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)2 FileGrpType (eu.transkribus.core.model.beans.mets.FileGrpType)2 URL (java.net.URL)2 Unmarshaller (javax.xml.bind.Unmarshaller)2 Schema (javax.xml.validation.Schema)2 FimgStoreGetClient (org.dea.fimgstoreclient.FimgStoreGetClient)2 Md5SumComputer (eu.transkribus.core.io.util.Md5SumComputer)1 ITrpFile (eu.transkribus.core.model.beans.ITrpFile)1 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)1 AmdSecType (eu.transkribus.core.model.beans.mets.AmdSecType)1 DivType (eu.transkribus.core.model.beans.mets.DivType)1 Fptr (eu.transkribus.core.model.beans.mets.DivType.Fptr)1 FileType (eu.transkribus.core.model.beans.mets.FileType)1