use of eu.transkribus.core.model.beans.mets.Mets in project TranskribusCore by Transkribus.
the class GoobiMetsImporter method unmarshalMets.
/**
 * Unmarshals the given METS file into a {@link Mets} bean.
 * <p>
 * When {@code validate} is set, the schema obtained from
 * {@code XmlFormat.METS.getOrCompileSchema()} is installed on the unmarshaller
 * before the file is read. The time spent is written to the debug log.
 *
 * @param metsFile the METS file to read
 * @param validate whether to set the METS schema on the unmarshaller
 * @return the unmarshalled METS object
 * @throws IOException declared for callers; not thrown directly by this method
 * @throws JAXBException propagated from creating or running the unmarshaller
 * @throws SAXException propagated from obtaining the schema
 */
private Mets unmarshalMets(File metsFile, boolean validate) throws IOException, JAXBException, SAXException {
    final Unmarshaller unmarshaller = JaxbUtils.createUnmarshaller(Mets.class, TrpDocMetadata.class);
    // the measured span starts after the unmarshaller exists, so it covers schema setup and parsing only
    final long startMillis = System.currentTimeMillis();
    if (validate) {
        unmarshaller.setSchema(XmlFormat.METS.getOrCompileSchema());
    }
    final Mets result = (Mets) unmarshaller.unmarshal(metsFile);
    final long elapsedMillis = System.currentTimeMillis() - startMillis;
    logger.debug("time for unmarshalling: " + elapsedMillis + ", validated: " + validate);
    logger.debug("unmarshalled mets file");
    return result;
}
use of eu.transkribus.core.model.beans.mets.Mets in project TranskribusCore by Transkribus.
the class GoobiMetsImporter method loadDocFromGoobiMets.
/**
 * Builds a {@link TrpDoc} from a Goobi METS file.
 * <p>
 * The METS file is unmarshalled, the document metadata is read from it via
 * {@code readModsMetadata}, and the pages are obtained from {@code fetchFiles}
 * using the given local directory.
 *
 * @param metsFile the Goobi METS file
 * @param localDirPath path of the local folder; it is set as the local folder of the
 *        document metadata and handed to {@code fetchFiles}
 * @return a new document carrying the read metadata and the fetched pages
 * @throws IOException
 * @throws SAXException
 * @throws JAXBException
 */
public TrpDoc loadDocFromGoobiMets(File metsFile, String localDirPath) throws IOException, JAXBException, SAXException {
    final Mets goobiMets = JaxbUtils.unmarshal(metsFile, Mets.class, TrpDocMetadata.class);
    final String absoluteMetsPath = metsFile.getAbsolutePath();

    updateStatus("Reading metadata...");
    final TrpDocMetadata metadata = readModsMetadata(XmlUtils.getDocumentFromFileWOE(absoluteMetsPath));

    logger.debug("the local user home dir = " + localDirPath);
    metadata.setLocalFolder(new File(localDirPath));

    // The pages are taken directly from fetchFiles(...) instead of re-loading the local folder:
    // per the original author's note, loading from the folder can disorder the METS page ORDER
    // when the file names differ in length.
    final TrpDoc result = new TrpDoc();
    result.setMd(metadata);
    result.setPages(fetchFiles(localDirPath, goobiMets));
    return result;
}
use of eu.transkribus.core.model.beans.mets.Mets in project TranskribusCore by Transkribus.
the class DocExporter method exportDoc.
/**
 * Exports the given document with the provided parameters.
 * <p>
 * Works on a copy of {@code doc}, as the page URLs, keys and transcript lists are altered
 * while exporting. Depending on {@code pars}, images, PAGE XML, ALTO XML, the document
 * metadata file and a METS file are written below the output directory. Observers are
 * notified with the page number after each exported page.
 *
 * @param doc current document
 * @param pars export settings
 * @return directory to which the export files were written
 * @throws IOException if the output directory already exists and overwriting is disabled,
 *         or if the metadata or METS file cannot be marshalled
 * @throws IllegalArgumentException
 * @throws URISyntaxException
 * @throws JAXBException if transcript metadata shall be exported but a transcript has no metadata element
 * @throws TransformerException
 */
public File exportDoc(TrpDoc doc, CommonExportPars pars) throws IOException, IllegalArgumentException, URISyntaxException, JAXBException, TransformerException {
    FimgStoreGetClient getter = null;
    FimgStoreUriBuilder uriBuilder = null;
    ImgType imgType = pars.getRemoteImgQuality() == null ? ImgType.orig : pars.getRemoteImgQuality();
    if (doc.isRemoteDoc()) {
        // FIXME fimagestore path should be read from docMd!
        getter = new FimgStoreGetClient("dbis-thure.uibk.ac.at", "f");
        final String scheme = pars.isUseHttps() ? "https" : "http";
        final int port = pars.isUseHttps() ? 443 : 80;
        uriBuilder = new FimgStoreUriBuilder(scheme, getter.getHost(), port, getter.getServerContext());
    }
    // create copy of object, as we alter it here while exporting
    TrpDoc doc2;
    doc2 = new TrpDoc(doc);
    // check and create output directory
    File outputDir = new File(pars.getDir());
    if (!pars.isDoOverwrite() && outputDir.exists()) {
        throw new IOException("File path already exists.");
    }
    outputDir.mkdir();
    // decide where to put the images
    final File imgOutputDir;
    if (pars.isUseOcrMasterDir()) {
        imgOutputDir = new File(outputDir.getAbsolutePath() + File.separatorChar + LocalDocConst.OCR_MASTER_DIR);
        imgOutputDir.mkdir();
    } else {
        imgOutputDir = outputDir;
    }
    File pageOutputDir = null, altoOutputDir = null;
    // check PAGE export settings and create output directory
    String pageDirName = pars.getPageDirName();
    if (pars.isDoExportPageXml() && !StringUtils.isEmpty(pageDirName)) {
        pageOutputDir = new File(outputDir.getAbsolutePath() + File.separatorChar + pageDirName);
        if (pageOutputDir.mkdir()) {
            logger.debug("pageOutputDir created successfully ");
        } else {
            logger.debug("pageOutputDir could not be created!");
        }
    } else {
        // if pageDirName is not set, export the PAGE XMLs to imgOutputDir
        pageOutputDir = imgOutputDir;
    }
    // check Alto export settings and create output directory
    AltoExporter altoEx = new AltoExporter();
    if (pars.isDoExportAltoXml()) {
        altoOutputDir = altoEx.createAltoOuputDir(doc2, outputDir.getAbsolutePath());
    }
    // check and write metadata
    if (doc2.getMd() != null) {
        File fileOut = new File(outputDir.getAbsolutePath() + File.separatorChar + LocalDocConst.METADATA_FILENAME);
        try {
            JaxbUtils.marshalToFile(doc2.getMd(), fileOut);
        } catch (JAXBException e) {
            throw new IOException("Could not marshal metadata to file.", e);
        }
    }
    List<TrpPage> pages = doc2.getPages();
    // a null index set means "all pages" (see the check inside the loop)
    Set<Integer> pageIndices = pars.getPageIndices(doc.getNPages());
    // do export for all defined pages
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        TrpPage p = pages.get(i);
        File imgFile = null, xmlFile = null, altoFile = null;
        // captured before the page URL is redirected to the downloaded image below
        URL imgUrl = p.getUrl();
        final String baseFileName = ExportFilePatternUtils.buildBaseFileName(pars.getFileNamePattern(), p);
        final String imgExt = "." + FilenameUtils.getExtension(p.getImgFileName());
        final String xmlExt = ".xml";
        // gather remote files and export document
        if (doc2.isRemoteDoc()) {
            if (pars.isDoWriteImages()) {
                final String msg = "Downloading " + imgType.toString() + " image for page nr. " + p.getPageNr();
                logger.debug(msg);
                updateStatus(msg);
                final URI imgUri = uriBuilder.getImgUri(p.getKey(), imgType);
                imgFile = getter.saveFile(imgUri, imgOutputDir.getAbsolutePath(), baseFileName + imgExt);
                // the page copy now points to the local image file
                p.setUrl(imgFile.toURI().toURL());
                p.setKey(null);
            }
            if (pars.isDoExportPageXml()) {
                // use the previously stored chosen transcript version from the cache, if there is one
                TrpTranscriptMetadata transcriptMd;
                JAXBPageTranscript transcript = cache.getPageTranscriptAtIndex(i);
                // set up transcript metadata
                if (transcript == null) {
                    transcriptMd = p.getCurrentTranscript();
                    logger.warn("Have to unmarshall transcript in DocExporter for transcript " + transcriptMd + " - should have been built before using ExportUtils::storePageTranscripts4Export!");
                    transcript = new JAXBPageTranscript(transcriptMd);
                    transcript.build();
                } else {
                    transcriptMd = transcript.getMd();
                }
                URL xmlUrl = transcriptMd.getUrl();
                if (pars.isExportTranscriptMetadata()) {
                    MetadataType md = transcript.getPage().getPcGtsType().getMetadata();
                    if (md == null) {
                        throw new JAXBException("Transcript does not contain a metadata element: " + transcriptMd);
                    }
                    String imgUrlStr = CoreUtils.urlToString(imgUrl);
                    String xmlUrlStr = CoreUtils.urlToString(xmlUrl);
                    String status = transcriptMd.getStatus() == null ? null : transcriptMd.getStatus().toString();
                    TranskribusMetadataType tmd = new TranskribusMetadataType();
                    tmd.setDocId(doc.getId());
                    tmd.setPageId(p.getPageId());
                    tmd.setPageNr(p.getPageNr());
                    tmd.setTsid(transcriptMd.getTsId());
                    tmd.setStatus(status);
                    tmd.setUserId(transcriptMd.getUserId());
                    tmd.setImgUrl(imgUrlStr);
                    tmd.setXmlUrl(xmlUrlStr);
                    tmd.setImageId(p.getImageId());
                    md.setTranskribusMetadata(tmd);
                }
                // write transcript to file
                xmlFile = new File(FilenameUtils.normalizeNoEndSeparator(pageOutputDir.getAbsolutePath()) + File.separator + baseFileName + xmlExt);
                logger.debug("PAGE XMl output file: " + xmlFile.getAbsolutePath());
                transcript.write(xmlFile);
                // make sure (for other exports) that the transcript that is exported is the only one set in the transcripts list of TrpPage
                p.getTranscripts().clear();
                TrpTranscriptMetadata tCopy = new TrpTranscriptMetadata(transcriptMd, p);
                tCopy.setUrl(xmlFile.toURI().toURL());
                p.getTranscripts().add(tCopy);
            }
        } else {
            updateStatus("Copying local files for page nr. " + p.getPageNr());
            // copy local files during export
            if (pars.isDoWriteImages()) {
                imgFile = LocalDocWriter.copyImgFile(p, p.getUrl(), imgOutputDir.getAbsolutePath(), baseFileName + imgExt);
            }
            if (pars.isDoExportPageXml()) {
                xmlFile = LocalDocWriter.copyTranscriptFile(p, pageOutputDir.getAbsolutePath(), baseFileName + xmlExt, cache);
            }
        }
        // export alto:
        if (pars.isDoExportAltoXml()) {
            altoFile = altoEx.exportAltoFile(p, baseFileName + xmlExt, altoOutputDir, pars.isSplitIntoWordsInAltoXml());
        }
        if (imgFile != null) {
            logger.debug("Written image file " + imgFile.getAbsolutePath());
        }
        // Warn only if the respective export was requested but produced no file, and name the page.
        if (xmlFile != null) {
            logger.debug("Written transcript xml file " + xmlFile.getAbsolutePath());
        } else if (pars.isDoExportPageXml()) {
            logger.warn("No transcript was exported for page " + p.getPageNr());
        }
        if (altoFile != null) {
            logger.debug("Written ALTO xml file " + altoFile.getAbsolutePath());
        } else if (pars.isDoExportAltoXml()) {
            logger.warn("No alto was exported for page " + p.getPageNr());
        }
        setChanged();
        notifyObservers(Integer.valueOf(p.getPageNr()));
    }
    if (pars.isDoWriteMets()) {
        // FIXME loading the exported doc from its new location does not work for export of PAGE XMLs only,
        // so the altered copy is used instead.
        // set local folder or else TrpMetsBuilder will treat this as remote doc!
        // NOTE(review): doc2.getMd() is null-checked when writing the metadata file above but not here - confirm it cannot be null
        doc2.getMd().setLocalFolder(outputDir);
        // write mets with file pointers to local files
        Mets mets = TrpMetsBuilder.buildMets(doc2, pars.isDoExportPageXml(), pars.isDoExportAltoXml(), pars.isDoWriteImages(), pageIndices);
        File metsFile = new File(outputDir.getAbsolutePath() + File.separator + TrpMetsBuilder.METS_FILE_NAME);
        try {
            JaxbUtils.marshalToFile(mets, metsFile, TrpDocMetadata.class);
        } catch (JAXBException e) {
            throw new IOException("Could not marshal METS to file!", e);
        }
    }
    return outputDir;
}
use of eu.transkribus.core.model.beans.mets.Mets in project TranskribusCore by Transkribus.
the class TrpDocPacker method packDocFiles.
/**
 * Zips a local TrpDoc into a file at the given zipFilePath.
 * The process involves computing MD5 sums for all files.
 * A METS file is built, written into the document's local folder and included in the ZIP.
 *
 * @param doc the local document to pack; its metadata must carry a local folder
 * @param zipFilePath target path of the ZIP file; if {@code null} or empty, a time-stamped
 *        file name below {@code TEMP_DIR} is used
 * @return the ZIP file as returned by {@code ZipUtils.zip}
 * @throws IOException if the document has no local folder or the METS file cannot be created
 * @throws IllegalArgumentException if the directory that shall contain the ZIP file does not exist
 */
public File packDocFiles(TrpDoc doc, String zipFilePath) throws IOException {
    // missing metadata is reported like a missing local folder instead of failing with an NPE
    File localFolder = doc.getMd() == null ? null : doc.getMd().getLocalFolder();
    if (localFolder == null) {
        throw new IOException("Not a local Document!");
    }
    Md5SumComputer md5Comp = new Md5SumComputer();
    md5Comp.addObserver(passthroughObserver);
    doc = md5Comp.computeAndSetMd5Sums(doc);
    if (zipFilePath == null || zipFilePath.isEmpty()) {
        logger.info("No zip file path specified.");
        zipFilePath = TEMP_DIR + File.separator + "TRP_DOC_" + System.currentTimeMillis() + ".zip";
    } else {
        // Resolve against the absolute path: getParentFile() on a bare file name such as "doc.zip"
        // returns null, which used to cause a NullPointerException here.
        File zipParentDir = new File(zipFilePath).getAbsoluteFile().getParentFile();
        if (zipParentDir == null || !zipParentDir.exists()) {
            throw new IllegalArgumentException(zipFilePath + " refers to a non-existent directory!");
        }
    }
    logger.info("Creating zip file at: " + zipFilePath);
    String metsFilePath = localFolder.getAbsoluteFile() + File.separator + TrpMetsBuilder.METS_FILE_NAME;
    File metsFile = new File(metsFilePath);
    logger.info("Creating METS file at: " + metsFilePath);
    // build a mets that points to all files we need
    // 2nd arg: export page files (add to mets filesec), 3rd arg: export alto files, 4th arg: export images
    Mets mets = TrpMetsBuilder.buildMets(doc, true, false, true, null);
    try {
        metsFile = JaxbUtils.marshalToFile(mets, metsFile, TrpDocMetadata.class);
    } catch (JAXBException e) {
        logger.error(e.getMessage(), e);
        throw new IOException("Could not create METS file.", e);
    }
    updateStatus("Built METS file");
    // prepare the list with files to be packed into the ZIP
    List<String> fileList = new LinkedList<>();
    fileList.add(TrpMetsBuilder.METS_FILE_NAME);
    // traverse the METS filesection and add all files of the image and PAGE groups
    List<FileGrpType> typeGrps = MetsUtil.getMasterFileGrp(mets);
    for (FileGrpType type : typeGrps) {
        // constant-first comparison: a file group without an ID is skipped instead of raising an NPE
        String groupId = type.getID();
        if (TrpMetsBuilder.IMG_GROUP_ID.equals(groupId) || TrpMetsBuilder.PAGE_GROUP_ID.equals(groupId)) {
            fileList.addAll(getFiles(type));
        }
    }
    updateStatus("Creating ZIP file...");
    return ZipUtils.zip(fileList, localFolder.getAbsolutePath(), zipFilePath);
}
use of eu.transkribus.core.model.beans.mets.Mets in project TranskribusCore by Transkribus.
the class TrpMetsBuilderTest method createMets.
/**
 * Loads the document found in the given folder, builds a METS for it (PAGE files and
 * images included, ALTO files excluded) and writes it to {@code mets.xml} inside that folder.
 *
 * @param folder the folder containing the local document
 * @param printResultOnSysOut if {@code true}, the METS is additionally marshalled to standard out
 * @throws UnsupportedFormatException
 * @throws IOException if {@code folder} is {@code null} or not a directory
 * @throws JAXBException
 */
public static void createMets(File folder, boolean printResultOnSysOut) throws UnsupportedFormatException, IOException, JAXBException {
    final boolean usableFolder = folder != null && folder.isDirectory();
    if (!usableFolder) {
        throw new IOException("Folder null or no directory!");
    }
    final String folderPath = folder.getAbsolutePath();
    final TrpDoc localDoc = LocalDocReader.load(folderPath);
    // 2nd arg: export page files (add to mets filesec), 3rd arg: export alto files, 4th arg: export images
    final Mets builtMets = TrpMetsBuilder.buildMets(localDoc, true, false, true, null);
    final File target = new File(folderPath + "/mets.xml");
    JaxbUtils.marshalToFile(builtMets, target, TrpDocMetadata.class);
    if (printResultOnSysOut) {
        JaxbUtils.marshalToSysOut(builtMets, TrpDocMetadata.class);
    }
}
Aggregations