use of org.dea.fimgstoreclient.FimgStoreGetClient in project TranskribusCore by Transkribus.
the class DocExporter method exportDoc.
/**
 * Export the given document with the provided parameters.
 * <p>
 * The export works on a copy of {@code doc}, because page URLs, keys and transcript
 * lists are rewritten to point to the exported files while exporting. Depending on
 * {@code pars}, images, PAGE XML, ALTO XML, the document metadata and a METS file are
 * written below the output directory given by {@code pars.getDir()}.
 *
 * @param doc current document
 * @param pars export settings
 * @return directory to which the export files were written
 * @throws IOException if the output path already exists and overwriting is disabled, if an
 *         output directory cannot be created, or if metadata/METS cannot be marshalled
 * @throws IllegalArgumentException declared for callers; not thrown directly in this method
 * @throws URISyntaxException declared for callers; not thrown directly in this method
 * @throws JAXBException if transcript metadata export is requested and a transcript has no
 *         metadata element
 * @throws TransformerException declared for callers; not thrown directly in this method
 */
public File exportDoc(TrpDoc doc, CommonExportPars pars) throws IOException, IllegalArgumentException, URISyntaxException, JAXBException, TransformerException {
    FimgStoreGetClient getter = null;
    FimgStoreUriBuilder uriBuilder = null;
    // fall back to the original image if no remote image quality is configured
    ImgType imgType = pars.getRemoteImgQuality() == null ? ImgType.orig : pars.getRemoteImgQuality();
    if (doc.isRemoteDoc()) {
        // FIXME fimagestore path should be read from docMd!
        getter = new FimgStoreGetClient("dbis-thure.uibk.ac.at", "f");
        final String scheme = pars.isUseHttps() ? "https" : "http";
        final int port = pars.isUseHttps() ? 443 : 80;
        uriBuilder = new FimgStoreUriBuilder(scheme, getter.getHost(), port, getter.getServerContext());
    }
    // create copy of object, as we alter it here while exporting
    TrpDoc doc2 = new TrpDoc(doc);
    // check and create output directory
    File outputDir = new File(pars.getDir());
    if (!pars.isDoOverwrite() && outputDir.exists()) {
        throw new IOException("File path already exists.");
    }
    // fail early with a clear message instead of silently ignoring a failed mkdir
    if (!outputDir.mkdir() && !outputDir.isDirectory()) {
        throw new IOException("Could not create output directory: " + outputDir.getAbsolutePath());
    }
    // decide where to put the images
    final File imgOutputDir;
    if (pars.isUseOcrMasterDir()) {
        imgOutputDir = new File(outputDir.getAbsolutePath() + File.separatorChar + LocalDocConst.OCR_MASTER_DIR);
        if (!imgOutputDir.mkdir() && !imgOutputDir.isDirectory()) {
            throw new IOException("Could not create image output directory: " + imgOutputDir.getAbsolutePath());
        }
    } else {
        imgOutputDir = outputDir;
    }
    File pageOutputDir = null, altoOutputDir = null;
    // check PAGE export settings and create output directory
    String pageDirName = pars.getPageDirName();
    if (pars.isDoExportPageXml() && !StringUtils.isEmpty(pageDirName)) {
        pageOutputDir = new File(outputDir.getAbsolutePath() + File.separatorChar + pageDirName);
        if (pageOutputDir.mkdir()) {
            logger.debug("pageOutputDir created successfully ");
        } else {
            logger.debug("pageOutputDir could not be created!");
        }
    } else {
        // if pageDirName is not set, export the PAGE XMLs to imgOutputDir
        pageOutputDir = imgOutputDir;
    }
    // check Alto export settings and create output directory
    AltoExporter altoEx = new AltoExporter();
    if (pars.isDoExportAltoXml()) {
        altoOutputDir = altoEx.createAltoOuputDir(doc2, outputDir.getAbsolutePath());
    }
    // check and write metadata
    if (doc2.getMd() != null) {
        File fileOut = new File(outputDir.getAbsolutePath() + File.separatorChar + LocalDocConst.METADATA_FILENAME);
        try {
            JaxbUtils.marshalToFile(doc2.getMd(), fileOut);
        } catch (JAXBException e) {
            throw new IOException("Could not marshal metadata to file.", e);
        }
    }
    List<TrpPage> pages = doc2.getPages();
    Set<Integer> pageIndices = pars.getPageIndices(doc.getNPages());
    // do export for all defined pages
    for (int i = 0; i < pages.size(); ++i) {
        // a null index set means "export all pages"
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        TrpPage p = pages.get(i);
        File imgFile = null, xmlFile = null, altoFile = null;
        // remember the original image URL before it is possibly replaced by the local file URL
        URL imgUrl = p.getUrl();
        final String baseFileName = ExportFilePatternUtils.buildBaseFileName(pars.getFileNamePattern(), p);
        final String imgExt = "." + FilenameUtils.getExtension(p.getImgFileName());
        final String xmlExt = ".xml";
        // gather remote files and export document
        if (doc2.isRemoteDoc()) {
            if (pars.isDoWriteImages()) {
                final String msg = "Downloading " + imgType.toString() + " image for page nr. " + p.getPageNr();
                logger.debug(msg);
                updateStatus(msg);
                final URI imgUri = uriBuilder.getImgUri(p.getKey(), imgType);
                imgFile = getter.saveFile(imgUri, imgOutputDir.getAbsolutePath(), baseFileName + imgExt);
                // the page copy now points to the downloaded file instead of the image store
                p.setUrl(imgFile.toURI().toURL());
                p.setKey(null);
            }
            if (pars.isDoExportPageXml()) {
                // use the transcript version previously stored in the cache, if there is one
                TrpTranscriptMetadata transcriptMd;
                JAXBPageTranscript transcript = cache.getPageTranscriptAtIndex(i);
                // set up transcript metadata
                if (transcript == null) {
                    transcriptMd = p.getCurrentTranscript();
                    logger.warn("Have to unmarshall transcript in DocExporter for transcript " + transcriptMd + " - should have been built before using ExportUtils::storePageTranscripts4Export!");
                    transcript = new JAXBPageTranscript(transcriptMd);
                    transcript.build();
                } else {
                    transcriptMd = transcript.getMd();
                }
                URL xmlUrl = transcriptMd.getUrl();
                if (pars.isExportTranscriptMetadata()) {
                    MetadataType md = transcript.getPage().getPcGtsType().getMetadata();
                    if (md == null) {
                        throw new JAXBException("Transcript does not contain a metadata element: " + transcriptMd);
                    }
                    String imgUrlStr = CoreUtils.urlToString(imgUrl);
                    String xmlUrlStr = CoreUtils.urlToString(xmlUrl);
                    String status = transcriptMd.getStatus() == null ? null : transcriptMd.getStatus().toString();
                    TranskribusMetadataType tmd = new TranskribusMetadataType();
                    tmd.setDocId(doc.getId());
                    tmd.setPageId(p.getPageId());
                    tmd.setPageNr(p.getPageNr());
                    tmd.setTsid(transcriptMd.getTsId());
                    tmd.setStatus(status);
                    tmd.setUserId(transcriptMd.getUserId());
                    tmd.setImgUrl(imgUrlStr);
                    tmd.setXmlUrl(xmlUrlStr);
                    tmd.setImageId(p.getImageId());
                    md.setTranskribusMetadata(tmd);
                }
                // write transcript to file
                xmlFile = new File(FilenameUtils.normalizeNoEndSeparator(pageOutputDir.getAbsolutePath()) + File.separator + baseFileName + xmlExt);
                logger.debug("PAGE XMl output file: " + xmlFile.getAbsolutePath());
                transcript.write(xmlFile);
                // make sure (for other exports) that the transcript that is exported is the only one set in the transcripts list of TrpPage
                p.getTranscripts().clear();
                TrpTranscriptMetadata tCopy = new TrpTranscriptMetadata(transcriptMd, p);
                tCopy.setUrl(xmlFile.toURI().toURL());
                p.getTranscripts().add(tCopy);
            }
        } else {
            updateStatus("Copying local files for page nr. " + p.getPageNr());
            // copy local files during export
            if (pars.isDoWriteImages()) {
                imgFile = LocalDocWriter.copyImgFile(p, p.getUrl(), imgOutputDir.getAbsolutePath(), baseFileName + imgExt);
            }
            if (pars.isDoExportPageXml()) {
                xmlFile = LocalDocWriter.copyTranscriptFile(p, pageOutputDir.getAbsolutePath(), baseFileName + xmlExt, cache);
            }
        }
        // export alto:
        if (pars.isDoExportAltoXml()) {
            altoFile = altoEx.exportAltoFile(p, baseFileName + xmlExt, altoOutputDir, pars.isSplitIntoWordsInAltoXml());
        }
        if (imgFile != null) {
            logger.debug("Written image file " + imgFile.getAbsolutePath());
        }
        if (xmlFile != null) {
            logger.debug("Written transcript xml file " + xmlFile.getAbsolutePath());
        } else if (pars.isDoExportPageXml()) {
            // only warn if a transcript was actually requested, and say which page is affected
            logger.warn("No transcript was exported for page " + p.getPageNr());
        }
        if (altoFile != null) {
            logger.debug("Written ALTO xml file " + altoFile.getAbsolutePath());
        } else if (pars.isDoExportAltoXml()) {
            // only warn if ALTO was actually requested, and say which page is affected
            logger.warn("No alto was exported for page " + p.getPageNr());
        }
        // report progress to observers using the page number
        setChanged();
        notifyObservers(Integer.valueOf(p.getPageNr()));
    }
    if (pars.isDoWriteMets()) {
        // FIXME loading the exported doc from its new location does not work for export of PAGE XMLs only,
        // so the altered copy is used instead.
        // set local folder or else TrpMetsBuilder will treat this as remote doc!
        doc2.getMd().setLocalFolder(outputDir);
        // write mets with file pointers to local files
        Mets mets = TrpMetsBuilder.buildMets(doc2, pars.isDoExportPageXml(), pars.isDoExportAltoXml(), pars.isDoWriteImages(), pageIndices);
        File metsFile = new File(outputDir.getAbsolutePath() + File.separator + TrpMetsBuilder.METS_FILE_NAME);
        try {
            JaxbUtils.marshalToFile(mets, metsFile, TrpDocMetadata.class);
        } catch (JAXBException e) {
            throw new IOException("Could not marshal METS to file!", e);
        }
    }
    return outputDir;
}
use of org.dea.fimgstoreclient.FimgStoreGetClient in project TranskribusCore by Transkribus.
the class FimgStoreReadConnection method getFileMd.
/**
 * Fetch the file metadata for the file the given URL refers to.
 * <p>
 * A get-client is created from the URL, the file key is extracted from the same URL and
 * the client is then asked for the metadata of that key.
 *
 * @param url URL of the file in the image store
 * @return the metadata returned by the client for the extracted key
 * @throws IOException if the key cannot be extracted from the URL (the
 *         {@link URISyntaxException} is kept as cause), or if the client call fails
 */
public static FimgStoreFileMd getFileMd(URL url) throws IOException {
    final FimgStoreGetClient client = new FimgStoreGetClient(url);
    String fileKey;
    try {
        fileKey = FimgStoreUtils.extractKey(url);
    } catch (URISyntaxException cause) {
        // surface the malformed URL as an IOException so callers only deal with one exception type
        final String message = "Could not extract key from url: " + url.toString();
        throw new IOException(message, cause);
    }
    return client.getFileMd(fileKey);
}
use of org.dea.fimgstoreclient.FimgStoreGetClient in project TranskribusCore by Transkribus.
the class PdfExporter method export.
/**
 * Write the selected pages of a document into a PDF file.
 * <p>
 * Observers are notified with a status message at start and end and with the 1-based
 * page number after each processed page. If the {@code cancel} flag is set after a page
 * was processed, the PDF is closed, the incomplete file is deleted and an
 * {@link InterruptedException} is thrown.
 *
 * @param doc document to export; must not be null
 * @param path path of the PDF file to write; must not be null
 * @param pageIndices 0-based indices of the pages to export, or null for all pages
 * @param useWordLevel passed on to the PDF document and the tag export
 * @param addTextPages passed on to each added page
 * @param imagesOnly passed on to each added page
 * @param highlightTags if true, tags are added to the PDF after all pages were written
 * @param doBlackening passed on to the PDF document and each added page
 * @param createTitle passed on to the PDF document
 * @param cache cache of page transcripts; a new empty cache is used if null
 * @return the written PDF file
 * @throws IllegalArgumentException if {@code doc} or {@code path} is null
 * @throws IOException if the incomplete PDF cannot be deleted on cancel
 * @throws InterruptedException if the export was cancelled
 */
public File export(final TrpDoc doc, final String path, Set<Integer> pageIndices, final boolean useWordLevel, final boolean addTextPages, final boolean imagesOnly, final boolean highlightTags, final boolean doBlackening, boolean createTitle, ExportCache cache) throws DocumentException, MalformedURLException, IOException, JAXBException, URISyntaxException, InterruptedException {
    if (doc == null) {
        throw new IllegalArgumentException("TrpDoc is null!");
    }
    if (path == null) {
        throw new IllegalArgumentException("path is null!");
    }
    if (cache == null) {
        cache = new ExportCache();
    }

    final File pdfFile = new File(path);
    final TrpPdfDocument pdf = new TrpPdfDocument(pdfFile, useWordLevel, highlightTags, doBlackening, createTitle);
    setChanged();
    notifyObservers("Creating PDF document...");

    boolean firstPageDone = false;
    for (int idx = 0; idx < doc.getPages().size(); ++idx) {
        final boolean selected = pageIndices == null || pageIndices.contains(idx);
        if (!selected) {
            continue;
        }
        logger.info("Processing page " + (idx + 1));
        final TrpPage page = doc.getPages().get(idx);
        final URL imgUrl = page.getUrl();

        // The image metadata is only needed for the resolution, which may be missing in the image itself.
        // For a local doc there is no such metadata, so it stays null and the image has to provide it.
        FimgStoreImgMd imgMd = null;
        if (doc.isRemoteDoc()) {
            imgMd = (FimgStoreImgMd) new FimgStoreGetClient(page.getUrl()).getFileMd(page.getKey());
        }

        final URL xmlUrl = page.getCurrentTranscript().getUrl();
        logger.debug("output with tags " + highlightTags);

        // prefer the transcript from the cache; unmarshal from the URL only if it is not cached
        final JAXBPageTranscript cachedTranscript = cache.getPageTranscriptAtIndex(idx);
        final PcGtsType pc;
        if (cachedTranscript == null) {
            pc = PageXmlUtils.unmarshal(xmlUrl);
        } else {
            pc = cachedTranscript.getPageData();
        }

        if (firstPageDone) {
            pdf.addPage(imgUrl, null, pc, addTextPages, imagesOnly, imgMd, doBlackening, cache);
        } else {
            // only the first page is added together with the doc (title page with doc metadata
            // and editorial declarations, if this option is set)
            pdf.addPage(imgUrl, doc, pc, addTextPages, imagesOnly, imgMd, doBlackening, cache);
            firstPageDone = true;
        }

        setChanged();
        notifyObservers(Integer.valueOf(idx + 1));

        if (cancel) {
            // close first so the incomplete file can be removed
            pdf.close();
            if (!pdfFile.delete()) {
                throw new IOException("Could not delete the incomplete PDF file during export cancel");
            }
            throw new InterruptedException("Export canceled by the user");
        }
    }

    if (highlightTags) {
        pdf.addTags(doc, pageIndices, useWordLevel, cache);
    }
    pdf.close();

    setChanged();
    notifyObservers("PDF written at: " + path);
    logger.info("PDF written at: " + path);
    return pdfFile;
}
use of org.dea.fimgstoreclient.FimgStoreGetClient in project TranskribusCore by Transkribus.
the class TrpMetsBuilder method buildMets.
/**
 * Generate a METS containing
 * <ul>
 * <li>TrpDocMetadata embedded in sourceMd</li>
 * <li>all page images (if {@code exportImages} is set)</li>
 * <li>the current PAGE XML transcript of each page (if {@code exportPage} is set)</li>
 * <li>a pointer to the ALTO file of each page (if {@code exportAlto} is set)</li>
 * </ul>
 *
 * If a local document is passed, all hrefs will contain the relative paths to files based on the localFolder!
 * A document counts as local if its metadata has a local folder set.
 *
 * @param doc the document to describe
 * @param exportPage whether to add the PAGE XML file group
 * @param exportAlto whether to add the ALTO file group
 * @param exportImages whether to add the image file group
 * @param pageIndices 0-based indices of the pages to include, or null for all pages
 * @return the METS object
 * @throws IOException if image/xml files can't be accessed for reading the mimetype etc.
 */
public static Mets buildMets(TrpDoc doc, boolean exportPage, boolean exportAlto, boolean exportImages, Set<Integer> pageIndices) throws IOException {
    Mets mets = new Mets();
    TrpDocMetadata md = doc.getMd();
    File localFolder = md.getLocalFolder();
    boolean isLocalDoc = localFolder != null;
    mets.setLABEL(md.getTitle());
    mets.setOBJID("" + md.getDocId());
    mets.setPROFILE(TRP_METS_PROFILE);
    // FIXME remove TYPE
    // metsHdr
    MetsHdr hdr = buildMetsHdr(md);
    mets.setMetsHdr(hdr);
    // TODO dcmd_elec omitted meanwhile
    // md_orig
    AmdSecType amdSec = new AmdSecType();
    amdSec.setID(SOURCE_MD_ID_CONST);
    MdSecType sourceMdSec = buildSourceMdSec(md);
    amdSec.getSourceMD().add(sourceMdSec);
    mets.getAmdSec().add(amdSec);
    // structmap div, linking to the sourceMd section with dmd
    DivType div = new DivType();
    div.getADMID().add(sourceMdSec);
    div.setID(TRP_DOC_DIV_ID);
    FileSec fileSec = new FileSec();
    StructMapType structMap = new StructMapType();
    structMap.setID(TRP_STRUCTMAP_ID);
    structMap.setTYPE("MANUSCRIPT");
    structMap.setDiv(div);
    List<TrpPage> pages = doc.getPages();
    FimgStoreGetClient client = null;
    // the client is derived from the first page; a remote doc without pages needs no client
    // because the page loop below is the only place where it is used
    if (!isLocalDoc && !pages.isEmpty()) {
        // TODO maybe we need this stuff in the docMetadata?
        URL url = pages.get(0).getUrl();
        client = new FimgStoreGetClient(url);
    }
    FileGrp masterGrp = new FileGrp();
    masterGrp.setID(MASTER_FILE_GRP_ID);
    FileGrpType imgGrp = new FileGrpType();
    imgGrp.setID(IMG_GROUP_ID);
    FileGrpType pageGrp = new FileGrpType();
    pageGrp.setID(PAGE_GROUP_ID);
    FileGrpType altoGrp = new FileGrpType();
    altoGrp.setID(ALTO_GROUP_ID);
    int i = -1;
    for (TrpPage p : pages) {
        i++;
        // a null index set means "include all pages"
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        // build a page div for the structmap
        DivType pageDiv = new DivType();
        pageDiv.setID("PAGE_" + p.getPageNr());
        pageDiv.setTYPE("SINGLE_PAGE");
        pageDiv.setORDER(BigInteger.valueOf(p.getPageNr()));
        final String imgId = "IMG_" + p.getPageNr();
        final String xmlId = PAGE_GROUP_ID + "_" + p.getPageNr();
        final String altoId = ALTO_GROUP_ID + "_" + p.getPageNr();
        /* only the current transcript is added here for now
         *
         * TODO how to deal with imagestore files? use orig image? right now, it's just the view file...
         * TODO thumbnails not yet included
         */
        if (exportImages) {
            FileType img = buildFileType(localFolder, imgId, p, p.getPageNr(), client);
            imgGrp.getFile().add(img);
            // linking images
            Fptr imgPtr = buildFptr(img);
            pageDiv.getFptr().add(imgPtr);
        }
        // TODO error handling.. if no transcript??
        if (exportPage) {
            // xmlfiletype: add the transcript chosen for export
            TrpTranscriptMetadata tMd = p.getCurrentTranscript();
            FileType xml = buildFileType(localFolder, xmlId, tMd, p.getPageNr(), client);
            pageGrp.getFile().add(xml);
            Fptr xmlPtr = buildFptr(xml);
            pageDiv.getFptr().add(xmlPtr);
        }
        // create ALTO fileGrp
        if (exportAlto) {
            FileType altoFt = new FileType();
            altoFt.setCHECKSUMTYPE(ChecksumUtils.ChkSumAlg.MD5.toString());
            // TODO calculate checksum
            altoFt.setCHECKSUM("");
            FLocat fLocat = new FLocat();
            fLocat.setLOCTYPE("OTHER");
            fLocat.setOTHERLOCTYPE("FILE");
            altoFt.setID(altoId);
            altoFt.setSEQ(p.getPageNr());
            // the ALTO file name is the image file name with its extension replaced by ".xml";
            // an image name without extension is used as a whole instead of failing in substring
            final String imgFileName = p.getImgFileName();
            final int extIdx = imgFileName.lastIndexOf('.');
            final String imgBaseName = extIdx < 0 ? imgFileName : imgFileName.substring(0, extIdx);
            final String altoFileName = imgBaseName.concat(".xml");
            String relAltoPath = "alto".concat(File.separator).concat(altoFileName);
            fLocat.setHref(relAltoPath);
            String mime = MimeTypes.getMimeType("xml");
            altoFt.setMIMETYPE(mime);
            // the absolute ALTO path is only needed to read the creation date of the file;
            // FileUtils.toFile yields null for non-file URLs (e.g. images that were not downloaded)
            final File imgFile = FileUtils.toFile(p.getUrl());
            if (imgFile == null) {
                logger.info("page image is not a local file, cannot look up alto file for " + p.getUrl());
            } else {
                final String path = imgFile.getAbsolutePath();
                String absAltoPath = path.substring(0, path.lastIndexOf(File.separator));
                absAltoPath = absAltoPath.concat("/alto/").concat(altoFileName);
                if (absAltoPath.startsWith("\\")) {
                    absAltoPath = absAltoPath.substring(1);
                }
                File altoTmp = new File(absAltoPath);
                if (altoTmp.exists()) {
                    Date date = new Date(altoTmp.lastModified());
                    XMLGregorianCalendar cal = JaxbUtils.getXmlCalendar(date);
                    altoFt.setCREATED(cal);
                } else {
                    logger.info("alto file does not exist at " + absAltoPath);
                }
            }
            altoFt.getFLocat().add(fLocat);
            altoGrp.getFile().add(altoFt);
            Fptr altoPtr = buildFptr(altoFt);
            pageDiv.getFptr().add(altoPtr);
        }
        div.getDiv().add(pageDiv);
    }
    fileSec.getFileGrp().add(masterGrp);
    mets.setFileSec(fileSec);
    // only the requested file groups are attached to the master group
    if (exportImages) {
        masterGrp.getFileGrp().add(imgGrp);
    }
    if (exportPage) {
        masterGrp.getFileGrp().add(pageGrp);
    }
    if (exportAlto) {
        masterGrp.getFileGrp().add(altoGrp);
    }
    mets.getStructMap().add(structMap);
    return mets;
}
Aggregations