use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.
the class TrpRtfBuilder method writeRtfForDoc.
/**
 * Exports the current transcripts of a document into a single RTF file,
 * adding one RTF section per exported page.
 *
 * @param doc the document whose pages are exported
 * @param wordBased passed through unchanged to {@code getRtfParagraphsForTranscript}
 * @param file the file the RTF output is written to
 * @param pageIndices zero-based indices of the pages to export; {@code null} exports all pages
 * @param monitor optional progress monitor, may be {@code null}; if it reports
 *            cancellation the method returns early and no file is written
 * @throws JAXBException declared by this method; presumably raised while building a page transcript
 * @throws IOException if the output file cannot be opened or closed
 */
public static void writeRtfForDoc(TrpDoc doc, boolean wordBased, File file, Set<Integer> pageIndices, IProgressMonitor monitor) throws JAXBException, IOException {
    Rtf rtf = Rtf.rtf();
    List<TrpPage> pages = doc.getPages();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to RTF", totalPages);
    }
    // c counts the pages actually exported, i is the index within the document
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                logger.debug("RTF export cancelled!");
                return;
            }
            monitor.subTask("Processing page " + (c + 1));
        }
        TrpPage page = pages.get(i);
        TrpTranscriptMetadata md = page.getCurrentTranscript();
        JAXBPageTranscript tr = new JAXBPageTranscript(md);
        tr.build();
        TrpPageType trpPage = tr.getPage();
        logger.debug("writing rtf for page " + (i + 1) + "/" + doc.getNPages());
        rtf.section(getRtfParagraphsForTranscript(trpPage, wordBased));
        ++c;
        if (monitor != null) {
            // NOTE(review): the running total is reported here; if this monitor's
            // worked() expects an increment, progress is over-reported - confirm.
            monitor.worked(c);
        }
    }
    // Close the writer explicitly so the file handle is released even if
    // rtf.out(...) fails; closing an already closed writer has no effect.
    FileWriter writer = new FileWriter(file);
    try {
        rtf.out(writer);
    } finally {
        writer.close();
    }
    logger.info("wrote rtf to: " + file.getAbsolutePath());
}
use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.
the class ExportCache method storePageTranscripts4Export.
/**
 * Loads the transcripts to be exported and stores them in {@code pageTranscripts}.
 * The list gets exactly one entry per page of the document, so that the list
 * index always equals the page index; pages that are not exported are
 * represented by {@code null}.
 *
 * @param doc the document whose page transcripts are loaded
 * @param pageIndices zero-based indices of the pages to export, or {@code null}
 *            for all pages. Indices of pages without a transcript of the
 *            requested version are removed from this set.
 * @param monitor optional progress monitor, may be {@code null}
 * @param versionStatus selects the version: a value containing "Latest" uses the
 *            current transcript, a value containing "Loaded" uses
 *            {@code loadedTranscript} for page {@code pageIdx} and the current
 *            transcript for all other pages; any other value is passed to
 *            {@code getTranscriptWithStatusOrNull}
 * @param pageIdx zero-based index of the page {@code loadedTranscript} belongs to
 * @param loadedTranscript the currently loaded transcript, may be {@code null}
 * @throws Exception if the monitor reports cancellation; also declared for
 *             failures while building a transcript
 */
public void storePageTranscripts4Export(TrpDoc doc, Set<Integer> pageIndices, IProgressMonitor monitor, String versionStatus, int pageIdx, TrpTranscriptMetadata loadedTranscript) throws Exception {
    pageTranscripts = new ArrayList<JAXBPageTranscript>();
    List<TrpPage> pages = doc.getPages();
    int totalPages = pages.size();
    int c = 0;
    for (int i = 0; i < totalPages; ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            // fill up with null to have the proper index of each page later on
            pageTranscripts.add(null);
            continue;
        }
        if (monitor != null && monitor.isCanceled()) {
            throw new Exception("User canceled the export");
        }
        TrpPage page = pages.get(i);
        TrpTranscriptMetadata md = null;
        if (versionStatus.contains("Latest")) {
            // current transcript
            md = page.getCurrentTranscript();
        } else if (versionStatus.contains("Loaded")) {
            // if the loaded page idx == i then we can export the loaded version,
            // and for all other pages the latest one
            if (i == pageIdx && loadedTranscript != null) {
                md = loadedTranscript;
            } else {
                md = page.getCurrentTranscript();
            }
        } else {
            md = page.getTranscriptWithStatusOrNull(versionStatus);
        }
        if (md == null) {
            // No version with the requested status: drop the page from the
            // selection so that it will not be exported ...
            if (pageIndices != null) {
                pageIndices.remove(Integer.valueOf(i));
            }
            // ... and always keep a placeholder, also when all pages were
            // requested (pageIndices == null); otherwise every following
            // transcript would end up at the wrong list index.
            pageTranscripts.add(null);
            continue;
        }
        JAXBPageTranscript tr = new JAXBPageTranscript(md);
        tr.build();
        pageTranscripts.add(tr);
        logger.debug("Loaded Transcript from page " + (i + 1));
        if (monitor != null) {
            monitor.setTaskName("Loaded Transcript from page " + (i + 1));
            monitor.worked(++c);
        }
    }
}
use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.
the class FEPLocalDocReader method loadFEPDoc.
/**
 * Imports a FEP export from a local directory: the METS file is parsed, the
 * ALTO file of every page is converted into a PAGE XML file and a TrpDoc
 * containing one TrpPage per entry of the physical structure is returned.
 *
 * @param path directory containing the FEP export
 * @param validateMets passed to {@code unmarshalMets}
 * @param preserveOcrTxtStyles passed to {@code LocalDocReader.createPageFromAlto2}
 * @param preserveOcrFontFamily passed to {@code LocalDocReader.createPageFromAlto2}
 * @param replaceBadChars passed to {@code LocalDocReader.createPageFromAlto2}
 * @param monitor progress monitor, handed to {@code ProgressUtils} for all progress reporting
 * @return the imported document with its pages set
 * @throws IOException if the image file or the ALTO file of a page is missing
 * @throws Exception declared for failures in the METS/ALTO helpers
 */
public static TrpDoc loadFEPDoc(final String path, boolean validateMets, boolean preserveOcrTxtStyles, boolean preserveOcrFontFamily, boolean replaceBadChars, IProgressMonitor monitor) throws Exception {
    final File inputDir = new File(path);
    logger.info("importing FEP document from path: " + path);
    // total amount of work is unknown until the physical structure is parsed
    ProgressUtils.beginTask(monitor, "Importing a FEP document", -1);
    ProgressUtils.subTask(monitor, "Parsing mets");
    // find mets file:
    File metsFile = findMetsFile(inputDir);
    // unmarshal mets:
    Mets mets = unmarshalMets(metsFile, validateMets);
    // create trp-document and set metadata:
    TrpDoc trpDoc = new TrpDoc();
    setTitle(trpDoc, mets);
    trpDoc.getMd().setDesc("Imported from FEP export");
    trpDoc.getMd().setLocalFolder(inputDir);
    File pageDir = new File(inputDir.getAbsolutePath() + "/" + LocalDocConst.PAGE_FILE_SUB_FOLDER);
    File thumbDir = new File(inputDir.getAbsolutePath() + "/" + LocalDocConst.THUMBS_FILE_SUB_FOLDER);
    // parse physical structure:
    List<HashMap<String, File>> physStruct = parsePhysicalStructure(inputDir, mets);
    final int nPages = physStruct.size();
    ProgressUtils.beginTask(monitor, "Importing a FEP document", nPages);
    // create PAGEs:
    List<TrpPage> pages = new ArrayList<TrpPage>(nPages);
    int pageNr = 0;
    for (HashMap<String, File> files : physStruct) {
        // page numbers are 1-based; increment first so that the progress
        // message shows the same number as the log and error messages below
        ++pageNr;
        ProgressUtils.subTask(monitor, "Importing page " + pageNr);
        logger.debug("page: " + pageNr + ", nr of files: " + files.size());
        // first, check if image file is there and set some variables:
        if (!files.containsKey(IMG_GRP)) {
            throw new IOException("Image file for page " + pageNr + " could not be found!");
        }
        File imgFile = files.get(IMG_GRP);
        String imgFileBn = FilenameUtils.getBaseName(imgFile.getName());
        File thumbFile = LocalDocReader.getThumbFile(thumbDir, imgFileBn);
        File pageOutFile = new File(pageDir.getAbsolutePath() + "/" + imgFileBn + ".xml");
        FileUtils.forceMkdir(pageOutFile.getParentFile());
        if (files.containsKey(ALTO_GRP)) {
            File altoFile = files.get(ALTO_GRP);
            PcGtsType pc = LocalDocReader.createPageFromAlto2(imgFile.getName(), altoFile, preserveOcrTxtStyles, preserveOcrFontFamily, replaceBadChars);
            pageOutFile = JaxbUtils.marshalToFile(pc, pageOutFile);
        } else {
            // TODO: create empty page file -> NO!
            throw new IOException("ALTO file for image " + pageNr + " could not be found!");
        }
        // TODO it is assumed that the image is not corrupt here! Try to read dimension to be sure
        TrpPage page = LocalDocReader.buildPage(inputDir, pageNr, imgFile, pageOutFile, thumbFile, null, null);
        // extract logical structs for this page from mets and apply them to the page:
        applyLogicalStructFromMetsToPageFile(mets, pageNr, pageOutFile);
        pages.add(page);
        ProgressUtils.worked(monitor, pageNr);
    }
    trpDoc.setPages(pages);
    return trpDoc;
}
use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.
the class LocalDocReader method buildPage.
// private static void startThumbCreationThread(final TrpDoc doc) {
// Runnable thumbCreator = new Runnable(){
// @Override
// public void run() {
// try{
// LocalDocWriter.createThumbsForDoc(doc, false);
// }catch (Exception e){
// logger.error(e);
// return;
// }
// }
// };
// new Thread(thumbCreator).start();
// }
/**
 * Creates a TrpPage for a page of a local document and fills in the URLs of
 * the files that belong to it.
 *
 * @param inputDir
 *            folder of the local document; stored as local folder of the
 *            transcript metadata
 * @param pageNr
 *            number of the page to be built
 * @param img
 *            the image file; if null, the image file name is set to
 *            LocalDocConst.NO_IMAGE_FILENAME and no image URL is derived from it
 * @param pageXml
 *            the corresponding PAGE XML; if null, no transcript is added
 * @param thumb
 *            the thumbnail file for this image, may be null
 * @param dim
 *            image dimension used for width and height of the page, may be null
 * @param missingImageRemark
 *            description of a problem with the image file; if not empty, the
 *            page URL is replaced by the dummy image URL and the remark is
 *            stored on the page
 * @return the TrpPage, holding one transcript with status NEW if pageXml is
 *         not null
 * @throws IOException
 *             if a file cannot be converted into a URL
 */
protected static TrpPage buildPage(File inputDir, int pageNr, File img, File pageXml, File thumb, Dimension dim, final String missingImageRemark) throws IOException {
    final String xmlName = pageXml != null ? pageXml.getName() : "null";
    final String imgName = img != null ? img.getName() : "null";
    logger.debug(pageNr + ": XML = " + xmlName + " - IMG = " + imgName);

    // FIXME handle broken images
    final TrpPage result = new TrpPage();
    result.setPageNr(pageNr);
    result.setKey(null);
    result.setDocId(-1);

    if (img == null) {
        result.setImgFileName(LocalDocConst.NO_IMAGE_FILENAME);
    } else {
        result.setImgFileName(img.getName());
        result.setUrl(img.toURI().toURL());
    }

    // a remark about the image takes precedence over the URL set above
    if (!StringUtils.isEmpty(missingImageRemark)) {
        result.setUrl(LocalDocConst.getDummyImageUrl());
        result.setImgFileProblem(missingImageRemark);
    }

    if (thumb != null) {
        result.setThumbUrl(thumb.toURI().toURL());
    }

    if (dim != null) {
        result.setWidth(dim.width);
        result.setHeight(dim.height);
    }

    if (pageXml == null) {
        // without a PAGE XML there is no transcript to attach
        return result;
    }

    final URL transcriptUrl = pageXml.toURI().toURL();
    final TrpTranscriptMetadata transcript = new TrpTranscriptMetadata();
    transcript.setPageReferenceForLocalDocs(result);
    transcript.setPageNr(pageNr);
    transcript.setKey(null);
    transcript.setUrl(transcriptUrl);
    transcript.setStatus(EditStatus.NEW);
    transcript.setLocalFolder(inputDir);
    transcript.setTimestamp(System.currentTimeMillis());
    transcript.setUserName("LocalDocReader");
    // TODO real status, time and user parsed from PageXML?
    result.getTranscripts().add(transcript);
    return result;
}
use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.
the class Md5SumComputer method computeAndSetMd5Sums.
/**
 * Computes the MD5 checksum of every page image and of every transcript of a
 * local document and stores each checksum on the respective object. A status
 * update is issued before each checksum is computed.
 *
 * @param doc the local document to process
 * @return the document that was passed in
 * @throws IllegalArgumentException if doc is null or has no local folder set
 * @throws IOException declared for failures while computing a checksum
 */
public TrpDoc computeAndSetMd5Sums(TrpDoc doc) throws IOException {
    if (doc == null) {
        throw new IllegalArgumentException("doc is null.");
    }
    // only documents with a local folder are handled here
    if (doc.getMd().getLocalFolder() == null) {
        throw new IllegalArgumentException("Not a local Document!");
    }

    updateStatus("Computing checksums...");
    for (TrpPage page : doc.getPages()) {
        // checksum of the page image
        String imageStatus = "Computing checksum: " + getFileNameFromUrl(page.getUrl());
        updateStatus(imageStatus);
        page.setMd5Sum(ChecksumUtils.getMd5SumHex(page.getUrl()));

        // checksums of all transcripts of this page
        for (TrpTranscriptMetadata transcript : page.getTranscripts()) {
            String transcriptStatus = "Computing checksum: " + getFileNameFromUrl(transcript.getUrl());
            updateStatus(transcriptStatus);
            transcript.setMd5Sum(ChecksumUtils.getMd5SumHex(transcript.getUrl()));
        }
    }
    return doc;
}
Aggregations