use of eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor in project TranskribusCore by Transkribus.
the class LocalDocReader method load.
/**
 * Builds a {@link TrpDoc} from the files belonging to an upload.
 * <p>
 * The document metadata is taken from the upload and its local folder is set to the upload's
 * temporary directory. For every {@link PageUploadDescriptor} of the upload:
 * <ul>
 * <li>the image file is resolved inside the upload's temporary directory and must exist;</li>
 * <li>the image dimension is read; a {@link CorruptImageException} is logged and turned into an
 * image remark instead of aborting the whole load;</li>
 * <li>if the descriptor names a PAGE XML, that file is resolved inside the upload's page
 * directory and must exist;</li>
 * <li>if no PAGE XML is named and the image could be read, an empty PAGE XML is written to
 * {@code <image base name>.xml} in the page directory;</li>
 * <li>if no PAGE XML is named and the image is corrupt, the page is built without a PAGE XML
 * file ({@code null}).</li>
 * </ul>
 *
 * @param upload the upload to transform; must not be {@code null}
 * @return the document containing one page per upload page descriptor
 * @throws IllegalArgumentException if the upload is {@code null}, has an upload ID smaller
 *         than 1 or its directories are not readable
 * @throws FileNotFoundException if an image or a named PAGE XML file does not exist
 * @throws IOException if an empty PAGE XML could not be written to disk
 */
public static TrpDoc load(TrpUpload upload) throws IOException {
    // validate most necessary things
    if (upload == null) {
        throw new IllegalArgumentException("Upload is null.");
    }
    if (upload.getUploadId() < 1) {
        throw new IllegalArgumentException("Invalid upload ID: " + upload.getUploadId());
    }
    if (!upload.canReadDirectories()) {
        throw new IllegalArgumentException("Directories are not readable: " + upload.getUploadTmpDir().getAbsolutePath());
    }
    // transform the upload object into a TRP document
    TrpDoc doc = new TrpDoc();
    TrpDocMetadata md = upload.getMd();
    md.setLocalFolder(upload.getUploadTmpDir());
    doc.setMd(md);
    File baseDir = upload.getUploadTmpDir();
    File xmlDir = upload.getUploadPageTmpDir();
    File thumbDir = new File(baseDir.getAbsolutePath() + File.separatorChar + LocalDocConst.THUMBS_FILE_SUB_FOLDER);
    for (PageUploadDescriptor p : upload.getPages()) {
        final int pageNr = p.getPageNr();
        File img = new File(baseDir.getAbsolutePath() + File.separator + p.getFileName());
        if (!img.isFile()) {
            throw new FileNotFoundException("Image for page " + pageNr + " does not exist: " + img.getAbsolutePath());
        }
        // try to read image dimension in any case to detect corrupt files immediately!
        Dimension dim = null;
        String imageRemark = null;
        try {
            dim = ImgUtils.readImageDimensions(img);
        } catch (CorruptImageException cie) {
            // a corrupt image does not abort the load; the page carries a remark instead
            logger.error("Image is corrupt: " + img.getAbsolutePath(), cie);
            imageRemark = getCorruptImgMsg(img.getName());
        }
        final String imgBaseName = FilenameUtils.getBaseName(img.getName());
        File thumb = getThumbFile(thumbDir, imgBaseName);
        File pageXml = null;
        if (!StringUtils.isEmpty(p.getPageXmlName())) {
            pageXml = new File(xmlDir.getAbsolutePath() + File.separator + p.getPageXmlName());
            if (!pageXml.isFile()) {
                // report the path of the missing PAGE XML, not the path of the image
                throw new FileNotFoundException("PAGE XML for page " + pageNr + " does not exist: " + pageXml.getAbsolutePath());
            }
        } else if (StringUtils.isEmpty(imageRemark)) {
            // no PAGE XML was named and the image could be read without problems:
            // create an empty PAGE XML for it. For a corrupt image pageXml stays null.
            File pageOutFile = new File(xmlDir.getAbsolutePath() + File.separatorChar + imgBaseName + ".xml");
            PcGtsType pc = PageXmlUtils.createEmptyPcGtsType(img, dim);
            try {
                pageXml = JaxbUtils.marshalToFile(pc, pageOutFile);
            } catch (JAXBException je) {
                logger.error(je.getMessage(), je);
                throw new IOException("Could not create empty PageXml on disk!", je);
            }
        }
        TrpPage page = buildPage(baseDir, pageNr, img, pageXml, thumb, dim, imageRemark);
        doc.getPages().add(page);
    }
    return doc;
}
use of eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor in project TranskribusCore by Transkribus.
the class TrpDocUploadBuilder method buildPageUploadDescriptor.
/**
 * Creates the upload descriptor for a single page.
 * <p>
 * Image file name and page number are always copied. Checksums are only set when the
 * respective MD5 sum is non-empty. PAGE XML data is taken from the page's current transcript
 * and only if the page has at least one transcript.
 *
 * @param p the page to describe
 * @return a new descriptor for the page
 */
private static PageUploadDescriptor buildPageUploadDescriptor(TrpPage p) {
    final PageUploadDescriptor descriptor = new PageUploadDescriptor();
    descriptor.setFileName(p.getImgFileName());
    descriptor.setPageNr(p.getPageNr());

    final String imgMd5 = p.getMd5Sum();
    if (!StringUtils.isEmpty(imgMd5)) {
        descriptor.setImgChecksum(imgMd5);
    }

    // without any transcript there is no PAGE XML to reference
    if (p.getTranscripts().isEmpty()) {
        return descriptor;
    }

    final TrpTranscriptMetadata transcript = p.getCurrentTranscript();
    descriptor.setPageXmlName(transcript.getXmlFileName());
    final String xmlMd5 = transcript.getMd5Sum();
    if (!StringUtils.isEmpty(xmlMd5)) {
        descriptor.setPageXmlChecksum(xmlMd5);
    }
    return descriptor;
}
use of eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor in project TranskribusCore by Transkribus.
the class TrpDocUploadBuilder method validateAndNormalize.
/**
 * Ensures that all images have filenames assigned and that page indices are consecutive
 * throughout the list.
 * <p>
 * If page indices start from 0 they will be incremented by 1 in order to be compatible with
 * METS-style counting. If XML filenames have the "page/" dir prefix, it will be removed.
 * <p>
 * The descriptors are modified in place while iterating. If validation fails for a page,
 * descriptors that were already processed keep their normalized values.
 *
 * @param pages the page descriptors to check, in page order; must not be empty
 * @throws IllegalArgumentException if the list is empty, the first page index is neither 0 nor 1,
 *         the page indices are not consecutive, an image filename is empty or the image
 *         filename is rejected by {@link ImgFilenameFilter}
 */
public static void validateAndNormalize(List<PageUploadDescriptor> pages) {
    if (pages.isEmpty()) {
        throw new IllegalArgumentException("Image list is empty!");
    }
    ImgFilenameFilter imgNameFilter = new ImgFilenameFilter();
    final String pageDirPrefix = LocalDocConst.PAGE_FILE_SUB_FOLDER + "/";
    // check page indices
    int i = pages.get(0).getPageNr();
    // check if it starts with 1 or 0
    boolean pageCountFromZero = false;
    if (i == 0) {
        // increment all indexes by 1
        pageCountFromZero = true;
    } else if (i < 0 || i > 1) {
        throw new IllegalArgumentException("page indexes have to start with 1 or 0!");
    }
    for (PageUploadDescriptor img : pages) {
        // check page indexes for continuity
        if (img.getPageNr() != i) {
            throw new IllegalArgumentException("Page indexes are inconsistent!");
        }
        i++;
        // correct the index if counting starts from zero as METS also includes counts starting from 1
        if (pageCountFromZero) {
            img.setPageNr(img.getPageNr() + 1);
        }
        // ensure that at least the img filename is set
        if (StringUtils.isEmpty(img.getFileName())) {
            throw new IllegalArgumentException("Image filename is empty for page index: " + img.getPageNr());
        }
        if (!imgNameFilter.accept(null, img.getFileName())) {
            throw new IllegalArgumentException("Image type is not supported: " + img.getFileName());
        }
        final String xmlName = img.getPageXmlName();
        if (!StringUtils.isEmpty(xmlName) && xmlName.startsWith(pageDirPrefix)) {
            // remove the "page/" prefix in XML filename if existent; cut it off literally
            // instead of going through replaceFirst, which would interpret the prefix as a regex
            img.setPageXmlName(xmlName.substring(pageDirPrefix.length()));
        }
    }
}
use of eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor in project TranskribusCore by Transkribus.
the class MetsUtil method buildUploadImage.
/**
 * Builds the upload descriptor for one page div of a METS struct map.
 * <p>
 * The page number is set to the div's ORDER minus 1. Each file pointer of the div is looked up
 * in the image file group and, if given, in the XML file group; file name and checksum of a
 * match are copied to the descriptor. A leading "page/" dir prefix is removed from the XML
 * file name. If no image is mapped for the div, an error is logged and the descriptor is
 * returned with a {@code null} image file name.
 *
 * @param div the page div from the struct map
 * @param imgGrp files of the image file group
 * @param xmlGrp files of the PAGE XML file group, may be {@code null}
 * @return the descriptor for the page
 * @throws IllegalArgumentException if a mapped image file name is rejected by the image
 *         filename filter
 */
private static PageUploadDescriptor buildUploadImage(DivType div, List<FileType> imgGrp, List<FileType> xmlGrp) {
    PageUploadDescriptor image = new PageUploadDescriptor();
    // NOTE(review): this yields a zero-based page number; presumably callers normalize it
    // to one-based counting afterwards - confirm
    int pageIndex = div.getORDER().intValue() - 1;
    image.setPageNr(pageIndex);
    final String pageDirPrefix = LocalDocConst.PAGE_FILE_SUB_FOLDER + "/";
    String imgFileName = null;
    String xmlFileName = null;
    String imgChecksum = null;
    String xmlChecksum = null;
    for (Fptr ptr : div.getFptr()) {
        FileType type = (FileType) ptr.getArea().getFILEID();
        final Pair<String, String> fileNameAndChecksum = MetsUtil.getFileNameAndChecksum(type);
        if (imgGrp.contains(type)) {
            imgFileName = fileNameAndChecksum.getLeft();
            if (!IMG_NAME_FILTER.accept(null, imgFileName)) {
                throw new IllegalArgumentException("Image type is not supported: " + imgFileName);
            }
            imgChecksum = fileNameAndChecksum.getRight();
        } else if (xmlGrp != null && xmlGrp.contains(type)) {
            xmlFileName = fileNameAndChecksum.getLeft();
            xmlChecksum = fileNameAndChecksum.getRight();
            if (!StringUtils.isEmpty(xmlFileName) && xmlFileName.startsWith(pageDirPrefix)) {
                // remove the "page/" prefix in XML filename if existent; cut it off literally
                // instead of going through replaceFirst, which would interpret the prefix as a regex
                xmlFileName = xmlFileName.substring(pageDirPrefix.length());
            }
        }
    }
    if (StringUtils.isEmpty(imgFileName)) {
        logger.error("No master image mapped for page index = " + pageIndex + " in the structmap!");
    } else {
        logger.info("Page " + image.getPageNr() + " image: " + imgFileName);
    }
    image.setFileName(imgFileName);
    image.setImgChecksum(imgChecksum);
    image.setPageXmlName(xmlFileName);
    image.setPageXmlChecksum(xmlChecksum);
    return image;
}
use of eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor in project TranskribusCore by Transkribus.
the class MetsUtil method getImagesToUpload.
/**
 * Extracts the page upload descriptors from a METS document.
 * <p>
 * The master file group is searched for the image file group and the PAGE XML file group by
 * their IDs. The image group is mandatory, the XML group is optional. One descriptor is built
 * for every page div of the struct map, in the order the divs are returned.
 *
 * @param mets the METS document to read
 * @return one descriptor per page div
 * @throws IllegalArgumentException if the METS has no image file group or no valid struct map
 */
public static List<PageUploadDescriptor> getImagesToUpload(Mets mets) {
    // check filesection. needs img group and xml group to distinguish them without going for mimetypes
    List<FileGrpType> typeGrps = getMasterFileGrp(mets);
    List<FileType> xmlGrp = null;
    List<FileType> imgGrp = null;
    for (FileGrpType type : typeGrps) {
        final String groupId = type.getID();
        if (groupId == null) {
            // a switch on a null String would throw a NullPointerException;
            // a group without ID can be neither the image nor the XML group
            continue;
        }
        switch (groupId) {
            case TrpMetsBuilder.IMG_GROUP_ID:
                imgGrp = type.getFile();
                break;
            case TrpMetsBuilder.PAGE_GROUP_ID:
                xmlGrp = type.getFile();
                break;
            default:
                break;
        }
    }
    if (imgGrp == null) {
        throw new IllegalArgumentException("METS file has no image file list!");
    }
    if (xmlGrp == null) {
        logger.debug("METS file has no xml file list!");
    }
    List<DivType> pageDivs = getPageDivsFromStructMap(mets);
    if (pageDivs == null) {
        throw new IllegalArgumentException("No valid StructMap was found!");
    }
    List<PageUploadDescriptor> images = new ArrayList<>(pageDivs.size());
    for (DivType div : pageDivs) {
        PageUploadDescriptor image = buildUploadImage(div, imgGrp, xmlGrp);
        images.add(image);
    }
    return images;
}
Aggregations