Search in sources :

Example 1 with PageUploadDescriptor

use of eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor in project TranskribusCore by Transkribus.

The method load of the class LocalDocReader.

/**
 * Transforms an upload object into a {@link TrpDoc} by resolving the image and PAGE XML
 * file of every page descriptor against the upload's temporary directories.
 * <p>
 * The dimension of each image is read in order to detect corrupt files early. A corrupt
 * image does not abort loading: the problem is logged, a remark is passed on to the page,
 * and no empty PAGE XML is generated for that page. For readable images without a
 * declared PAGE XML, an empty PAGE XML is created in the upload's page directory.
 *
 * @param upload the upload to read; must not be null, must have an upload ID of at least 1
 *        and readable directories
 * @return the document built from the upload's metadata and pages
 * @throws IllegalArgumentException if the upload is null, has an invalid ID or its
 *         directories are not readable
 * @throws FileNotFoundException if an image or a declared PAGE XML file does not exist
 * @throws IOException if an empty PAGE XML could not be written to disk
 */
public static TrpDoc load(TrpUpload upload) throws IOException {
    // validate most necessary things
    if (upload == null) {
        throw new IllegalArgumentException("Upload is null.");
    }
    if (upload.getUploadId() < 1) {
        throw new IllegalArgumentException("Invalid upload ID: " + upload.getUploadId());
    }
    if (!upload.canReadDirectories()) {
        throw new IllegalArgumentException("Directories are not readable: " + upload.getUploadTmpDir().getAbsolutePath());
    }
    // transform the upload object into a TRP document
    TrpDoc doc = new TrpDoc();
    TrpDocMetadata md = upload.getMd();
    md.setLocalFolder(upload.getUploadTmpDir());
    doc.setMd(md);
    File baseDir = upload.getUploadTmpDir();
    File xmlDir = upload.getUploadPageTmpDir();
    File thumbDir = new File(baseDir.getAbsolutePath() + File.separatorChar + LocalDocConst.THUMBS_FILE_SUB_FOLDER);
    for (PageUploadDescriptor p : upload.getPages()) {
        final int pageNr = p.getPageNr();
        File img = new File(baseDir.getAbsolutePath() + File.separator + p.getFileName());
        if (!img.isFile()) {
            throw new FileNotFoundException("Image for page " + pageNr + " does not exist: " + img.getAbsolutePath());
        }
        // try to read image dimension in any case to detect corrupt files immediately!
        Dimension dim = null;
        String imageRemark = null;
        try {
            dim = ImgUtils.readImageDimensions(img);
        } catch (CorruptImageException cie) {
            // keep going: the page is still added, but carries a remark about the corrupt image
            logger.error("Image is corrupt: " + img.getAbsolutePath(), cie);
            imageRemark = getCorruptImgMsg(img.getName());
        }
        final String imgBaseName = FilenameUtils.getBaseName(img.getName());
        File thumb = getThumbFile(thumbDir, imgBaseName);
        File pageXml = null;
        if (!StringUtils.isEmpty(p.getPageXmlName())) {
            pageXml = new File(xmlDir.getAbsolutePath() + File.separator + p.getPageXmlName());
            if (!pageXml.isFile()) {
                // report the path of the missing XML file, not the path of the image
                throw new FileNotFoundException("PAGE XML for page " + pageNr + " does not exist: " + pageXml.getAbsolutePath());
            }
        } else if (StringUtils.isEmpty(imageRemark)) {
            // no PAGE XML was declared and the image could be read: create an empty PAGE XML.
            // If reading the image failed, pageXml stays null.
            File pageOutFile = new File(xmlDir.getAbsolutePath() + File.separatorChar + imgBaseName + ".xml");
            PcGtsType pc = PageXmlUtils.createEmptyPcGtsType(img, dim);
            try {
                pageXml = JaxbUtils.marshalToFile(pc, pageOutFile);
            } catch (JAXBException je) {
                logger.error(je.getMessage(), je);
                throw new IOException("Could not create empty PageXml on disk!", je);
            }
        }
        TrpPage page = buildPage(baseDir, pageNr, img, pageXml, thumb, dim, imageRemark);
        doc.getPages().add(page);
    }
    return doc;
}
Also used : CorruptImageException(eu.transkribus.core.exceptions.CorruptImageException) TrpPage(eu.transkribus.core.model.beans.TrpPage) JAXBException(javax.xml.bind.JAXBException) FileNotFoundException(java.io.FileNotFoundException) Dimension(java.awt.Dimension) IOException(java.io.IOException) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType) TrpDoc(eu.transkribus.core.model.beans.TrpDoc) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata) File(java.io.File) PageUploadDescriptor(eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor)

Example 2 with PageUploadDescriptor

use of eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor in project TranskribusCore by Transkribus.

The method buildPageUploadDescriptor of the class TrpDocUploadBuilder.

/**
 * Creates the upload descriptor for a single page.
 * <p>
 * The image filename and page number are always copied. Checksums are only set when
 * they are non-empty, and the PAGE XML name is only set when the page has at least one
 * transcript, in which case the current transcript is used.
 *
 * @param p the page to describe
 * @return a new descriptor filled from the page
 */
private static PageUploadDescriptor buildPageUploadDescriptor(TrpPage p) {
    final PageUploadDescriptor descriptor = new PageUploadDescriptor();
    descriptor.setFileName(p.getImgFileName());
    descriptor.setPageNr(p.getPageNr());

    final String imgChecksum = p.getMd5Sum();
    if (!StringUtils.isEmpty(imgChecksum)) {
        descriptor.setImgChecksum(imgChecksum);
    }

    // without any transcript there is no XML information to add
    if (p.getTranscripts().isEmpty()) {
        return descriptor;
    }

    final TrpTranscriptMetadata transcript = p.getCurrentTranscript();
    descriptor.setPageXmlName(transcript.getXmlFileName());
    final String xmlChecksum = transcript.getMd5Sum();
    if (!StringUtils.isEmpty(xmlChecksum)) {
        descriptor.setPageXmlChecksum(xmlChecksum);
    }
    return descriptor;
}
Also used : TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) PageUploadDescriptor(eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor)

Example 3 with PageUploadDescriptor

use of eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor in project TranskribusCore by Transkribus.

The method validateAndNormalize of the class TrpDocUploadBuilder.

/**
 * Ensures that all images have filenames assigned and that page indices are consecutive
 * throughout the list.
 * <p>
 * If page indices start from 0 they will be incremented by 1 in order to be compatible with
 * METS-style counting. If an XML filename starts with the "page/" dir prefix
 * ({@code LocalDocConst.PAGE_FILE_SUB_FOLDER + "/"}), the prefix will be removed.
 * <p>
 * The descriptors are modified in place while iterating. If validation fails at a later
 * page, the descriptors before it may already have been normalized.
 *
 * @param pages the page descriptors to check and normalize, in page order
 * @throws IllegalArgumentException if the list is null or empty, the first index is neither
 *         0 nor 1, the indices are not consecutive, an image filename is empty or the image
 *         filename is not accepted by {@link ImgFilenameFilter}
 */
public static void validateAndNormalize(List<PageUploadDescriptor> pages) {
    if (pages == null || pages.isEmpty()) {
        throw new IllegalArgumentException("Image list is empty!");
    }
    final ImgFilenameFilter imgNameFilter = new ImgFilenameFilter();
    // computed once; matched literally below instead of being interpreted as a regex
    final String pageDirPrefix = LocalDocConst.PAGE_FILE_SUB_FOLDER + "/";
    // check page indices
    int i = pages.get(0).getPageNr();
    // check if it starts with 1 or 0
    if (i < 0 || i > 1) {
        throw new IllegalArgumentException("page indexes have to start with 1 or 0!");
    }
    // if counting starts from zero, all indexes are incremented by 1 below
    final boolean pageCountFromZero = i == 0;
    for (PageUploadDescriptor img : pages) {
        // check page indexes for continuity
        if (img.getPageNr() != i) {
            throw new IllegalArgumentException("Page indexes are inconsistent!");
        }
        i++;
        // correct the index if counting starts from zero as METS also includes counts starting from 1
        if (pageCountFromZero) {
            img.setPageNr(img.getPageNr() + 1);
        }
        // ensure that at least the img filename is set
        if (StringUtils.isEmpty(img.getFileName())) {
            throw new IllegalArgumentException("Image filename is empty for page index: " + img.getPageNr());
        }
        if (!imgNameFilter.accept(null, img.getFileName())) {
            throw new IllegalArgumentException("Image type is not supported: " + img.getFileName());
        }
        final String xmlName = img.getPageXmlName();
        if (!StringUtils.isEmpty(xmlName) && xmlName.startsWith(pageDirPrefix)) {
            // remove the "page/" prefix in XML filename if existent
            img.setPageXmlName(xmlName.substring(pageDirPrefix.length()));
        }
    }
}
Also used : ImgFilenameFilter(eu.transkribus.core.io.util.ImgFilenameFilter) PageUploadDescriptor(eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor)

Example 4 with PageUploadDescriptor

use of eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor in project TranskribusCore by Transkribus.

The method buildUploadImage of the class MetsUtil.

/**
 * Builds the upload descriptor for one page div of the structmap.
 * <p>
 * The page number is set to the div's ORDER value minus one. Each file pointer of the div
 * is looked up in the image group and, if given, the XML group to determine the filename
 * and checksum of the image and of the PAGE XML. A leading "page/" dir prefix
 * ({@code LocalDocConst.PAGE_FILE_SUB_FOLDER + "/"}) is removed from the XML filename.
 * <p>
 * A div without a mapped image is only logged as an error here; the descriptor is still
 * returned with a null image filename.
 *
 * @param div the page div to read the file pointers from
 * @param imgGrp the files of the image group
 * @param xmlGrp the files of the PAGE XML group; may be null
 * @return the descriptor for the page
 * @throws IllegalArgumentException if the mapped image filename is rejected by the image name filter
 */
private static PageUploadDescriptor buildUploadImage(DivType div, List<FileType> imgGrp, List<FileType> xmlGrp) {
    PageUploadDescriptor image = new PageUploadDescriptor();
    int pageIndex = div.getORDER().intValue() - 1;
    image.setPageNr(pageIndex);
    // matched literally below instead of being interpreted as a regex
    final String pageDirPrefix = LocalDocConst.PAGE_FILE_SUB_FOLDER + "/";
    String imgFileName = null;
    String xmlFileName = null;
    String imgChecksum = null;
    String xmlChecksum = null;
    for (Fptr ptr : div.getFptr()) {
        FileType type = (FileType) ptr.getArea().getFILEID();
        final Pair<String, String> fileNameAndChecksum = MetsUtil.getFileNameAndChecksum(type);
        if (imgGrp.contains(type)) {
            imgFileName = fileNameAndChecksum.getLeft();
            if (!IMG_NAME_FILTER.accept(null, imgFileName)) {
                throw new IllegalArgumentException("Image type is not supported: " + imgFileName);
            }
            imgChecksum = fileNameAndChecksum.getRight();
        } else if (xmlGrp != null && xmlGrp.contains(type)) {
            xmlFileName = fileNameAndChecksum.getLeft();
            xmlChecksum = fileNameAndChecksum.getRight();
            if (!StringUtils.isEmpty(xmlFileName) && xmlFileName.startsWith(pageDirPrefix)) {
                // remove the "page/" prefix in XML filename if existent
                xmlFileName = xmlFileName.substring(pageDirPrefix.length());
            }
        }
    }
    if (StringUtils.isEmpty(imgFileName)) {
        logger.error("No master image mapped for page index = " + pageIndex + " in the structmap!");
    } else {
        logger.info("Page " + image.getPageNr() + " image: " + imgFileName);
    }
    image.setFileName(imgFileName);
    image.setImgChecksum(imgChecksum);
    image.setPageXmlName(xmlFileName);
    image.setPageXmlChecksum(xmlChecksum);
    return image;
}
Also used : FileType(eu.transkribus.core.model.beans.mets.FileType) Fptr(eu.transkribus.core.model.beans.mets.DivType.Fptr) PageUploadDescriptor(eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor)

Example 5 with PageUploadDescriptor

use of eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor in project TranskribusCore by Transkribus.

The method getImagesToUpload of the class MetsUtil.

/**
 * Extracts the descriptors of all pages to upload from a METS object.
 * <p>
 * The master file group is searched for the image group and the PAGE XML group. The image
 * group is mandatory, the XML group is optional. One descriptor is then built for every
 * page div of the structmap, in the order the divs are returned.
 *
 * @param mets the METS object to read
 * @return the page descriptors, one per page div
 * @throws IllegalArgumentException if the METS has no image file group or no valid structmap
 */
public static List<PageUploadDescriptor> getImagesToUpload(Mets mets) {
    // check filesection. needs img group and xml group to distinguish them without going for mimetypes
    List<FileGrpType> typeGrps = getMasterFileGrp(mets);
    List<FileType> xmlGrp = null;
    List<FileType> imgGrp = null;
    for (FileGrpType type : typeGrps) {
        final String groupId = type.getID();
        if (groupId == null) {
            // a switch on a null String throws a NullPointerException; a group without ID can't match anyway
            continue;
        }
        switch(groupId) {
            case TrpMetsBuilder.IMG_GROUP_ID:
                imgGrp = type.getFile();
                break;
            case TrpMetsBuilder.PAGE_GROUP_ID:
                xmlGrp = type.getFile();
                break;
            default:
                break;
        }
    }
    if (imgGrp == null) {
        throw new IllegalArgumentException("METS file has no image file list!");
    }
    if (xmlGrp == null) {
        logger.debug("METS file has no xml file list!");
    }
    List<DivType> pageDivs = getPageDivsFromStructMap(mets);
    if (pageDivs == null) {
        throw new IllegalArgumentException("No valid StructMap was found!");
    }
    List<PageUploadDescriptor> images = new ArrayList<PageUploadDescriptor>(pageDivs.size());
    for (DivType div : pageDivs) {
        PageUploadDescriptor image = buildUploadImage(div, imgGrp, xmlGrp);
        images.add(image);
    }
    return images;
}
Also used : FileGrpType(eu.transkribus.core.model.beans.mets.FileGrpType) DivType(eu.transkribus.core.model.beans.mets.DivType) FileType(eu.transkribus.core.model.beans.mets.FileType) ArrayList(java.util.ArrayList) PageUploadDescriptor(eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor)

Aggregations

PageUploadDescriptor (eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor)5 FileType (eu.transkribus.core.model.beans.mets.FileType)2 CorruptImageException (eu.transkribus.core.exceptions.CorruptImageException)1 ImgFilenameFilter (eu.transkribus.core.io.util.ImgFilenameFilter)1 TrpDoc (eu.transkribus.core.model.beans.TrpDoc)1 TrpDocMetadata (eu.transkribus.core.model.beans.TrpDocMetadata)1 TrpPage (eu.transkribus.core.model.beans.TrpPage)1 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)1 DivType (eu.transkribus.core.model.beans.mets.DivType)1 Fptr (eu.transkribus.core.model.beans.mets.DivType.Fptr)1 FileGrpType (eu.transkribus.core.model.beans.mets.FileGrpType)1 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)1 Dimension (java.awt.Dimension)1 File (java.io.File)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 JAXBException (javax.xml.bind.JAXBException)1