Search in sources :

Example 1 with DivType

use of eu.transkribus.core.model.beans.mets.DivType in project TranskribusCore by Transkribus.

the class FEPLocalDocReader method parsePhysicalStructure.

/**
 * Walks the child divs of the physical struct map's root div in ascending ORDER and
 * collects, per div, the files referenced by the area entries of its single fptr/par element.
 *
 * @param inputDir passed on to {@code findFile} when resolving a file ID
 * @param mets the METS object to read the physical struct map and the files from
 * @return one map per div, mapping the ID of a file group to the file found for it
 * @throws IOException if a div does not have exactly one fptr element or the fptr has no par element
 */
static List<HashMap<String, File>> parsePhysicalStructure(File inputDir, Mets mets) throws IOException {
    final DivType physRoot = findStructMap(mets, PHYSICAL_STRUCT_MAP_LABEL).getDiv();

    // order the child divs by their ORDER value; this sorts the root div's own list in place
    physRoot.getDiv().sort(Comparator.comparing(DivType::getORDER));

    final List<HashMap<String, File>> groupsPerDiv = new ArrayList<>();
    for (DivType pageDiv : physRoot.getDiv()) {
        if (pageDiv.getFptr().size() != 1) {
            throw new IOException("Error parsing physical structure: nr of fptr elements is not 1 in div: " + pageDiv.getFptr().size() + ", id: " + pageDiv.getID());
        }
        final ParType parallel = pageDiv.getFptr().get(0).getPar();
        if (parallel == null) {
            throw new IOException("Error parsing physical structure: could not parse par element in fptr of div: " + pageDiv.getID());
        }

        final HashMap<String, File> filesByGroupId = new HashMap<>();
        for (Serializable entry : parallel.getAreaOrSeq()) {
            if (!(entry instanceof AreaType)) {
                // only area entries are evaluated, everything else is skipped
                continue;
            }
            final FileType referenced = (FileType) ((AreaType) entry).getFILEID();
            final Pair<FileGrp, File> located = findFile(inputDir, mets, referenced.getID());
            logger.debug("found file with id: " + referenced.getID() + ", path: " + located.getRight().getAbsolutePath());
            // key is the ID of the file group the file was found in
            filesByGroupId.put(located.getLeft().getID(), located.getRight());
        }
        groupsPerDiv.add(filesByGroupId);
    }
    return groupsPerDiv;
}
Also used : Serializable(java.io.Serializable) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FileGrp(eu.transkribus.core.model.beans.mets.MetsType.FileSec.FileGrp) IOException(java.io.IOException) AreaType(eu.transkribus.core.model.beans.mets.AreaType) DivType(eu.transkribus.core.model.beans.mets.DivType) FileType(eu.transkribus.core.model.beans.mets.FileType) StructMapType(eu.transkribus.core.model.beans.mets.StructMapType) ParType(eu.transkribus.core.model.beans.mets.ParType) File(java.io.File)

Example 2 with DivType

use of eu.transkribus.core.model.beans.mets.DivType in project TranskribusCore by Transkribus.

the class FEPLocalDocReader method setTitle.

/**
 * Uses the LABEL of the physical struct map's root div as the title of the document metadata.
 *
 * @param doc the document whose metadata receives the title
 * @param mets the METS object to read the physical struct map from
 * @throws IOException declared for {@code findStructMap}
 */
static void setTitle(TrpDoc doc, Mets mets) throws IOException {
    // resolve the label first, then touch the document
    final String rootLabel = findStructMap(mets, PHYSICAL_STRUCT_MAP_LABEL).getDiv().getLABEL();
    doc.getMd().setTitle(rootLabel);
}
Also used : DivType(eu.transkribus.core.model.beans.mets.DivType) StructMapType(eu.transkribus.core.model.beans.mets.StructMapType)

Example 3 with DivType

use of eu.transkribus.core.model.beans.mets.DivType in project TranskribusCore by Transkribus.

the class GoobiMetsImporter method fetchFiles.

/**
 * Fetches the files of all pages listed in a Goobi METS and returns the resulting pages
 * in ascending ORDER of their page divs.
 * <p>
 * Images are taken from the file group with USE="MAX"; if there is none, the group with
 * USE="DEFAULT" is used instead. The USE="XML" group is optional and is handed on as
 * {@code null} when absent. File groups without a USE attribute are ignored.
 *
 * @param dir passed on to {@code fetchFilesFromUrl} for every page
 * @param mets the unmarshalled Goobi METS file
 * @return one {@link TrpPage} per page div of the physical struct map
 * @throws IOException if the METS has no file section or image file group, or if no struct map
 *         of TYPE "PHYSICAL" with a root div of TYPE "physSequence" exists
 */
public List<TrpPage> fetchFiles(String dir, Mets mets) throws IOException {
    if (mets.getFileSec() == null) {
        // without a file section there cannot be an image file group either
        throw new IOException("METS file has no image file list!");
    }
    List<FileGrp> fileGrps = mets.getFileSec().getFileGrp();
    List<FileType> xmlGrp = null;
    List<FileType> imgGrp = null;
    List<FileType> defaultImgGrp = null;
    for (FileGrpType type : fileGrps) {
        final String use = type.getUSE();
        if (use == null) {
            // a switch on a null String throws a NullPointerException, so skip such groups
            continue;
        }
        switch (use) {
            case "MAX":
                imgGrp = type.getFile();
                break;
            /*
             * could also be that USE='Content' and ID="AltoFiles" or ID="AbbyyXmlFiles" is necessary to get the transcriptions
             */
            case "DEFAULT":
                defaultImgGrp = type.getFile();
                break;
            case "XML":
                // possibility to load also an existent Alto or Abbyy XML and convert it to Page later on
                // TODO: clarify
                xmlGrp = type.getFile();
                break;
            default:
                break;
        }
    }
    // take default images if no MAX images are available
    if (imgGrp == null && defaultImgGrp != null) {
        imgGrp = defaultImgGrp;
    }
    if (imgGrp == null) {
        throw new IOException("METS file has no image file list!");
    }
    if (xmlGrp == null) {
        // a missing XML file group is tolerated
        logger.debug("no xml file list");
    }
    List<DivType> pageDivs = null;
    for (StructMapType sMap : mets.getStructMap()) {
        final DivType rootDiv = sMap.getDiv();
        // constant-first equals keeps a missing TYPE attribute from raising a NullPointerException;
        // the ID of the root div (e.g. "PHYS_0000") is not checked
        if ("PHYSICAL".equals(sMap.getTYPE()) && rootDiv != null && "physSequence".equals(rootDiv.getTYPE())) {
            pageDivs = rootDiv.getDiv();
            break;
        }
    }
    if (pageDivs == null) {
        throw new IOException("No valid StructMap was found!");
    }
    List<TrpPage> pages = new ArrayList<TrpPage>(pageDivs.size());
    // sort the page divs in ascending ORDER (in place, i.e. on the list held by the struct map).
    // NOTE: every page div is expected to carry an ORDER value; compareTo fails on null
    Comparator<DivType> comp = (DivType a, DivType b) -> a.getORDER().compareTo(b.getORDER());
    pageDivs.sort(comp);
    for (DivType div : pageDivs) {
        // fetch all files and store them locally
        TrpPage p = fetchFilesFromUrl(div, imgGrp, xmlGrp, dir);
        pages.add(p);
    }
    return pages;
}
Also used : TrpPage(eu.transkribus.core.model.beans.TrpPage) FileGrp(eu.transkribus.core.model.beans.mets.MetsType.FileSec.FileGrp) ArrayList(java.util.ArrayList) IOException(java.io.IOException) FileGrpType(eu.transkribus.core.model.beans.mets.FileGrpType) DivType(eu.transkribus.core.model.beans.mets.DivType) FileType(eu.transkribus.core.model.beans.mets.FileType) StructMapType(eu.transkribus.core.model.beans.mets.StructMapType)

Example 4 with DivType

use of eu.transkribus.core.model.beans.mets.DivType in project TranskribusCore by Transkribus.

the class MetsUtil method getImagesToUpload.

/**
 * Builds one upload descriptor per page div of the struct map, using the image and
 * PAGE XML file lists of the METS master file group.
 * A missing PAGE XML file list is tolerated and handed on as {@code null}.
 *
 * @param mets the METS object to read the master file group and the struct map from
 * @return the upload descriptors in the order of the page divs
 * @throws IllegalArgumentException if there is no image file list or no page divs are found
 */
public static List<PageUploadDescriptor> getImagesToUpload(Mets mets) {
    // check filesection. needs img group and xml group to distinguish them without going for mimetypes
    List<FileGrpType> typeGrps = getMasterFileGrp(mets);
    List<FileType> xmlGrp = null;
    List<FileType> imgGrp = null;
    for (FileGrpType type : typeGrps) {
        final String id = type.getID();
        if (id == null) {
            // a switch on a null String throws a NullPointerException, so skip groups without ID
            continue;
        }
        switch (id) {
            case TrpMetsBuilder.IMG_GROUP_ID:
                imgGrp = type.getFile();
                break;
            case TrpMetsBuilder.PAGE_GROUP_ID:
                xmlGrp = type.getFile();
                break;
            default:
                break;
        }
    }
    if (imgGrp == null) {
        throw new IllegalArgumentException("METS file has no image file list!");
    }
    if (xmlGrp == null) {
        logger.debug("METS file has no xml file list!");
    }
    List<DivType> pageDivs = getPageDivsFromStructMap(mets);
    if (pageDivs == null) {
        throw new IllegalArgumentException("No valid StructMap was found!");
    }
    List<PageUploadDescriptor> images = new ArrayList<PageUploadDescriptor>(pageDivs.size());
    for (DivType div : pageDivs) {
        PageUploadDescriptor image = buildUploadImage(div, imgGrp, xmlGrp);
        images.add(image);
    }
    return images;
}
Also used : FileGrpType(eu.transkribus.core.model.beans.mets.FileGrpType) DivType(eu.transkribus.core.model.beans.mets.DivType) FileType(eu.transkribus.core.model.beans.mets.FileType) ArrayList(java.util.ArrayList) PageUploadDescriptor(eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor)

Example 5 with DivType

use of eu.transkribus.core.model.beans.mets.DivType in project TranskribusCore by Transkribus.

the class MetsUtil method getTrpPages.

/**
 * Builds the set of TrpPage objects with
 * local file references from the mets master file group and structmap.
 * The method is strict regarding PAGE XML existence! Each image file must have a correspondent PAGE XML.
 * File groups without an ID are ignored.
 *
 * @param mets the METS object to read the master file group and the struct map from
 * @param parentDir passed on to {@code buildPage} for every page div
 * @return one TrpPage per page div, in the order of the page divs
 * @throws IOException if the image file list, the xml file list or the page divs are missing
 */
public static List<TrpPage> getTrpPages(Mets mets, File parentDir) throws IOException {
    // check filesection. needs img group and xml group to distinguish them without going for mimetypes
    List<FileGrpType> typeGrps = getMasterFileGrp(mets);
    List<FileType> xmlGrp = null;
    List<FileType> imgGrp = null;
    for (FileGrpType type : typeGrps) {
        final String id = type.getID();
        if (id == null) {
            // a switch on a null String throws a NullPointerException, so skip groups without ID
            continue;
        }
        switch (id) {
            case TrpMetsBuilder.IMG_GROUP_ID:
                imgGrp = type.getFile();
                break;
            case TrpMetsBuilder.PAGE_GROUP_ID:
                xmlGrp = type.getFile();
                break;
            default:
                break;
        }
    }
    if (imgGrp == null) {
        throw new IOException("METS file has no image file list!");
    }
    if (xmlGrp == null) {
        throw new IOException("METS file has no xml file list!");
    }
    List<DivType> pageDivs = getPageDivsFromStructMap(mets);
    if (pageDivs == null) {
        throw new IOException("No valid StructMap was found!");
    }
    List<TrpPage> pages = new ArrayList<TrpPage>(pageDivs.size());
    for (DivType div : pageDivs) {
        TrpPage page = buildPage(div, imgGrp, xmlGrp, parentDir);
        pages.add(page);
    }
    return pages;
}
Also used : FileGrpType(eu.transkribus.core.model.beans.mets.FileGrpType) DivType(eu.transkribus.core.model.beans.mets.DivType) FileType(eu.transkribus.core.model.beans.mets.FileType) TrpPage(eu.transkribus.core.model.beans.TrpPage) ArrayList(java.util.ArrayList) IOException(java.io.IOException)

Aggregations

DivType (eu.transkribus.core.model.beans.mets.DivType)7 FileType (eu.transkribus.core.model.beans.mets.FileType)5 StructMapType (eu.transkribus.core.model.beans.mets.StructMapType)5 FileGrpType (eu.transkribus.core.model.beans.mets.FileGrpType)4 ArrayList (java.util.ArrayList)4 TrpPage (eu.transkribus.core.model.beans.TrpPage)3 FileGrp (eu.transkribus.core.model.beans.mets.MetsType.FileSec.FileGrp)3 IOException (java.io.IOException)3 File (java.io.File)2 HashMap (java.util.HashMap)2 PageUploadDescriptor (eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor)1 ITrpFile (eu.transkribus.core.model.beans.ITrpFile)1 TrpDocMetadata (eu.transkribus.core.model.beans.TrpDocMetadata)1 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)1 AmdSecType (eu.transkribus.core.model.beans.mets.AmdSecType)1 AreaType (eu.transkribus.core.model.beans.mets.AreaType)1 Fptr (eu.transkribus.core.model.beans.mets.DivType.Fptr)1 FLocat (eu.transkribus.core.model.beans.mets.FileType.FLocat)1 MdSecType (eu.transkribus.core.model.beans.mets.MdSecType)1 Mets (eu.transkribus.core.model.beans.mets.Mets)1