use of eu.transkribus.core.model.beans.mets.DivType in project TranskribusCore by Transkribus.
the class FEPLocalDocReader method parsePhysicalStructure.
/**
 * Collects, for every child div of the physical structMap's root div, the files
 * referenced by the areas of that div's single fptr/par element.
 * <p>
 * The child divs are sorted in place by their ORDER value before they are parsed,
 * so the returned list follows that order. Divs without an ORDER value are sorted
 * to the end instead of failing the sort.
 *
 * @param inputDir passed to {@code findFile} when resolving each referenced file
 * @param mets the METS document to read the physical structMap and files from
 * @return one map per div; each entry maps the ID of the file group returned by
 *         {@code findFile} to the file returned alongside it
 * @throws IOException if a div does not contain exactly one fptr element, the fptr
 *         has no par element, or an area does not reference a file element
 */
static List<HashMap<String, File>> parsePhysicalStructure(File inputDir, Mets mets) throws IOException {
    StructMapType physSm = findStructMap(mets, PHYSICAL_STRUCT_MAP_LABEL);
    DivType rootDiv = physSm.getDiv();
    // sort divs by order; a div without ORDER goes last rather than causing a NullPointerException
    Collections.sort(rootDiv.getDiv(), new Comparator<DivType>() {
        @Override
        public int compare(DivType o1, DivType o2) {
            boolean firstMissing = o1.getORDER() == null;
            boolean secondMissing = o2.getORDER() == null;
            if (firstMissing || secondMissing) {
                // false < true, so the div that has an ORDER sorts first; two missing values tie
                return Boolean.compare(firstMissing, secondMissing);
            }
            return o1.getORDER().compareTo(o2.getORDER());
        }
    });
    List<HashMap<String, File>> fepFileGrps = new ArrayList<>();
    for (DivType div : rootDiv.getDiv()) {
        if (div.getFptr().size() != 1) {
            throw new IOException("Error parsing physical structure: nr of fptr elements is not 1 in div: " + div.getFptr().size() + ", id: " + div.getID());
        }
        ParType par = div.getFptr().get(0).getPar();
        if (par == null) {
            throw new IOException("Error parsing physical structure: could not parse par element in fptr of div: " + div.getID());
        }
        HashMap<String, File> files = new HashMap<>();
        for (Serializable o : par.getAreaOrSeq()) {
            // only area elements are evaluated; any other entry is skipped
            if (o instanceof AreaType) {
                AreaType area = (AreaType) o;
                Object fileRef = area.getFILEID();
                if (!(fileRef instanceof FileType)) {
                    // a missing or unresolved FILEID would otherwise surface as an unchecked exception
                    throw new IOException("Error parsing physical structure: area does not reference a file element in div: " + div.getID());
                }
                FileType fileType = (FileType) fileRef;
                Pair<FileGrp, File> filePair = findFile(inputDir, mets, fileType.getID());
                logger.debug("found file with id: " + fileType.getID() + ", path: " + filePair.getRight().getAbsolutePath());
                files.put(filePair.getLeft().getID(), filePair.getRight());
            }
        }
        fepFileGrps.add(files);
    }
    return fepFileGrps;
}
use of eu.transkribus.core.model.beans.mets.DivType in project TranskribusCore by Transkribus.
the class FEPLocalDocReader method setTitle.
/**
 * Copies the LABEL of the physical structMap's root div into the document
 * metadata as the document title.
 *
 * @param doc the document whose metadata title is set
 * @param mets the METS document providing the physical structMap
 * @throws IOException declared for the structMap lookup; nothing in this method throws it directly
 */
static void setTitle(TrpDoc doc, Mets mets) throws IOException {
    // resolve the label completely before touching the document, as the lookup may fail
    final String rootLabel = findStructMap(mets, PHYSICAL_STRUCT_MAP_LABEL).getDiv().getLABEL();
    doc.getMd().setTitle(rootLabel);
}
use of eu.transkribus.core.model.beans.mets.DivType in project TranskribusCore by Transkribus.
the class GoobiMetsImporter method fetchFiles.
/**
 * Builds the page list for a Goobi METS document by handing each page div of the
 * physical structMap to {@code fetchFilesFromUrl}, in ascending ORDER.
 * <p>
 * Image files are taken from the file group with USE="MAX"; if there is none, the
 * group with USE="DEFAULT" is used. The group with USE="XML" is optional.
 *
 * @param dir passed through to {@code fetchFilesFromUrl} (presumably the local
 *        target directory - confirm against that method)
 * @param mets the unmarshalled Goobi METS file
 * @return one page per div of the physical sequence, sorted by ascending ORDER;
 *         divs without ORDER come last
 * @throws IOException if the METS has no image file group or no structMap of TYPE
 *         "PHYSICAL" whose root div has TYPE "physSequence"
 */
public List<TrpPage> fetchFiles(String dir, Mets mets) throws IOException {
    if (mets.getFileSec() == null) {
        // without a file section there cannot be an image file list
        throw new IOException("METS file has no image file list!");
    }
    List<FileGrp> fileGrps = mets.getFileSec().getFileGrp();
    List<FileType> xmlGrp = null;
    List<FileType> imgGrp = null;
    List<FileType> defaultImgGrp = null;
    for (FileGrpType type : fileGrps) {
        String use = type.getUSE();
        if (use == null) {
            // switching on a null String throws a NullPointerException; a group without USE is ignored
            continue;
        }
        switch (use) {
            case "MAX":
                imgGrp = type.getFile();
                break;
            /*
             * could also be that USE='Content' and ID="AltoFiles" or ID="AbbyyXmlFiles" is necessary to get the transcriptions
             */
            case "DEFAULT":
                defaultImgGrp = type.getFile();
                break;
            case "XML":
                // possibility to load also an existent Alto or Abbyy XML and convert it to Page later on
                // TODO: clarify
                xmlGrp = type.getFile();
                break;
            default:
                break;
        }
    }
    // take default images if no MAX images are available
    if (imgGrp == null && defaultImgGrp != null) {
        imgGrp = defaultImgGrp;
    }
    if (imgGrp == null) {
        throw new IOException("METS file has no image file list!");
    }
    if (xmlGrp == null) {
        // a missing XML group is tolerated, it is only logged
        logger.debug("no xml file list");
    }
    List<DivType> pageDivs = null;
    for (StructMapType sMap : mets.getStructMap()) {
        // constant-first equals keeps a structMap without TYPE or div from throwing;
        // the root div ID (e.g. "PHYS_0000") is deliberately not checked
        if ("PHYSICAL".equals(sMap.getTYPE())
                && sMap.getDiv() != null
                && "physSequence".equals(sMap.getDiv().getTYPE())) {
            pageDivs = sMap.getDiv().getDiv();
            break;
        }
    }
    if (pageDivs == null) {
        throw new IOException("No valid StructMap was found!");
    }
    List<TrpPage> pages = new ArrayList<TrpPage>(pageDivs.size());
    // ascending ORDER; divs without ORDER are placed last instead of breaking the sort
    Comparator<DivType> comp = (DivType a, DivType b) -> {
        boolean aMissing = a.getORDER() == null;
        boolean bMissing = b.getORDER() == null;
        if (aMissing || bMissing) {
            return Boolean.compare(aMissing, bMissing);
        }
        return a.getORDER().compareTo(b.getORDER());
    };
    // note: this sorts the div list of the METS object in place
    pageDivs.sort(comp);
    for (DivType div : pageDivs) {
        // fetch all files and store them locally
        TrpPage p = fetchFilesFromUrl(div, imgGrp, xmlGrp, dir);
        pages.add(p);
    }
    return pages;
}
use of eu.transkribus.core.model.beans.mets.DivType in project TranskribusCore by Transkribus.
the class MetsUtil method getImagesToUpload.
/**
 * Builds one upload descriptor per page div of the METS structMap, using the image
 * and PAGE XML file groups found in the master file group.
 * <p>
 * The image group is mandatory. A missing XML group is only logged and passed on
 * as {@code null} to {@code buildUploadImage}.
 *
 * @param mets the METS document to evaluate
 * @return the descriptors in the order of the divs returned by
 *         {@code getPageDivsFromStructMap}
 * @throws IllegalArgumentException if the METS has no image file group or no page
 *         divs could be determined from the structMap
 */
public static List<PageUploadDescriptor> getImagesToUpload(Mets mets) {
    // check filesection. needs img group and xml group to distinguish them without going for mimetypes
    List<FileGrpType> typeGrps = getMasterFileGrp(mets);
    List<FileType> xmlGrp = null;
    List<FileType> imgGrp = null;
    for (FileGrpType type : typeGrps) {
        String groupId = type.getID();
        if (groupId == null) {
            // switching on a null String throws a NullPointerException; a group without ID matches neither case
            continue;
        }
        switch (groupId) {
            case TrpMetsBuilder.IMG_GROUP_ID:
                imgGrp = type.getFile();
                break;
            case TrpMetsBuilder.PAGE_GROUP_ID:
                xmlGrp = type.getFile();
                break;
            default:
                break;
        }
    }
    if (imgGrp == null) {
        throw new IllegalArgumentException("METS file has no image file list!");
    }
    if (xmlGrp == null) {
        logger.debug("METS file has no xml file list!");
    }
    List<DivType> pageDivs = getPageDivsFromStructMap(mets);
    if (pageDivs == null) {
        throw new IllegalArgumentException("No valid StructMap was found!");
    }
    List<PageUploadDescriptor> images = new ArrayList<PageUploadDescriptor>(pageDivs.size());
    for (DivType div : pageDivs) {
        PageUploadDescriptor image = buildUploadImage(div, imgGrp, xmlGrp);
        images.add(image);
    }
    return images;
}
use of eu.transkribus.core.model.beans.mets.DivType in project TranskribusCore by Transkribus.
the class MetsUtil method getTrpPages.
/**
 * Builds the set of TrpPage objects with
 * local file references from the mets master file group and structmap.
 * The method is strict regarding PAGE XML existence: both the image file group and
 * the PAGE XML file group must be present in the master file group.
 *
 * @param mets the METS document to evaluate
 * @param parentDir passed to {@code buildPage} for each page (presumably the
 *        directory the file references are resolved against - confirm there)
 * @return one page per div, in the order of the divs returned by
 *         {@code getPageDivsFromStructMap}
 * @throws IOException if the image file group, the XML file group or the page divs
 *         of the structMap are missing
 */
public static List<TrpPage> getTrpPages(Mets mets, File parentDir) throws IOException {
    // check filesection. needs img group and xml group to distinguish them without going for mimetypes
    List<FileGrpType> typeGrps = getMasterFileGrp(mets);
    List<FileType> xmlGrp = null;
    List<FileType> imgGrp = null;
    for (FileGrpType type : typeGrps) {
        String groupId = type.getID();
        if (groupId == null) {
            // switching on a null String throws a NullPointerException; a group without ID matches neither case
            continue;
        }
        switch (groupId) {
            case TrpMetsBuilder.IMG_GROUP_ID:
                imgGrp = type.getFile();
                break;
            case TrpMetsBuilder.PAGE_GROUP_ID:
                xmlGrp = type.getFile();
                break;
            default:
                break;
        }
    }
    if (imgGrp == null) {
        throw new IOException("METS file has no image file list!");
    }
    if (xmlGrp == null) {
        throw new IOException("METS file has no xml file list!");
    }
    List<DivType> pageDivs = getPageDivsFromStructMap(mets);
    if (pageDivs == null) {
        throw new IOException("No valid StructMap was found!");
    }
    List<TrpPage> pages = new ArrayList<TrpPage>(pageDivs.size());
    for (DivType div : pageDivs) {
        TrpPage page = buildPage(div, imgGrp, xmlGrp, parentDir);
        pages.add(page);
    }
    return pages;
}
Aggregations