Search in sources :

Example 1 with OcrMetadata

use of eu.transkribus.core.model.beans.fat.OcrMetadata in project TranskribusCore by Transkribus.

the class FatBuilder method writeFatXml.

/**
 * Builds the FAT XML description for the images of a local document and writes it to
 * {@code outputDir} under the name {@link FatBuilder#FAT_FILE_NAME}.
 *
 * <p>Image files and document metadata are read directly from {@code outputDir}; no Page XMLs
 * are created. For every image, the EXIF data (MIME type, resolution, dimensions) and an MD5
 * checksum are recorded. If the per-file check fails with a timeout, an interruption or a
 * number format problem, the file is still listed, but with status {@code "Error"}, and
 * processing continues with the next file.
 *
 * @param outputDir directory of the local document; must contain a sub-directory named
 *                  {@code LocalDocConst.OCR_MASTER_DIR}
 * @param languages OCR languages to declare; if {@code null} or empty, the language from the
 *                  document metadata is used, and if that is missing too, an empty string is
 *                  written and the document is counted as missing metadata
 * @param typeFace  text type to declare; if {@code null} or empty, the script type from the
 *                  document metadata is used, falling back to {@code ScriptType.NORMAL} (in
 *                  which case the document is counted as missing metadata)
 * @return the file the FAT XML was marshalled to
 * @throws IllegalArgumentException if {@code outputDir} has no
 *                                  {@code LocalDocConst.OCR_MASTER_DIR} sub-directory
 * @throws UnsupportedFormatException declared by the signature; presumably raised by the local
 *                                    document helpers (not visible here)
 * @throws IOException if reading the input fails or the FAT XML cannot be marshalled
 */
public static File writeFatXml(File outputDir, final String languages, final String typeFace) throws UnsupportedFormatException, IOException {
    if (!new File(outputDir.getAbsolutePath() + File.separator + LocalDocConst.OCR_MASTER_DIR).isDirectory()) {
        throw new IllegalArgumentException("No directory '" + LocalDocConst.OCR_MASTER_DIR + "' in directory: " + outputDir.getAbsolutePath());
    }
    // needs a local doc! Read files separately because we don't want to create Page XMLs
    Map<String, File> imgFiles = LocalDocReader.findImgFiles(outputDir);
    TrpDocMetadata docMd = LocalDocReader.loadDocMd(outputDir);
    // final DocType docType = doc.getMd().getType();
    // if(!DocType.PRINT.equals(docType)){
    // throw new IllegalArgumentException("DocType " + docType + " not allowed for FAT XML production");
    // }
    RootFolder rootFolder = new RootFolder();
    // HH = hour of day (0-23). The former "hh" (1-12) without an AM/PM marker made
    // afternoon timestamps indistinguishable from morning ones.
    SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm");
    final String dateStr = df.format(new Date());
    rootFolder.setDate(dateStr);
    final BigInteger nFiles = getBigIntValue(imgFiles.size());
    rootFolder.setNFiles(nFiles);
    rootFolder.setNDocuments(BigInteger.valueOf(1));
    rootFolder.setNFileWarnings(BigInteger.valueOf(0));
    rootFolder.setNFolders(BigInteger.valueOf(1));
    DocumentFolder docFolder = new DocumentFolder();
    docFolder.setName(outputDir.getName());
    docFolder.setPath(LocalDocConst.OCR_MASTER_DIR);
    docFolder.setNFilesPerFolder(nFiles);
    // TODO throw exception if missingMetadata is true?
    boolean missingMetadata = false;
    Order order = new Order();
    order.setHasViewingFiles("false");
    order.setServices("(OCR)");
    OcrMetadata ocrM = new OcrMetadata();
    // Languages: explicit argument wins, then the document metadata, else empty.
    if (languages != null && !languages.isEmpty()) {
        ocrM.setLanguages(languages);
    } else if (docMd.getLanguage() != null && !docMd.getLanguage().isEmpty()) {
        ocrM.setLanguages(docMd.getLanguage());
    } else {
        missingMetadata = true;
        ocrM.setLanguages("");
    }
    // Text type: explicit argument wins, then the document metadata, else NORMAL.
    if (typeFace != null && !typeFace.isEmpty()) {
        ocrM.setTexttype(typeFace);
    } else if (docMd.getScriptType() != null) {
        ocrM.setTexttype(docMd.getScriptType().toString());
    } else {
        ocrM.setTexttype(ScriptType.NORMAL.toString());
        missingMetadata = true;
    }
    ocrM.setOutput("(ABBYY-XML)");
    // check the following!
    int nDocsMissingMetadata = missingMetadata ? 1 : 0;
    rootFolder.setNDocumentsMissingMetadata(BigInteger.valueOf(nDocsMissingMetadata));
    FepMetadata fep = new FepMetadata();
    fep.setWorkflow("None");
    order.setOcrMetadata(ocrM);
    order.setFepMetadata(fep);
    docFolder.setOrder(order);
    FileFolder fileFolder = new FileFolder();
    fileFolder.setType("img");
    // existence of OCR_MASTER_DIR is checked at the beginning
    fileFolder.setName(LocalDocConst.OCR_MASTER_DIR);
    fileFolder.setPath(LocalDocConst.OCR_MASTER_DIR);
    // List<TrpPage> pages = doc.getPages();
    int checkedFiles = 0;
    int uncheckedFiles = 0;
    int nFileErrors = 0;
    // Set when a file check was interrupted; the interrupt status is restored in the
    // finally block below so that callers can still observe it.
    boolean interrupted = false;
    try {
        // Only the image files are needed; the map keys are not used.
        for (File img : imgFiles.values()) {
            eu.transkribus.core.model.beans.fat.File file = new eu.transkribus.core.model.beans.fat.File();
            file.setName(img.getName());
            String errorType;
            String message;
            try {
                final Map<String, String> exif = ExiftoolUtil.extractImgMd(img.getAbsolutePath());
                final String mimetype = exif.get("MIMEType");
                final String xRes = exif.get("XResolution");
                final String yRes = exif.get("YResolution");
                final String width = exif.get("ImageWidth");
                final String height = exif.get("ImageHeight");
                Metadata md = new Metadata();
                md.setMimetype(mimetype);
                md.setXRes(getBigIntValue(xRes));
                md.setYRes(getBigIntValue(yRes));
                md.setWidth(getBigIntValue(width));
                md.setHeight(getBigIntValue(height));
                final String md5 = formatChecksum(ChecksumUtils.getMd5SumHex(img));
                md.setChecksum(md5);
                file.setMetadata(md);
                checkedFiles++;
                file.setStatus("Checked");
                errorType = "None";
                message = "";
            } catch (TimeoutException | InterruptedException | NumberFormatException e) {
                if (e instanceof InterruptedException) {
                    // Do not swallow the interruption: remember it, keep the best-effort
                    // handling of the remaining files, and re-assert the flag at the end.
                    interrupted = true;
                }
                uncheckedFiles++;
                nFileErrors++;
                errorType = e.getClass().getName();
                message = e.getMessage();
                file.setStatus("Error");
                logger.error("Could not run file checks for file: " + img.getAbsolutePath(), e);
            }
            file.setErrorType(errorType);
            file.setMessage(message);
            fileFolder.getFile().add(file);
        }
        rootFolder.setNCheckedFiles(getBigIntValue(checkedFiles));
        rootFolder.setNUncheckedFiles(getBigIntValue(uncheckedFiles));
        rootFolder.setNFileErrors(getBigIntValue(nFileErrors));
        docFolder.getFileFolder().add(fileFolder);
        rootFolder.getDocumentFolder().add(docFolder);
        File fatFile = new File(outputDir.getAbsolutePath() + File.separator + FatBuilder.FAT_FILE_NAME);
        try {
            fatFile = JaxbUtils.marshalToFile(rootFolder, fatFile);
        } catch (JAXBException e) {
            throw new IOException("Could not marshal FAT XML to file!", e);
        }
        return fatFile;
    } finally {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
Also used : DocumentFolder(eu.transkribus.core.model.beans.fat.DocumentFolder) Metadata(eu.transkribus.core.model.beans.fat.File.Metadata) FepMetadata(eu.transkribus.core.model.beans.fat.FepMetadata) OcrMetadata(eu.transkribus.core.model.beans.fat.OcrMetadata) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata) FileFolder(eu.transkribus.core.model.beans.fat.FileFolder) TimeoutException(java.util.concurrent.TimeoutException) Order(eu.transkribus.core.model.beans.fat.Order) JAXBException(javax.xml.bind.JAXBException) IOException(java.io.IOException) Date(java.util.Date) OcrMetadata(eu.transkribus.core.model.beans.fat.OcrMetadata) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata) RootFolder(eu.transkribus.core.model.beans.fat.RootFolder) BigInteger(java.math.BigInteger) FepMetadata(eu.transkribus.core.model.beans.fat.FepMetadata) File(java.io.File) SimpleDateFormat(java.text.SimpleDateFormat)

Aggregations

TrpDocMetadata (eu.transkribus.core.model.beans.TrpDocMetadata)1 DocumentFolder (eu.transkribus.core.model.beans.fat.DocumentFolder)1 FepMetadata (eu.transkribus.core.model.beans.fat.FepMetadata)1 Metadata (eu.transkribus.core.model.beans.fat.File.Metadata)1 FileFolder (eu.transkribus.core.model.beans.fat.FileFolder)1 OcrMetadata (eu.transkribus.core.model.beans.fat.OcrMetadata)1 Order (eu.transkribus.core.model.beans.fat.Order)1 RootFolder (eu.transkribus.core.model.beans.fat.RootFolder)1 File (java.io.File)1 IOException (java.io.IOException)1 BigInteger (java.math.BigInteger)1 SimpleDateFormat (java.text.SimpleDateFormat)1 Date (java.util.Date)1 TimeoutException (java.util.concurrent.TimeoutException)1 JAXBException (javax.xml.bind.JAXBException)1