Use of eu.transkribus.core.model.beans.fat.OcrMetadata in project TranskribusCore by Transkribus.
Class FatBuilder, method writeFatXml.
/**
 * Builds the FAT XML description for a local document directory and writes it to
 * {@code FatBuilder.FAT_FILE_NAME} inside that directory.
 *
 * <p>The image files and the document metadata are read separately via
 * {@code LocalDocReader} so that no PAGE XMLs get created as a side effect. For each
 * image, EXIF data (MIME type, resolution, dimensions) and an MD5 checksum are collected;
 * if that check fails, the file is still listed but marked with status {@code "Error"}.
 *
 * @param outputDir the local document directory; must contain a sub-directory named
 *                  {@code LocalDocConst.OCR_MASTER_DIR}
 * @param languages the OCR languages to declare; if {@code null} or empty, the language from
 *                  the document metadata is used, and if that is missing too an empty string
 *                  is written and the document is counted as missing metadata
 * @param typeFace  the text type to declare; if {@code null} or empty, the script type from
 *                  the document metadata is used, and if that is missing too
 *                  {@code ScriptType.NORMAL} is written and the document is counted as
 *                  missing metadata
 * @return the file the FAT XML was marshalled to, as returned by {@code JaxbUtils.marshalToFile}
 * @throws IllegalArgumentException if {@code outputDir} has no {@code OCR_MASTER_DIR} sub-directory
 * @throws UnsupportedFormatException declared for the document loading step
 *         (presumably raised by {@code LocalDocReader}; confirm against that class)
 * @throws IOException if reading the document fails or the FAT XML cannot be marshalled
 */
public static File writeFatXml(File outputDir, final String languages, final String typeFace) throws UnsupportedFormatException, IOException {
    final File ocrMasterDir = new File(outputDir.getAbsoluteFile(), LocalDocConst.OCR_MASTER_DIR);
    if (!ocrMasterDir.isDirectory()) {
        throw new IllegalArgumentException("No directory '" + LocalDocConst.OCR_MASTER_DIR + "' in directory: " + outputDir.getAbsolutePath());
    }

    // needs a local doc! Read files separately because we don't want to create Page XMLs
    final Map<String, File> imgFiles = LocalDocReader.findImgFiles(outputDir);
    final TrpDocMetadata docMd = LocalDocReader.loadDocMd(outputDir);
    // NOTE: a restriction to DocType.PRINT was once sketched here but is not enforced.

    final RootFolder rootFolder = new RootFolder();
    // 'HH' = hour of day (0-23). The 12-hour field 'hh' without an AM/PM marker is ambiguous.
    final SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm");
    rootFolder.setDate(df.format(new Date()));

    final BigInteger nFiles = getBigIntValue(imgFiles.size());
    rootFolder.setNFiles(nFiles);
    rootFolder.setNDocuments(BigInteger.valueOf(1));
    rootFolder.setNFileWarnings(BigInteger.valueOf(0));
    rootFolder.setNFolders(BigInteger.valueOf(1));

    final DocumentFolder docFolder = new DocumentFolder();
    docFolder.setName(outputDir.getName());
    docFolder.setPath(LocalDocConst.OCR_MASTER_DIR);
    docFolder.setNFilesPerFolder(nFiles);

    // TODO throw exception if missingMetadata is true?
    boolean missingMetadata = false;

    final Order order = new Order();
    order.setHasViewingFiles("false");
    order.setServices("(OCR)");

    final OcrMetadata ocrM = new OcrMetadata();
    // Explicit argument wins, then the document metadata, then a flagged default.
    if (languages != null && !languages.isEmpty()) {
        ocrM.setLanguages(languages);
    } else if (docMd.getLanguage() != null && !docMd.getLanguage().isEmpty()) {
        ocrM.setLanguages(docMd.getLanguage());
    } else {
        missingMetadata = true;
        ocrM.setLanguages("");
    }
    if (typeFace != null && !typeFace.isEmpty()) {
        ocrM.setTexttype(typeFace);
    } else if (docMd.getScriptType() != null) {
        ocrM.setTexttype(docMd.getScriptType().toString());
    } else {
        ocrM.setTexttype(ScriptType.NORMAL.toString());
        missingMetadata = true;
    }
    ocrM.setOutput("(ABBYY-XML)");

    // check the following!
    final int nDocsMissingMetadata = missingMetadata ? 1 : 0;
    rootFolder.setNDocumentsMissingMetadata(BigInteger.valueOf(nDocsMissingMetadata));

    final FepMetadata fep = new FepMetadata();
    fep.setWorkflow("None");
    order.setOcrMetadata(ocrM);
    order.setFepMetadata(fep);
    docFolder.setOrder(order);

    final FileFolder fileFolder = new FileFolder();
    fileFolder.setType("img");
    // existence of OCR_MASTER_DIR is checked at the beginning
    fileFolder.setName(LocalDocConst.OCR_MASTER_DIR);
    fileFolder.setPath(LocalDocConst.OCR_MASTER_DIR);

    int checkedFiles = 0;
    int uncheckedFiles = 0;
    int nFileErrors = 0;
    // This method cannot propagate InterruptedException, so an interruption is remembered
    // here and the thread's interrupt status is restored once the FAT file is written.
    boolean interrupted = false;
    try {
        for (final File img : imgFiles.values()) {
            final eu.transkribus.core.model.beans.fat.File file = new eu.transkribus.core.model.beans.fat.File();
            file.setName(img.getName());
            String errorType;
            String message;
            try {
                final Map<String, String> exif = ExiftoolUtil.extractImgMd(img.getAbsolutePath());
                final Metadata md = new Metadata();
                md.setMimetype(exif.get("MIMEType"));
                md.setXRes(getBigIntValue(exif.get("XResolution")));
                md.setYRes(getBigIntValue(exif.get("YResolution")));
                md.setWidth(getBigIntValue(exif.get("ImageWidth")));
                md.setHeight(getBigIntValue(exif.get("ImageHeight")));
                md.setChecksum(formatChecksum(ChecksumUtils.getMd5SumHex(img)));
                file.setMetadata(md);
                checkedFiles++;
                file.setStatus("Checked");
                errorType = "None";
                message = "";
            } catch (TimeoutException | InterruptedException | NumberFormatException e) {
                if (e instanceof InterruptedException) {
                    interrupted = true;
                }
                // A failed check is recorded on the file entry; the file is still listed.
                uncheckedFiles++;
                nFileErrors++;
                errorType = e.getClass().getName();
                message = e.getMessage();
                file.setStatus("Error");
                logger.error("Could not run file checks for file: " + img.getAbsolutePath(), e);
            }
            file.setErrorType(errorType);
            file.setMessage(message);
            fileFolder.getFile().add(file);
        }

        rootFolder.setNCheckedFiles(getBigIntValue(checkedFiles));
        rootFolder.setNUncheckedFiles(getBigIntValue(uncheckedFiles));
        rootFolder.setNFileErrors(getBigIntValue(nFileErrors));
        docFolder.getFileFolder().add(fileFolder);
        rootFolder.getDocumentFolder().add(docFolder);

        final File fatFile = new File(outputDir.getAbsoluteFile(), FatBuilder.FAT_FILE_NAME);
        try {
            return JaxbUtils.marshalToFile(rootFolder, fatFile);
        } catch (JAXBException e) {
            throw new IOException("Could not marshal FAT XML to file!", e);
        }
    } finally {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
Aggregations