use of eu.transkribus.core.model.beans.TrpDocMetadata in project TranskribusCore by Transkribus.
the class LocalDocReader method initDocMd.
/**
 * Prepares a metadata object for a document that lives in a local folder.
 * <p>
 * The doc ID is reset to -1 and the local folder is pointed at {@code inputDir}. If the title
 * is empty according to {@code StringUtils.isEmpty}, the name of {@code inputDir} is used instead.
 * <p>
 * NOTE: when {@code docMd} is {@code null} a fresh instance is created and filled, but the
 * parameter is only reassigned locally and nothing is returned, so the caller never sees it.
 *
 * @param docMd the metadata object to initialize
 * @param inputDir the local document folder, must not be null
 * @param stripAllServerRelatedMetadata if true, all server related data is removed from the
 *        metadata object: collections, symbolic image links, uploader info, etc.
 * @throws IllegalArgumentException if {@code inputDir} is null
 */
private static void initDocMd(TrpDocMetadata docMd, File inputDir, boolean stripAllServerRelatedMetadata) {
    if (inputDir == null) {
        throw new IllegalArgumentException("Input dir must not be null.");
    }
    final TrpDocMetadata md = (docMd != null) ? docMd : new TrpDocMetadata();

    // values that always apply to a locally read document
    md.setLocalFolder(inputDir);
    md.setDocId(-1);
    if (StringUtils.isEmpty(md.getTitle())) {
        md.setTitle(inputDir.getName());
    }

    if (!stripAllServerRelatedMetadata) {
        return;
    }

    // drop everything that only makes sense in the server context
    md.getColList().clear();
    md.setFimgStoreColl(null);
    md.setOrigDocId(null);
    md.setPageId(null);
    md.setThumbUrl(null);
    md.setUrl(null);
    md.setUploaderId(-1);
    md.setUploader(null);
    md.setUploadTimestamp(0);
}
use of eu.transkribus.core.model.beans.TrpDocMetadata in project TranskribusCore by Transkribus.
the class TrpPdfDocument method addTitlePage.
/**
 * Adds a title page to the PDF on a new page. The page consists of three sections:
 * the document metadata, the export settings and the editorial declaration.
 * <p>
 * Metadata entries are written via {@code writeDocMd}; the vertical position only advances
 * when that call returns true. The "Editorial Declaration" heading is written only if the
 * document actually has editorial declaration features.
 *
 * @param doc the document whose metadata and editorial declaration are printed
 */
public void addTitlePage(TrpDoc doc) {
    document.newPage();
    PdfContentByte cb = writer.getDirectContentUnder();
    float lineHeight = twelfthPoints[1][0] / 3;
    float posY = twelfthPoints[1][1];
    addTitleString("Title Page", posY, 0, (float) (lineHeight * 1.5), cb, bfArialBoldItalic);
    posY += lineHeight * 2;
    TrpDocMetadata docMd = doc.getMd();
    if (writeDocMd("Title: ", docMd.getTitle(), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.5;
    }
    if (writeDocMd("Author: ", docMd.getAuthor(), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.5;
    }
    // the remaining metadata entries use half the line height of title and author
    lineHeight = twelfthPoints[1][0] / 6;
    if (writeDocMd("Description: ", docMd.getDesc(), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.2;
    }
    if (writeDocMd("Genre: ", docMd.getGenre(), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.2;
    }
    if (writeDocMd("Writer: ", docMd.getWriter(), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.2;
    }
    if (docMd.getScriptType() != null) {
        if (writeDocMd("Scripttype: ", docMd.getScriptType().toString(), posY, 0, lineHeight, cb, bfArialItalic)) {
            posY += lineHeight * 1.2;
        }
    }
    if (writeDocMd("Language: ", docMd.getLanguage(), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.2;
    }
    if (writeDocMd("Number of Pages in whole Document: ", String.valueOf(docMd.getNrOfPages()), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.2;
    }
    if (docMd.getCreatedFromDate() != null) {
        if (writeDocMd("Created From: ", docMd.getCreatedFromDate().toString(), posY, 0, lineHeight, cb, bfArialItalic)) {
            posY += lineHeight * 1.2;
        }
    }
    if (docMd.getCreatedToDate() != null) {
        if (writeDocMd("Created To: ", docMd.getCreatedToDate().toString(), posY, 0, lineHeight, cb, bfArialItalic)) {
            posY += lineHeight * 1.5;
        }
    }

    // --- Export settings section
    lineHeight = twelfthPoints[1][0] / 3;
    addTitleString("Export Settings: ", posY, twelfthPoints[1][0], lineHeight, cb, bfArialBoldItalic);
    String imageSetting = (imgOnly ? "Images without text layer" : "Images with text layer");
    String extraTextSetting = (extraTextPage ? "Extra pages for transcribed text are added" : "");
    String blackeningSetting = (doBlackening ? "Sensible data is invisible" : "Sensible data is shown if existent");
    String tagSetting = (highlightTags ? "Tags are highlighted (colored lines) and added at the end" : "No tags shown in export");
    // skip the extra-text segment when it is empty, otherwise the line contains " /  / "
    String settingsLine = imageSetting;
    if (!extraTextSetting.isEmpty()) {
        settingsLine += " / " + extraTextSetting;
    }
    settingsLine += " / " + blackeningSetting + " / " + tagSetting;
    lineHeight = twelfthPoints[1][0] / 6;
    posY += lineHeight * 1.5;
    addTitleString(settingsLine, posY, twelfthPoints[1][0], lineHeight, cb, bfArialBoldItalic);
    // --- Export settings section end

    // --- Editorial declaration section
    lineHeight = twelfthPoints[1][0] / 3;
    posY += lineHeight * 1.5;
    List<EdFeature> efl = doc.getEdDeclList();
    // was "efl.size() >= 0", which is always true and printed the heading for empty lists
    if (efl != null && !efl.isEmpty()) {
        addTitleString("Editorial Declaration: ", posY, twelfthPoints[1][0], lineHeight, cb, bfArialBoldItalic);
        posY += lineHeight * 1.5;
        lineHeight = twelfthPoints[1][0] / 6;
        for (EdFeature edfeat : efl) {
            String entry = edfeat.getTitle() + ": " + edfeat.getDescription();
            // a feature without a selected option must not abort the export with an NPE
            Object selectedOption = edfeat.getSelectedOption();
            if (selectedOption != null) {
                entry += "\n" + selectedOption;
            }
            addTitleString(entry, posY, twelfthPoints[1][0], lineHeight, cb, bfArial);
            posY += lineHeight * 1.5;
        }
    }
    // --- Editorial declaration section end
}
use of eu.transkribus.core.model.beans.TrpDocMetadata in project TranskribusCore by Transkribus.
the class MetsUtil method getTrpDocMd.
/**
 * Extracts the document metadata from the source metadata section of a METS object.
 * <p>
 * The method looks for the administrative section whose ID equals
 * {@code TrpMetsBuilder.SOURCE_MD_ID_CONST}, then for the source MD entry whose ID equals
 * {@code TrpMetsBuilder.SOURCE_DOC_MD_ID_CONST} and whose mdWrap ID equals
 * {@code TrpMetsBuilder.TRP_DOC_MD_TYPE_CONST}, and returns the first element of its XML data
 * if that is a {@link TrpDocMetadata}.
 * <p>
 * Sections without an ID and entries without an mdWrap are skipped instead of causing a
 * {@link NullPointerException}.
 *
 * @param mets the METS object to search
 * @return the metadata found, or {@code null} if there is none
 */
public static TrpDocMetadata getTrpDocMd(Mets mets) {
    List<MdSecType> mdSecList = null;
    for (AmdSecType sec : mets.getAmdSec()) {
        // constant-first comparison: the ID attribute may be absent
        if (TrpMetsBuilder.SOURCE_MD_ID_CONST.equals(sec.getID())) {
            mdSecList = sec.getSourceMD();
            break;
        }
    }
    if (mdSecList == null) {
        logger.error("No SourceMd Section found!");
        return null;
    }

    XmlData xmlData = null;
    for (MdSecType mdSec : mdSecList) {
        // an entry may reference its metadata (mdRef) instead of wrapping it
        if (mdSec.getMdWrap() != null
                && TrpMetsBuilder.SOURCE_DOC_MD_ID_CONST.equals(mdSec.getID())
                && TrpMetsBuilder.TRP_DOC_MD_TYPE_CONST.equals(mdSec.getMdWrap().getID())) {
            xmlData = mdSec.getMdWrap().getXmlData();
            break;
        }
    }

    TrpDocMetadata md = null;
    if (xmlData != null && xmlData.getAny().size() > 0) {
        Object o = xmlData.getAny().get(0);
        if (o instanceof TrpDocMetadata) {
            md = (TrpDocMetadata) o;
            logger.info("Found metadata: " + md.toString());
        } else {
            logger.error("No doc MD found! ");
        }
    }
    return md;
}
use of eu.transkribus.core.model.beans.TrpDocMetadata in project TranskribusCore by Transkribus.
the class FakeDocProvider method createDocMd.
/**
 * Creates a metadata object filled with made-up values for the given document ID.
 * <p>
 * The upload timestamp is a fixed point in time (1543-02-01 16:43:00.000 in the default
 * time zone), so repeated calls yield the same value.
 *
 * @param docid the document ID to set
 * @return the fake metadata object
 */
public static TrpDocMetadata createDocMd(int docid) {
    TrpDocMetadata md = new TrpDocMetadata();
    md.setAuthor("The guy who made up that text");
    md.setGenre("Some genre");
    md.setDocId(docid);
    md.setTitle("Some Handwritten Text");
    md.setWriter("The guy who wrote this");
    md.setScriptType(ScriptType.NORMAL);
    Calendar cal = Calendar.getInstance();
    // Calendar months are zero-based: the former literal 1 is February
    cal.set(1543, Calendar.FEBRUARY, 1, 16, 43, 0);
    // getInstance() starts at "now"; without this the current milliseconds leak into the timestamp
    cal.set(Calendar.MILLISECOND, 0);
    md.setUploadTimestamp(cal.getTime().getTime());
    md.setNrOfPages(nrOfPages);
    return md;
}
use of eu.transkribus.core.model.beans.TrpDocMetadata in project TranskribusCore by Transkribus.
the class GoobiMetsImporter method readModsMetadata.
/**
 * Reads the metadata from the MODS section of a METS document into the corresponding
 * TrpDocMetadata attributes.
 * <p>
 * The MODS section is the first element whose ID attribute contains {@code DMDLOG_0000}.
 * Currently mapped: title, creation date (from {@code mods:dateIssued}, year only), author and
 * external ID (from {@code mods:identifier} with type {@code CatalogueIdentifier}).
 * If no MODS section is found, placeholder values are set for title and author.
 * <p>
 * TODO: not all attributes are filled yet. Missing are: genre ({@code mods:genre}),
 * writer, language ({@code mods:languageTerm}), script type, description.
 *
 * @param mets the parsed METS document
 * @return a new metadata object, never null
 */
public TrpDocMetadata readModsMetadata(Document mets) {
    TrpDocMetadata result = new TrpDocMetadata();
    Element modsSection = (Element) XmlUtils.selectNode(mets.getDocumentElement(), "(*[contains(@ID,'DMDLOG_0000')])[1]");
    if (modsSection == null) {
        logger.debug("mods section is null");
        result.setTitle("unknownTitle");
        result.setAuthor("unknownAuthor");
        return result;
    }

    // --- title: only titles whose parent element carries no type attribute are used;
    // typed titles (e.g. type="uniform") are ignored for now
    NodeList actFields = modsSection.getElementsByTagName("mods:title");
    for (int i = 0; i < actFields.getLength(); i++) {
        logger.debug("title element found: " + actFields.getLength());
        Element act = (Element) actFields.item(i);
        String typeAttribute = ((Element) act.getParentNode()).getAttribute("type");
        if (typeAttribute == null || typeAttribute.isEmpty()) {
            result.setTitle(act.getTextContent());
        }
    }

    // --- creation date: the issued year is used for both ends of the date range
    actFields = modsSection.getElementsByTagName("mods:dateIssued");
    DateFormat format = new SimpleDateFormat("yyyy", Locale.GERMAN);
    for (int i = 0; i < actFields.getLength(); i++) {
        String dateString = actFields.item(i).getTextContent();
        try {
            Date date = format.parse(dateString);
            result.setCreatedFromDate(date);
            result.setCreatedToDate(date);
        } catch (ParseException e) {
            // an unparsable date is not fatal, the remaining metadata is still read
            logger.error("Could not parse mods:dateIssued value: " + dateString, e);
        }
    }

    // --- author: personal names with role "aut"
    actFields = modsSection.getElementsByTagName("mods:name");
    for (int i = 0; i < actFields.getLength(); i++) {
        Element act = (Element) actFields.item(i);
        if (!"personal".equals(act.getAttribute("type"))) {
            continue;
        }
        String role = XmlUtils.getFirstSubElementFromElement(act, "mods:roleTerm");
        if (!"aut".equals(role)) {
            continue;
        }
        String author = XmlUtils.getFirstSubElementFromElement(act, "mods:displayForm");
        logger.debug("Author found is " + author);
        if (author == null || author.isEmpty()) {
            // no display form: compose "<family> <given>" from the name parts.
            // Composed separately so that an empty display form adds no stray blank.
            String composed = null;
            NodeList nl = act.getElementsByTagName("mods:namePart");
            for (int j = 0; j < nl.getLength(); j++) {
                logger.debug("NodeList length " + nl.getLength());
                Element value = (Element) nl.item(j);
                String valueType = value.getAttribute("type");
                logger.debug("valueType " + valueType);
                if ("family".equals(valueType)) {
                    composed = (composed == null) ? value.getTextContent() : value.getTextContent() + " " + composed;
                } else if ("given".equals(valueType)) {
                    composed = (composed == null) ? value.getTextContent() : composed + " " + value.getTextContent();
                }
                logger.debug("Author found is " + composed);
            }
            if (composed != null) {
                author = composed;
            }
        }
        result.setAuthor(author);
    }

    /*
     * extract external ID
     *
     * https://github.com/Transkribus/TranskribusCore/issues/16
     *
     * TODO add possible type attribute values here
     */
    actFields = modsSection.getElementsByTagName("mods:identifier");
    for (int i = 0; i < actFields.getLength(); i++) {
        Element act = (Element) actFields.item(i);
        // NAF uses type="CatalogueIdentifier"
        if ("CatalogueIdentifier".equals(act.getAttribute("type"))) {
            // getNodeValue() is always null for element nodes; the ID is the element's text
            final String extId = act.getTextContent();
            result.setExternalId(extId);
        }
    }
    return result;
}
Aggregations