Search in sources :

Example 6 with TrpDocMetadata

use of eu.transkribus.core.model.beans.TrpDocMetadata in project TranskribusCore by Transkribus.

the class LocalDocReader method initDocMd.

/**
 * Initializes a metadata object for a document that lives in a local folder.
 * The doc ID is set to -1 and the local folder is set to the input dir.
 * If the metadata carries no title, the name of the input dir is used instead.
 *
 * @param docMd the metadata object to initialize in place; if {@code null}, there is nothing to
 *        initialize and the method returns without doing anything
 * @param inputDir the local folder of the document, must not be {@code null}
 * @param stripAllServerRelatedMetadata if true, then all server related data is removed from the
 *        metadata object: collections, symbolic image links, etc.
 * @throws IllegalArgumentException if {@code inputDir} is {@code null}
 */
private static void initDocMd(TrpDocMetadata docMd, File inputDir, boolean stripAllServerRelatedMetadata) {
    if (inputDir == null) {
        throw new IllegalArgumentException("Input dir must not be null.");
    }
    if (docMd == null) {
        // Java passes references by value: an instance created here could never reach the caller
        // through this void method, so initializing a throw-away object would be pointless work.
        return;
    }
    docMd.setLocalFolder(inputDir);
    docMd.setDocId(-1);
    if (StringUtils.isEmpty(docMd.getTitle())) {
        docMd.setTitle(inputDir.getName());
    }
    if (stripAllServerRelatedMetadata) {
        // Guard against metadata whose collection list was never populated (e.g. unmarshalled
        // from a file without that element).
        if (docMd.getColList() != null) {
            docMd.getColList().clear();
        }
        docMd.setFimgStoreColl(null);
        docMd.setOrigDocId(null);
        docMd.setPageId(null);
        docMd.setThumbUrl(null);
        docMd.setUrl(null);
        docMd.setUploaderId(-1);
        docMd.setUploader(null);
        docMd.setUploadTimestamp(0);
    }
}
Also used : TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata)

Example 7 with TrpDocMetadata

use of eu.transkribus.core.model.beans.TrpDocMetadata in project TranskribusCore by Transkribus.

the class TrpPdfDocument method addTitlePage.

/**
 * Starts a new page in the PDF and writes a title page for the given document onto it.
 * The page consists of three sections: the document metadata, the export settings that were
 * used for this PDF, and the editorial declaration (only if the document has one).
 *
 * @param doc the document whose metadata and editorial declaration are written
 */
public void addTitlePage(TrpDoc doc) {
    document.newPage();
    PdfContentByte cb = writer.getDirectContentUnder();
    float lineHeight = twelfthPoints[1][0] / 3;
    float posY = twelfthPoints[1][1];
    addTitleString("Title Page", posY, 0, (float) (lineHeight * 1.5), cb, bfArialBoldItalic);
    posY += lineHeight * 2;

    // --- Metadata section
    // posY is only advanced when writeDocMd reports true, so skipped fields leave no gap.
    TrpDocMetadata docMd = doc.getMd();
    if (writeDocMd("Title: ", docMd.getTitle(), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.5;
    }
    if (writeDocMd("Author: ", docMd.getAuthor(), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.5;
    }
    // remaining metadata is written with half the line height
    lineHeight = twelfthPoints[1][0] / 6;
    if (writeDocMd("Description: ", docMd.getDesc(), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.2;
    }
    if (writeDocMd("Genre: ", docMd.getGenre(), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.2;
    }
    if (writeDocMd("Writer: ", docMd.getWriter(), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.2;
    }
    if (docMd.getScriptType() != null) {
        if (writeDocMd("Scripttype: ", docMd.getScriptType().toString(), posY, 0, lineHeight, cb, bfArialItalic)) {
            posY += lineHeight * 1.2;
        }
    }
    if (writeDocMd("Language: ", docMd.getLanguage(), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.2;
    }
    if (writeDocMd("Number of Pages in whole Document: ", String.valueOf(docMd.getNrOfPages()), posY, 0, lineHeight, cb, bfArialItalic)) {
        posY += lineHeight * 1.2;
    }
    if (docMd.getCreatedFromDate() != null) {
        if (writeDocMd("Created From: ", docMd.getCreatedFromDate().toString(), posY, 0, lineHeight, cb, bfArialItalic)) {
            posY += lineHeight * 1.2;
        }
    }
    if (docMd.getCreatedToDate() != null) {
        if (writeDocMd("Created To: ", docMd.getCreatedToDate().toString(), posY, 0, lineHeight, cb, bfArialItalic)) {
            posY += lineHeight * 1.5;
        }
    }
    // --- Metadata section end

    // --- Export settings section
    lineHeight = twelfthPoints[1][0] / 3;
    addTitleString("Export Settings: ", posY, twelfthPoints[1][0], lineHeight, cb, bfArialBoldItalic);
    String imageSetting = (imgOnly ? "Images without text layer" : "Images with text layer");
    // The separator is part of the optional entry so that no dangling " /  / " is printed
    // when no extra text pages were exported.
    String extraTextSetting = (extraTextPage ? "Extra pages for transcribed text are added / " : "");
    String blackeningSetting = (doBlackening ? "Sensible data is invisible" : "Sensible data is shown if existent");
    String tagSetting = (highlightTags ? "Tags are highlighted (colored lines) and added at the end" : "No tags shown in export");
    lineHeight = twelfthPoints[1][0] / 6;
    posY += lineHeight * 1.5;
    addTitleString(imageSetting + " / " + extraTextSetting + blackeningSetting + " / " + tagSetting, posY, twelfthPoints[1][0], lineHeight, cb, bfArialBoldItalic);
    // --- Export settings section end

    // --- Editorial declaration section
    lineHeight = twelfthPoints[1][0] / 3;
    posY += lineHeight * 1.5;
    List<EdFeature> efl = doc.getEdDeclList();
    // Only print the heading if there is at least one feature to list below it.
    if (efl != null && !efl.isEmpty()) {
        addTitleString("Editorial Declaration: ", posY, twelfthPoints[1][0], lineHeight, cb, bfArialBoldItalic);
        posY += lineHeight * 1.5;
        lineHeight = twelfthPoints[1][0] / 6;
        for (EdFeature edfeat : efl) {
            // A feature without a selected option is still listed, just without the option line.
            Object selected = edfeat.getSelectedOption();
            String selectedOption = (selected == null) ? "" : selected.toString();
            addTitleString(edfeat.getTitle() + ": " + edfeat.getDescription() + "\n" + selectedOption, posY, twelfthPoints[1][0], lineHeight, cb, bfArial);
            posY += lineHeight * 1.5;
        }
    }
    // --- Editorial declaration section end
}
Also used : EdFeature(eu.transkribus.core.model.beans.EdFeature) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata) PdfContentByte(com.itextpdf.text.pdf.PdfContentByte)

Example 8 with TrpDocMetadata

use of eu.transkribus.core.model.beans.TrpDocMetadata in project TranskribusCore by Transkribus.

the class MetsUtil method getTrpDocMd.

/**
 * Extracts the Transkribus document metadata that is embedded in a METS object.
 * The metadata is looked up in the sourceMD list of the administrative section whose ID equals
 * {@link TrpMetsBuilder#SOURCE_MD_ID_CONST}; within that list the entry with ID
 * {@link TrpMetsBuilder#SOURCE_DOC_MD_ID_CONST} and an mdWrap with ID
 * {@link TrpMetsBuilder#TRP_DOC_MD_TYPE_CONST} is used.
 *
 * @param mets the METS object to search
 * @return the embedded metadata, or {@code null} if the METS does not contain any
 */
public static TrpDocMetadata getTrpDocMd(Mets mets) {
    List<MdSecType> mdSecList = null;
    for (AmdSecType sec : mets.getAmdSec()) {
        // constant-first comparison: the ID attribute of an amdSec is optional and may be null
        if (TrpMetsBuilder.SOURCE_MD_ID_CONST.equals(sec.getID())) {
            mdSecList = sec.getSourceMD();
            break;
        }
    }
    if (mdSecList == null) {
        logger.error("No SourceMd Section found!");
        return null;
    }

    XmlData xmlData = null;
    for (MdSecType mdSec : mdSecList) {
        if (!TrpMetsBuilder.SOURCE_DOC_MD_ID_CONST.equals(mdSec.getID())) {
            continue;
        }
        // an mdSec may reference its content (mdRef) instead of wrapping it, so mdWrap can be absent
        MdSecType.MdWrap mdWrap = mdSec.getMdWrap();
        if (mdWrap != null && TrpMetsBuilder.TRP_DOC_MD_TYPE_CONST.equals(mdWrap.getID())) {
            xmlData = mdWrap.getXmlData();
            break;
        }
    }

    TrpDocMetadata md = null;
    if (xmlData != null && xmlData.getAny().size() > 0) {
        // only the first wrapped element is considered
        Object o = xmlData.getAny().get(0);
        if (o instanceof TrpDocMetadata) {
            md = (TrpDocMetadata) o;
            logger.info("Found metadata: " + md.toString());
        } else {
            logger.error("No doc MD found! ");
        }
    }
    return md;
}
Also used : MdSecType(eu.transkribus.core.model.beans.mets.MdSecType) XmlData(eu.transkribus.core.model.beans.mets.MdSecType.MdWrap.XmlData) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata) AmdSecType(eu.transkribus.core.model.beans.mets.AmdSecType)

Example 9 with TrpDocMetadata

use of eu.transkribus.core.model.beans.TrpDocMetadata in project TranskribusCore by Transkribus.

the class FakeDocProvider method createDocMd.

/**
 * Builds a metadata object filled with made-up values for use as fake document data.
 *
 * @param docid the document ID to store in the metadata
 * @return a new metadata object carrying the given ID and fixed dummy values
 */
public static TrpDocMetadata createDocMd(int docid) {
    final TrpDocMetadata fakeMd = new TrpDocMetadata();
    fakeMd.setDocId(docid);
    fakeMd.setTitle("Some Handwritten Text");
    fakeMd.setAuthor("The guy who made up that text");
    fakeMd.setWriter("The guy who wrote this");
    fakeMd.setGenre("Some genre");
    fakeMd.setScriptType(ScriptType.NORMAL);

    // Calendar months are zero-based, so this is 1 February 1543, 16:43:00 in the default time zone.
    // set(...) keeps the other fields, i.e. the millisecond part still comes from the current time.
    final Calendar uploadTime = Calendar.getInstance();
    uploadTime.set(1543, Calendar.FEBRUARY, 1, 16, 43, 0);
    fakeMd.setUploadTimestamp(uploadTime.getTimeInMillis());

    fakeMd.setNrOfPages(nrOfPages);
    return fakeMd;
}
Also used : Calendar(java.util.Calendar) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata)

Example 10 with TrpDocMetadata

use of eu.transkribus.core.model.beans.TrpDocMetadata in project TranskribusCore by Transkribus.

the class GoobiMetsImporter method readModsMetadata.

/**
 * Reads the metadata from the MODS section of a METS document into the corresponding
 * TrpDocMetadata attributes. The MODS section is the first child of the document element whose
 * ID attribute contains 'DMDLOG_0000'. If no such section exists, title and author are set to
 * the placeholders "unknownTitle" and "unknownAuthor".
 * <p>
 * Currently filled: title, created-from/created-to date (from mods:dateIssued), author and
 * external ID.
 * TODO: not all attributes are filled yet; missing are: genre, writer, language, script type,
 * description.
 *
 * @param mets the METS document as DOM
 * @return a new metadata object, never {@code null}
 */
public TrpDocMetadata readModsMetadata(Document mets) {
    TrpDocMetadata result = new TrpDocMetadata();
    Element modsSection = (Element) XmlUtils.selectNode(mets.getDocumentElement(), "(*[contains(@ID,'DMDLOG_0000')])[1]");
    if (modsSection == null) {
        logger.debug("mods section is null");
        result.setTitle("unknownTitle");
        result.setAuthor("unknownAuthor");
        return result;
    }

    // --- title: only titles whose parent element carries no type attribute are used
    // (a typed parent marks a special title, e.g. type="uniform")
    NodeList actFields = modsSection.getElementsByTagName("mods:title");
    for (int i = 0; i < actFields.getLength(); i++) {
        logger.debug("title element found: " + actFields.getLength());
        Element act = (Element) actFields.item(i);
        String typeAttribute = ((Element) act.getParentNode()).getAttribute("type");
        if (typeAttribute == null || typeAttribute.equals("")) {
            result.setTitle(act.getTextContent());
        }
    }

    // --- date issued: only the leading year is parsed and used for both ends of the date range
    actFields = modsSection.getElementsByTagName("mods:dateIssued");
    DateFormat yearFormat = new SimpleDateFormat("yyyy", Locale.GERMAN);
    for (int i = 0; i < actFields.getLength(); i++) {
        String dateString = actFields.item(i).getTextContent();
        try {
            Date date = yearFormat.parse(dateString);
            result.setCreatedFromDate(date);
            result.setCreatedToDate(date);
        } catch (ParseException e) {
            // an unparsable date must not abort the import, the date fields just stay unset
            logger.error("Could not parse mods:dateIssued value: " + dateString, e);
        }
    }

    // --- author: personal names with role "aut"
    actFields = modsSection.getElementsByTagName("mods:name");
    for (int i = 0; i < actFields.getLength(); i++) {
        Element act = (Element) actFields.item(i);
        String typeAttribute = act.getAttribute("type");
        if (typeAttribute == null || !typeAttribute.equals("personal")) {
            continue;
        }
        String role = XmlUtils.getFirstSubElementFromElement(act, "mods:roleTerm");
        if (role == null || !role.equals("aut")) {
            continue;
        }
        String author = XmlUtils.getFirstSubElementFromElement(act, "mods:displayForm");
        logger.debug("Author found is " + author);
        if (author == null || author.equals("")) {
            // No display form available: compose "<family> <given>" from the name parts.
            // The composition starts from null (not from an empty display form) so that no
            // stray blank is attached to the name.
            String composed = null;
            NodeList nl = act.getElementsByTagName("mods:namePart");
            logger.debug("NodeList length " + nl.getLength());
            for (int j = 0; j < nl.getLength(); j++) {
                Element value = (Element) nl.item(j);
                String valueType = value.getAttribute("type");
                logger.debug("valueType " + valueType);
                if (valueType.equals("family")) {
                    // family name always goes first
                    composed = (composed == null) ? value.getTextContent() : value.getTextContent() + " " + composed;
                } else if (valueType.equals("given")) {
                    composed = (composed == null) ? value.getTextContent() : composed + " " + value.getTextContent();
                }
                logger.debug("Author found is " + composed);
            }
            if (composed != null) {
                author = composed;
            }
        }
        result.setAuthor(author);
    }

    /*
     * extract external ID
     *
     * https://github.com/Transkribus/TranskribusCore/issues/16
     *
     * TODO add possible type attribute values here
     */
    actFields = modsSection.getElementsByTagName("mods:identifier");
    for (int i = 0; i < actFields.getLength(); i++) {
        Element act = (Element) actFields.item(i);
        String typeAttribute = act.getAttribute("type");
        // NAF uses type="CatalogueIdentifier"
        if (typeAttribute != null && typeAttribute.equals("CatalogueIdentifier")) {
            // getNodeValue() is always null for element nodes; the identifier is the text content
            final String extId = act.getTextContent();
            result.setExternalId(extId);
        }
    }
    return result;
}
Also used : Element(org.w3c.dom.Element) NodeList(org.w3c.dom.NodeList) DateFormat(java.text.DateFormat) SimpleDateFormat(java.text.SimpleDateFormat) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata) ParseException(java.text.ParseException) SimpleDateFormat(java.text.SimpleDateFormat) Date(java.util.Date)

Aggregations

TrpDocMetadata (eu.transkribus.core.model.beans.TrpDocMetadata)16 File (java.io.File)7 IOException (java.io.IOException)7 TrpDoc (eu.transkribus.core.model.beans.TrpDoc)6 TrpPage (eu.transkribus.core.model.beans.TrpPage)5 EdFeature (eu.transkribus.core.model.beans.EdFeature)4 FileNotFoundException (java.io.FileNotFoundException)3 Date (java.util.Date)3 JAXBException (javax.xml.bind.JAXBException)3 CorruptImageException (eu.transkribus.core.exceptions.CorruptImageException)2 AmdSecType (eu.transkribus.core.model.beans.mets.AmdSecType)2 MdSecType (eu.transkribus.core.model.beans.mets.MdSecType)2 Mets (eu.transkribus.core.model.beans.mets.Mets)2 Dimension (java.awt.Dimension)2 SimpleDateFormat (java.text.SimpleDateFormat)2 ArrayList (java.util.ArrayList)2 PdfContentByte (com.itextpdf.text.pdf.PdfContentByte)1 XmlFormat (eu.transkribus.core.io.formats.XmlFormat)1 MdFileFilter (eu.transkribus.core.io.util.MdFileFilter)1 PageUploadDescriptor (eu.transkribus.core.model.beans.DocumentUploadDescriptor.PageUploadDescriptor)1