Search in sources :

Example 6 with XMPSchemaDublinCore

use of org.apache.jempbox.xmp.XMPSchemaDublinCore in project jabref by JabRef.

the class XMPUtilTest method testSimpleUpdate.

/**
 * Tests whether writing BibTeX XMP metadata preserves the XMP descriptions that already exist in the PDF.
 *
 * <p>The test first writes a packet containing only foreign (non-BibTeX) descriptions, then one that
 * additionally carries a Dublin Core title, and finally writes BibTeX entries twice via
 * {@code XMPUtil.writeXMP}. After each BibTeX write the foreign descriptions must still be present.
 *
 * @throws Exception if reading or writing the PDF fails (indicating a failure)
 */
@Test
public void testSimpleUpdate() throws Exception {
    // Descriptions written by a foreign tool (xmp, xapMM and an unterminated dc description).
    // The dc description is closed separately so that a title can be appended to it below.
    String foreignDescriptions = " <rdf:Description rdf:about=''"
            + "  xmlns:xmp='http://ns.adobe.com/xap/1.0/'>"
            + "  <xmp:CreatorTool>Acrobat PDFMaker 7.0.7</xmp:CreatorTool>"
            + "  <xmp:ModifyDate>2006-08-07T18:50:24+02:00</xmp:ModifyDate>"
            + "  <xmp:CreateDate>2006-08-07T14:44:24+02:00</xmp:CreateDate>"
            + "  <xmp:MetadataDate>2006-08-07T18:50:24+02:00</xmp:MetadataDate>"
            + " </rdf:Description>"
            + " <rdf:Description rdf:about=''"
            + "  xmlns:xapMM='http://ns.adobe.com/xap/1.0/mm/'>"
            + "  <xapMM:DocumentID>uuid:843cd67d-495e-4c1e-a4cd-64178f6b3299</xapMM:DocumentID>"
            + "  <xapMM:InstanceID>uuid:1e56b4c0-6782-440d-ba76-d2b3d87547d1</xapMM:InstanceID>"
            + "  <xapMM:VersionID>"
            + "   <rdf:Seq>"
            + "    <rdf:li>17</rdf:li>"
            + "   </rdf:Seq>"
            + "  </xapMM:VersionID>"
            + " </rdf:Description>"
            + " <rdf:Description rdf:about=''"
            + "  xmlns:dc='http://purl.org/dc/elements/1.1/'>"
            + "  <dc:format>application/pdf</dc:format>";
    String descriptionEnd = "</rdf:Description>";

    writeManually(pdfFile, XMPUtilTest.bibtexXPacket(foreignDescriptions + descriptionEnd));
    // Nothing there yet, but should not crash
    Assert.assertEquals(Collections.emptyList(), XMPUtil.readXMP(pdfFile, xmpPreferences));

    String dcTitle = "  <dc:title>"
            + "   <rdf:Alt>"
            + "    <rdf:li xml:lang='x-default'>Questionnaire.pdf</rdf:li>"
            + "   </rdf:Alt>"
            + "  </dc:title>";
    writeManually(pdfFile, XMPUtilTest.bibtexXPacket(foreignDescriptions + dcTitle + descriptionEnd));
    // Title is Questionnaire.pdf so the DublinCore fallback should kick in
    Assert.assertEquals(1, XMPUtil.readXMP(pdfFile, xmpPreferences).size());

    // Now write a new packet and check that it was written correctly
    XMPUtil.writeXMP(pdfFile, t1BibtexEntry(), null, xmpPreferences);
    List<BibEntry> entries = XMPUtil.readXMP(pdfFile.getAbsoluteFile(), xmpPreferences);
    Assert.assertEquals(1, entries.size());
    assertEqualsBibtexEntry(t1BibtexEntry(), entries.get(0));
    assertForeignXmpDescriptionsPreserved();

    // Now alter the BibTeX entry, write it and do all the checks again
    BibEntry toSet = t1BibtexEntry();
    toSet.setField("author", "Pokemon!");
    XMPUtil.writeXMP(pdfFile, toSet, null, xmpPreferences);
    entries = XMPUtil.readXMP(pdfFile.getAbsoluteFile(), xmpPreferences);
    Assert.assertEquals(1, entries.size());
    assertEqualsBibtexEntry(toSet, entries.get(0));
    assertForeignXmpDescriptionsPreserved();
}

/**
 * Loads {@code pdfFile} and asserts that it holds exactly four XMP schemas: one BibTeX schema plus the
 * Dublin Core, XMP Basic and Media Management schemas with the values written by {@code testSimpleUpdate}.
 *
 * @throws Exception if the PDF is encrypted or its metadata cannot be read or parsed
 */
private void assertForeignXmpDescriptionsPreserved() throws Exception {
    try (PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile())) {
        if (document.isEncrypted()) {
            throw new IOException("Error: Cannot read metadata from encrypted document.");
        }
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();
        XMPMetadata meta;
        if (metaRaw == null) {
            meta = new XMPMetadata();
        } else {
            meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream()));
        }
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);

        List<XMPSchema> schemas = meta.getSchemas();
        Assert.assertEquals(4, schemas.size());

        schemas = meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
        Assert.assertEquals(1, schemas.size());

        schemas = meta.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
        Assert.assertEquals(1, schemas.size());
        XMPSchemaDublinCore dc = (XMPSchemaDublinCore) schemas.get(0);
        Assert.assertEquals("application/pdf", dc.getFormat());

        schemas = meta.getSchemasByNamespaceURI(XMPSchemaBasic.NAMESPACE);
        Assert.assertEquals(1, schemas.size());
        XMPSchemaBasic bs = (XMPSchemaBasic) schemas.get(0);
        Assert.assertEquals("Acrobat PDFMaker 7.0.7", bs.getCreatorTool());

        // Expected create date: 2006-08-07T14:44:24+02:00
        Calendar expected = Calendar.getInstance();
        expected.clear();
        expected.set(Calendar.YEAR, 2006);
        expected.set(Calendar.MONTH, Calendar.AUGUST);
        expected.set(Calendar.DATE, 7);
        // HOUR_OF_DAY (24-hour clock) rather than HOUR (12-hour clock): comparing HOUR alone
        // cannot tell 14:44 from 02:44.
        expected.set(Calendar.HOUR_OF_DAY, 14);
        expected.set(Calendar.MINUTE, 44);
        expected.set(Calendar.SECOND, 24);
        expected.setTimeZone(TimeZone.getTimeZone("GMT+2"));

        Calendar actual = bs.getCreateDate();
        Assert.assertEquals(expected.get(Calendar.YEAR), actual.get(Calendar.YEAR));
        Assert.assertEquals(expected.get(Calendar.MONTH), actual.get(Calendar.MONTH));
        Assert.assertEquals(expected.get(Calendar.DATE), actual.get(Calendar.DATE));
        Assert.assertEquals(expected.get(Calendar.HOUR_OF_DAY), actual.get(Calendar.HOUR_OF_DAY));
        Assert.assertEquals(expected.get(Calendar.MINUTE), actual.get(Calendar.MINUTE));
        Assert.assertEquals(expected.get(Calendar.SECOND), actual.get(Calendar.SECOND));
        Assert.assertTrue(expected.getTimeZone().hasSameRules(actual.getTimeZone()));

        schemas = meta.getSchemasByNamespaceURI(XMPSchemaMediaManagement.NAMESPACE);
        Assert.assertEquals(1, schemas.size());
        XMPSchemaMediaManagement mm = (XMPSchemaMediaManagement) schemas.get(0);
        Assert.assertEquals("17", mm.getSequenceList("xapMM:VersionID").get(0));
    }
}
Also used : XMPSchemaDublinCore(org.apache.jempbox.xmp.XMPSchemaDublinCore) BibEntry(org.jabref.model.entry.BibEntry) XMPSchemaBasic(org.apache.jempbox.xmp.XMPSchemaBasic) XMPSchema(org.apache.jempbox.xmp.XMPSchema) Calendar(java.util.Calendar) IOException(java.io.IOException) PDMetadata(org.apache.pdfbox.pdmodel.common.PDMetadata) PDDocumentCatalog(org.apache.pdfbox.pdmodel.PDDocumentCatalog) XMPSchemaMediaManagement(org.apache.jempbox.xmp.XMPSchemaMediaManagement) XMPMetadata(org.apache.jempbox.xmp.XMPMetadata) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) AuthorList(org.jabref.model.entry.AuthorList) List(java.util.List) LinkedList(java.util.LinkedList) Test(org.junit.Test)

Example 7 with XMPSchemaDublinCore

use of org.apache.jempbox.xmp.XMPSchemaDublinCore in project jabref by JabRef.

the class XMPUtilTest method testWriteSingleUpdatesDCAndInfo.

/**
 * Writes a single entry with {@code XMPUtil.writeXMP} and checks that both the PDF document
 * information and the Dublin Core schema of the resulting file reflect that entry.
 *
 * @throws IOException          if the PDF cannot be read or written
 * @throws TransformerException declared by {@code XMPUtil.writeXMP}
 */
@Test
public void testWriteSingleUpdatesDCAndInfo() throws IOException, TransformerException {
    List<BibEntry> l = new LinkedList<>();
    l.add(t3BibtexEntry());
    XMPUtil.writeXMP(pdfFile, l, null, true, xmpPreferences);
    try (PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile())) {
        Assert.assertFalse("Cannot add metadata to encrypted document.", document.isEncrypted());

        // Check the document information dictionary
        Assert.assertEquals("Kelly Clarkson and Ozzy Osbourne", document.getDocumentInformation().getAuthor());
        Assert.assertEquals("Hypersonic ultra-sound", document.getDocumentInformation().getTitle());
        Assert.assertEquals("Huey Duck and Dewey Duck and Louie Duck", document.getDocumentInformation().getCustomMetadataValue("bibtex/editor"));
        Assert.assertEquals("Clarkson06", document.getDocumentInformation().getCustomMetadataValue("bibtex/bibtexkey"));
        Assert.assertEquals("peanut, butter, jelly", document.getDocumentInformation().getKeywords());
        assertEqualsBibtexEntry(t3BibtexEntry(), XMPUtil.getBibtexEntryFromDocumentInformation(document.getDocumentInformation()).get());

        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();
        Assert.assertNotNull("PDF has no XMP metadata stream after writing", metaRaw);
        XMPMetadata meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream()));
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);

        // Check Dublin Core
        List<XMPSchema> schemas = meta.getSchemasByNamespaceURI("http://purl.org/dc/elements/1.1/");
        Assert.assertEquals(1, schemas.size());
        XMPSchemaDublinCore dcSchema = (XMPSchemaDublinCore) schemas.iterator().next();
        Assert.assertNotNull(dcSchema);
        Assert.assertEquals("Hypersonic ultra-sound", dcSchema.getTitle());
        Assert.assertEquals("1982-07", dcSchema.getSequenceList("dc:date").get(0));
        Assert.assertEquals("Kelly Clarkson", dcSchema.getCreators().get(0));
        Assert.assertEquals("Ozzy Osbourne", dcSchema.getCreators().get(1));
        Assert.assertEquals("Huey Duck", dcSchema.getContributors().get(0));
        Assert.assertEquals("Dewey Duck", dcSchema.getContributors().get(1));
        Assert.assertEquals("Louie Duck", dcSchema.getContributors().get(2));

        // Case-insensitive comparison that does not depend on the default locale
        // (toLowerCase() without a locale maps 'I' differently under e.g. a Turkish locale).
        String type = dcSchema.getTypes().get(0);
        Assert.assertTrue("Unexpected dc:type: " + type, "InProceedings".equalsIgnoreCase(type));

        Assert.assertTrue(dcSchema.getRelationships().contains("bibtex/bibtexkey/Clarkson06"));
        Assert.assertEquals("peanut", dcSchema.getSubjects().get(0));
        Assert.assertEquals("butter", dcSchema.getSubjects().get(1));
        Assert.assertEquals("jelly", dcSchema.getSubjects().get(2));

        // Four relationships expected: Bibtexkey, Journal, pdf, booktitle
        Assert.assertEquals(4, dcSchema.getRelationships().size());
        assertEqualsBibtexEntry(t3BibtexEntry(), XMPUtil.getBibtexEntryFromDublinCore(dcSchema, xmpPreferences).get());
    }
}
Also used : XMPSchemaDublinCore(org.apache.jempbox.xmp.XMPSchemaDublinCore) BibEntry(org.jabref.model.entry.BibEntry) XMPMetadata(org.apache.jempbox.xmp.XMPMetadata) XMPSchema(org.apache.jempbox.xmp.XMPSchema) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) PDMetadata(org.apache.pdfbox.pdmodel.common.PDMetadata) LinkedList(java.util.LinkedList) PDDocumentCatalog(org.apache.pdfbox.pdmodel.PDDocumentCatalog) Test(org.junit.Test)

Example 8 with XMPSchemaDublinCore

use of org.apache.jempbox.xmp.XMPSchemaDublinCore in project jabref by JabRef.

the class XMPUtil method writeDublinCore.

/**
 * Try to write the given BibTeX entries as DublinCore XMP schemas.
 *
 * <p>Existing DublinCore schemas in the document are removed; one new DublinCore schema is added
 * per entry. All other schemas already present in the document's XMP metadata are kept.
 *
 * @param document       The pdf document to write to.
 * @param entries        The BibTeX entries that are written as schemas
 * @param database       maybenull An optional database which the given BibTeX entries belong to, which will be
 *                       used to resolve strings. If the database is null the strings will not be resolved.
 * @param xmpPreferences Preferences forwarded to {@code XMPUtil.writeToDCSchema} for each entry.
 * @throws IOException          if the existing metadata cannot be read or parsed
 * @throws TransformerException if the XMP metadata cannot be serialized
 */
private static void writeDublinCore(PDDocument document, Collection<BibEntry> entries, BibDatabase database, XMPPreferences xmpPreferences) throws IOException, TransformerException {
    Collection<BibEntry> resolvedEntries;
    if (database == null) {
        resolvedEntries = entries;
    } else {
        resolvedEntries = database.resolveForStrings(entries, false);
    }

    PDDocumentCatalog catalog = document.getDocumentCatalog();
    PDMetadata metaRaw = catalog.getMetadata();
    XMPMetadata meta;
    if (metaRaw == null) {
        meta = new XMPMetadata();
    } else {
        // Close the metadata stream once it has been parsed into a DOM; the original left it open.
        try (java.io.InputStream metadataIn = metaRaw.createInputStream()) {
            meta = new XMPMetadata(XMLUtil.parse(metadataIn));
        }
    }

    // Remove all current Dublin-Core schemas
    List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
    for (XMPSchema schema : schemas) {
        schema.getElement().getParentNode().removeChild(schema.getElement());
    }

    // The entries were already resolved above, so no database is passed on here.
    for (BibEntry entry : resolvedEntries) {
        XMPSchemaDublinCore dcSchema = new XMPSchemaDublinCore(meta);
        XMPUtil.writeToDCSchema(dcSchema, entry, null, xmpPreferences);
        meta.addSchema(dcSchema);
    }

    // Save to stream and then input that stream to the PDF
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    meta.save(os);
    ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
    PDMetadata metadataStream = new PDMetadata(document, is, false);
    catalog.setMetadata(metadataStream);
}
Also used : XMPSchemaDublinCore(org.apache.jempbox.xmp.XMPSchemaDublinCore) BibEntry(org.jabref.model.entry.BibEntry) TypedBibEntry(org.jabref.logic.TypedBibEntry) XMPMetadata(org.apache.jempbox.xmp.XMPMetadata) XMPSchema(org.apache.jempbox.xmp.XMPSchema) ByteArrayInputStream(java.io.ByteArrayInputStream) PDMetadata(org.apache.pdfbox.pdmodel.common.PDMetadata) ByteArrayOutputStream(java.io.ByteArrayOutputStream) PDDocumentCatalog(org.apache.pdfbox.pdmodel.PDDocumentCatalog)

Example 9 with XMPSchemaDublinCore

use of org.apache.jempbox.xmp.XMPSchemaDublinCore in project tika by apache.

the class PDFParser method extractMetadata.

/**
 * Populates {@code metadata} from the given PDF: access permissions, the XMP packet (Dublin Core,
 * media management and PDF/A identification), the document information dictionary, the PDF version
 * and the Adobe extension level found in the catalog's {@code Extensions} dictionary.
 *
 * <p>Failures while reading XMP schemas are recorded in {@code metadata} instead of aborting the
 * extraction.
 *
 * @param document the PDF to read from
 * @param metadata the metadata object to populate
 * @param context  the parse context, passed on to {@code loadDOM}
 * @throws TikaException declared by this method; presumably raised by the helpers it calls — confirm
 */
private void extractMetadata(PDDocument document, Metadata metadata, ParseContext context) throws TikaException {
    //first extract AccessPermissions
    AccessPermission ap = document.getCurrentAccessPermission();
    metadata.set(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY, Boolean.toString(ap.canExtractForAccessibility()));
    metadata.set(AccessPermissions.EXTRACT_CONTENT, Boolean.toString(ap.canExtractContent()));
    metadata.set(AccessPermissions.ASSEMBLE_DOCUMENT, Boolean.toString(ap.canAssembleDocument()));
    metadata.set(AccessPermissions.FILL_IN_FORM, Boolean.toString(ap.canFillInForm()));
    metadata.set(AccessPermissions.CAN_MODIFY, Boolean.toString(ap.canModify()));
    metadata.set(AccessPermissions.CAN_MODIFY_ANNOTATIONS, Boolean.toString(ap.canModifyAnnotations()));
    metadata.set(AccessPermissions.CAN_PRINT, Boolean.toString(ap.canPrint()));
    metadata.set(AccessPermissions.CAN_PRINT_DEGRADED, Boolean.toString(ap.canPrintDegraded()));
    //now go for the XMP
    Document dom = loadDOM(document.getDocumentCatalog().getMetadata(), metadata, context);
    XMPMetadata xmp = null;
    if (dom != null) {
        xmp = new XMPMetadata(dom);
    }
    XMPSchemaDublinCore dcSchema = null;
    if (xmp != null) {
        try {
            dcSchema = xmp.getDublinCoreSchema();
        } catch (IOException e) {
            // Best effort: continue without Dublin Core, but leave a trace instead of swallowing
            // the failure silently (same key as the XMP failure handling further below).
            metadata.set(TikaCoreProperties.TIKA_META_PREFIX + "pdf:metadata-xmp-parse-failed", "" + e);
        }
        JempboxExtractor.extractXMPMM(xmp, metadata);
    }
    PDDocumentInformation info = document.getDocumentInformation();
    metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
    extractMultilingualItems(metadata, TikaCoreProperties.TITLE, info.getTitle(), dcSchema);
    addMetadata(metadata, PDF.DOC_INFO_TITLE, info.getTitle());
    extractDublinCoreListItems(metadata, TikaCoreProperties.CREATOR, info.getAuthor(), dcSchema);
    addMetadata(metadata, PDF.DOC_INFO_CREATOR, info.getAuthor());
    extractDublinCoreListItems(metadata, TikaCoreProperties.CONTRIBUTOR, null, dcSchema);
    addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator());
    addMetadata(metadata, PDF.DOC_INFO_CREATOR_TOOL, info.getCreator());
    addMetadata(metadata, TikaCoreProperties.KEYWORDS, info.getKeywords());
    addMetadata(metadata, PDF.DOC_INFO_KEY_WORDS, info.getKeywords());
    addMetadata(metadata, "producer", info.getProducer());
    addMetadata(metadata, PDF.DOC_INFO_PRODUCER, info.getProducer());
    extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema);
    addMetadata(metadata, PDF.DOC_INFO_SUBJECT, info.getSubject());
    // TODO: Move to description in Tika 2.0
    addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject());
    addMetadata(metadata, "trapped", info.getTrapped());
    addMetadata(metadata, PDF.DOC_INFO_TRAPPED, info.getTrapped());
    // TODO Remove these in Tika 2.0
    addMetadata(metadata, "created", info.getCreationDate());
    addMetadata(metadata, PDF.DOC_INFO_CREATED, info.getCreationDate());
    addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate());
    Calendar modified = info.getModificationDate();
    addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
    addMetadata(metadata, TikaCoreProperties.MODIFIED, modified);
    addMetadata(metadata, PDF.DOC_INFO_MODIFICATION_DATE, modified);
    // All remaining metadata is custom
    // Copy this over as-is
    List<String> handledMetadata = Arrays.asList("Author", "Creator", "CreationDate", "ModDate", "Keywords", "Producer", "Subject", "Title", "Trapped");
    for (COSName key : info.getCOSObject().keySet()) {
        String name = key.getName();
        if (!handledMetadata.contains(name)) {
            addMetadata(metadata, name, info.getCOSObject().getDictionaryObject(key));
            addMetadata(metadata, PDF.PDF_DOC_INFO_CUSTOM_PREFIX + name, info.getCOSObject().getDictionaryObject(key));
        }
    }
    //try to get the various versions
    //Caveats:
    //    there is currently a fair amount of redundancy
    //    TikaCoreProperties.FORMAT can be multivalued
    //    There are also three potential pdf specific version keys: pdf:PDFVersion, pdfa:PDFVersion, pdf:PDFExtensionVersion
    String pdfVersion = Float.toString(document.getDocument().getVersion());
    metadata.set(PDF.PDF_VERSION, pdfVersion);
    metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=" + pdfVersion);
    try {
        if (xmp != null) {
            xmp.addXMLNSMapping(XMPSchemaPDFAId.NAMESPACE, XMPSchemaPDFAId.class);
            XMPSchemaPDFAId pdfaxmp = (XMPSchemaPDFAId) xmp.getSchemaByClass(XMPSchemaPDFAId.class);
            if (pdfaxmp != null) {
                if (pdfaxmp.getPart() != null) {
                    metadata.set(PDF.PDFAID_PART, Integer.toString(pdfaxmp.getPart()));
                }
                if (pdfaxmp.getConformance() != null) {
                    metadata.set(PDF.PDFAID_CONFORMANCE, pdfaxmp.getConformance());
                    // NOTE(review): if the part is absent while the conformance is present, this
                    // yields a version string containing "null" — confirm whether that is intended.
                    String version = "A-" + pdfaxmp.getPart() + pdfaxmp.getConformance().toLowerCase(Locale.ROOT);
                    metadata.set(PDF.PDFA_VERSION, version);
                    metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=\"" + version + "\"");
                }
            }
        // TODO WARN if this XMP version is inconsistent with document header version?
        }
    } catch (IOException e) {
        metadata.set(TikaCoreProperties.TIKA_META_PREFIX + "pdf:metadata-xmp-parse-failed", "" + e);
    }
    //TODO: Let's try to move this into PDFBox.
    //Attempt to determine Adobe extension level, if present:
    COSDictionary root = document.getDocumentCatalog().getCOSObject();
    // Guard the casts: a malformed PDF may store something other than a dictionary here,
    // which would otherwise surface as a ClassCastException.
    Object extensionsObject = root.getDictionaryObject(COSName.getPDFName("Extensions"));
    if (extensionsObject instanceof COSDictionary) {
        COSDictionary extensions = (COSDictionary) extensionsObject;
        COSName adobeName = COSName.getPDFName("ADBE");
        for (COSName extName : extensions.keySet()) {
            // If it's an Adobe one, interpret it to determine the extension level:
            if (extName.equals(adobeName)) {
                Object adobeExtObject = extensions.getDictionaryObject(extName);
                if (adobeExtObject instanceof COSDictionary) {
                    COSDictionary adobeExt = (COSDictionary) adobeExtObject;
                    String baseVersion = adobeExt.getNameAsString(COSName.getPDFName("BaseVersion"));
                    int el = adobeExt.getInt(COSName.getPDFName("ExtensionLevel"));
                    //-1 is sentinel value that something went wrong in getInt
                    if (el != -1) {
                        metadata.set(PDF.PDF_EXTENSION_VERSION, baseVersion + " Adobe Extension Level " + el);
                        metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=\"" + baseVersion + " Adobe Extension Level " + el + "\"");
                    }
                }
            } else {
                // WARN that there is an Extension, but it's not Adobe's, and so is a 'new' format'.
                metadata.set("pdf:foundNonAdobeExtensionName", extName.getName());
            }
        }
    }
}
Also used : XMPSchemaDublinCore(org.apache.jempbox.xmp.XMPSchemaDublinCore) COSDictionary(org.apache.pdfbox.cos.COSDictionary) Calendar(java.util.Calendar) AccessPermission(org.apache.pdfbox.pdmodel.encryption.AccessPermission) IOException(java.io.IOException) COSString(org.apache.pdfbox.cos.COSString) Document(org.w3c.dom.Document) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) COSName(org.apache.pdfbox.cos.COSName) XMPSchemaPDFAId(org.apache.jempbox.xmp.pdfa.XMPSchemaPDFAId) XMPMetadata(org.apache.jempbox.xmp.XMPMetadata) PDDocumentInformation(org.apache.pdfbox.pdmodel.PDDocumentInformation)

Example 10 with XMPSchemaDublinCore

use of org.apache.jempbox.xmp.XMPSchemaDublinCore in project OpenOLAT by OpenOLAT.

the class PdfDocument method addMetadata.

/**
 * Stores descriptive metadata in the PDF, both in the document information dictionary and as an
 * XMP packet (PDF, Basic and Dublin Core schemas) attached to the document catalog.
 *
 * <p>The current time is used for every creation, modification and metadata date.
 *
 * @param title   the document title
 * @param subject the document subject; also used as the Dublin Core description
 * @param author  the document author; also stored as the creator of the information dictionary
 * @throws IOException          declared by the PDFBox/JempBox calls used here
 * @throws TransformerException declared by the XMP import into the metadata stream
 */
public void addMetadata(String title, String subject, String author) throws IOException, TransformerException {
    final String toolName = "OpenOLAT";
    final Calendar now = Calendar.getInstance();

    // Document information dictionary
    PDDocumentInformation docInfo = document.getDocumentInformation();
    docInfo.setAuthor(author);
    docInfo.setCreator(author);
    docInfo.setCreationDate(now);
    docInfo.setModificationDate(now);
    docInfo.setTitle(title);
    docInfo.setSubject(subject);

    // XMP packet: PDF schema, then Basic schema, then Dublin Core schema
    XMPMetadata xmp = new XMPMetadata();

    XMPSchemaPDF pdf = xmp.addPDFSchema();
    pdf.setProducer(toolName);

    XMPSchemaBasic basic = xmp.addBasicSchema();
    basic.setModifyDate(now);
    basic.setCreateDate(now);
    basic.setCreatorTool(toolName);
    basic.setMetadataDate(now);

    XMPSchemaDublinCore dublinCore = xmp.addDublinCoreSchema();
    dublinCore.setTitle(title);
    dublinCore.addCreator(author);
    dublinCore.setDescription(subject);

    // Attach the XMP packet to the catalog
    PDMetadata xmpStream = new PDMetadata(document);
    xmpStream.importXMPMetadata(xmp);
    document.getDocumentCatalog().setMetadata(xmpStream);
}
Also used : XMPSchemaDublinCore(org.apache.jempbox.xmp.XMPSchemaDublinCore) XMPMetadata(org.apache.jempbox.xmp.XMPMetadata) XMPSchemaBasic(org.apache.jempbox.xmp.XMPSchemaBasic) Calendar(java.util.Calendar) XMPSchemaPDF(org.apache.jempbox.xmp.XMPSchemaPDF) PDMetadata(org.apache.pdfbox.pdmodel.common.PDMetadata) PDDocumentInformation(org.apache.pdfbox.pdmodel.PDDocumentInformation) PDDocumentCatalog(org.apache.pdfbox.pdmodel.PDDocumentCatalog)

Aggregations

XMPMetadata (org.apache.jempbox.xmp.XMPMetadata)10 XMPSchemaDublinCore (org.apache.jempbox.xmp.XMPSchemaDublinCore)10 PDDocumentCatalog (org.apache.pdfbox.pdmodel.PDDocumentCatalog)8 PDMetadata (org.apache.pdfbox.pdmodel.common.PDMetadata)8 PDDocument (org.apache.pdfbox.pdmodel.PDDocument)7 XMPSchema (org.apache.jempbox.xmp.XMPSchema)6 BibEntry (org.jabref.model.entry.BibEntry)6 Calendar (java.util.Calendar)4 LinkedList (java.util.LinkedList)4 PDDocumentInformation (org.apache.pdfbox.pdmodel.PDDocumentInformation)4 Test (org.junit.Test)4 XMPSchemaBasic (org.apache.jempbox.xmp.XMPSchemaBasic)3 IOException (java.io.IOException)2 XMPSchemaPDF (org.apache.jempbox.xmp.XMPSchemaPDF)2 TypedBibEntry (org.jabref.logic.TypedBibEntry)2 AuthorList (org.jabref.model.entry.AuthorList)2 BoundingBox (com.revolsys.geometry.model.BoundingBox)1 Viewport2D (com.revolsys.swing.map.Viewport2D)1 Project (com.revolsys.swing.map.layer.Project)1 BufferedReader (java.io.BufferedReader)1