Search in sources :

Example 1 with XMPSchemaMediaManagement

use of org.apache.jempbox.xmp.XMPSchemaMediaManagement in project jabref by JabRef.

the class XMPUtilTest method testSimpleUpdate.

/**
 * Tests whether writing BibTex.xmp will preserve existing XMP-descriptions.
 *
 * <p>The PDF is first given a hand-written XMP packet containing XMP Basic,
 * Media Management and Dublin Core descriptions. A BibTeX entry is then
 * written twice (the second time with a changed author) and after each write
 * the test checks that the entry can be read back and that the hand-written
 * descriptions are still present.
 *
 * @throws Exception (indicating a failure)
 */
@Test
public void testSimpleUpdate() throws Exception {
    // Everything both fixtures have in common; the last rdf:Description (Dublin Core)
    // is deliberately left open so that the second fixture can append a dc:title.
    String existingDescriptions = " <rdf:Description rdf:about=''"
            + "  xmlns:xmp='http://ns.adobe.com/xap/1.0/'>"
            + "  <xmp:CreatorTool>Acrobat PDFMaker 7.0.7</xmp:CreatorTool>"
            + "  <xmp:ModifyDate>2006-08-07T18:50:24+02:00</xmp:ModifyDate>"
            + "  <xmp:CreateDate>2006-08-07T14:44:24+02:00</xmp:CreateDate>"
            + "  <xmp:MetadataDate>2006-08-07T18:50:24+02:00</xmp:MetadataDate>"
            + " </rdf:Description>"
            + " <rdf:Description rdf:about=''"
            + "  xmlns:xapMM='http://ns.adobe.com/xap/1.0/mm/'>"
            + "  <xapMM:DocumentID>uuid:843cd67d-495e-4c1e-a4cd-64178f6b3299</xapMM:DocumentID>"
            + "  <xapMM:InstanceID>uuid:1e56b4c0-6782-440d-ba76-d2b3d87547d1</xapMM:InstanceID>"
            + "  <xapMM:VersionID>"
            + "   <rdf:Seq>"
            + "    <rdf:li>17</rdf:li>"
            + "   </rdf:Seq>"
            + "  </xapMM:VersionID>"
            + " </rdf:Description>"
            + " <rdf:Description rdf:about=''"
            + "  xmlns:dc='http://purl.org/dc/elements/1.1/'>"
            + "  <dc:format>application/pdf</dc:format>";
    String dublinCoreTitle = "  <dc:title>"
            + "   <rdf:Alt>"
            + "    <rdf:li xml:lang='x-default'>Questionnaire.pdf</rdf:li>"
            + "   </rdf:Alt>"
            + "  </dc:title>";
    String closeDescription = "</rdf:Description>";

    writeManually(pdfFile, XMPUtilTest.bibtexXPacket(existingDescriptions + closeDescription));
    // Nothing there yet, but should not crash
    Assert.assertEquals(Collections.emptyList(), XMPUtil.readXMP(pdfFile, xmpPreferences));

    writeManually(pdfFile,
            XMPUtilTest.bibtexXPacket(existingDescriptions + dublinCoreTitle + closeDescription));
    // Title is Questionnaire.pdf so the DublinCore fallback should hit
    // in...
    Assert.assertEquals(1, XMPUtil.readXMP(pdfFile, xmpPreferences).size());

    // Now write new packet and check if it was correctly written
    writeEntryAndCheckSimpleUpdate(t1BibtexEntry());

    // Now alter the Bibtex entry, write it and do all the checks again
    BibEntry toSet = t1BibtexEntry();
    toSet.setField("author", "Pokemon!");
    writeEntryAndCheckSimpleUpdate(toSet);
}

/**
 * Writes {@code entry} as XMP into the test PDF, reads it back and checks both
 * the round trip and that the hand-written descriptions survived the write.
 *
 * @param entry the BibTeX entry to write and to expect when reading back
 * @throws Exception (indicating a failure)
 */
private void writeEntryAndCheckSimpleUpdate(BibEntry entry) throws Exception {
    XMPUtil.writeXMP(pdfFile, entry, null, xmpPreferences);
    List<BibEntry> readBack = XMPUtil.readXMP(pdfFile.getAbsoluteFile(), xmpPreferences);
    Assert.assertEquals(1, readBack.size());
    assertEqualsBibtexEntry(entry, readBack.get(0));
    assertSimpleUpdateKeptExistingXmp();
}

/**
 * Parses the XMP packet stored in the test PDF and asserts that it holds exactly
 * four schemas: one BibTeX schema plus the Dublin Core, XMP Basic and Media
 * Management descriptions with the values from the hand-written fixture.
 *
 * @throws Exception (indicating a failure)
 */
private void assertSimpleUpdateKeptExistingXmp() throws Exception {
    try (PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile())) {
        if (document.isEncrypted()) {
            throw new IOException("Error: Cannot read metadata from encrypted document.");
        }
        PDMetadata metaRaw = document.getDocumentCatalog().getMetadata();
        XMPMetadata meta = (metaRaw == null)
                ? new XMPMetadata()
                : new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream()));
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);

        Assert.assertEquals(4, meta.getSchemas().size());

        List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
        Assert.assertEquals(1, schemas.size());

        schemas = meta.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
        Assert.assertEquals(1, schemas.size());
        XMPSchemaDublinCore dublinCore = (XMPSchemaDublinCore) schemas.get(0);
        Assert.assertEquals("application/pdf", dublinCore.getFormat());

        schemas = meta.getSchemasByNamespaceURI(XMPSchemaBasic.NAMESPACE);
        Assert.assertEquals(1, schemas.size());
        XMPSchemaBasic basic = (XMPSchemaBasic) schemas.get(0);
        Assert.assertEquals("Acrobat PDFMaker 7.0.7", basic.getCreatorTool());

        // Expected xmp:CreateDate of the fixture: 2006-08-07T14:44:24+02:00.
        // Calendar.HOUR is the 12-hour field; expected and actual are both read
        // through that same field below.
        Calendar expected = Calendar.getInstance();
        expected.clear();
        expected.set(Calendar.YEAR, 2006);
        expected.set(Calendar.MONTH, Calendar.AUGUST);
        expected.set(Calendar.DATE, 7);
        expected.set(Calendar.HOUR, 14);
        expected.set(Calendar.MINUTE, 44);
        expected.set(Calendar.SECOND, 24);
        expected.setTimeZone(TimeZone.getTimeZone("GMT+2"));
        Calendar actual = basic.getCreateDate();
        Assert.assertEquals(expected.get(Calendar.YEAR), actual.get(Calendar.YEAR));
        Assert.assertEquals(expected.get(Calendar.MONTH), actual.get(Calendar.MONTH));
        Assert.assertEquals(expected.get(Calendar.DATE), actual.get(Calendar.DATE));
        Assert.assertEquals(expected.get(Calendar.HOUR), actual.get(Calendar.HOUR));
        Assert.assertEquals(expected.get(Calendar.MINUTE), actual.get(Calendar.MINUTE));
        Assert.assertEquals(expected.get(Calendar.SECOND), actual.get(Calendar.SECOND));
        Assert.assertTrue(expected.getTimeZone().hasSameRules(actual.getTimeZone()));

        schemas = meta.getSchemasByNamespaceURI(XMPSchemaMediaManagement.NAMESPACE);
        Assert.assertEquals(1, schemas.size());
        XMPSchemaMediaManagement mediaManagement = (XMPSchemaMediaManagement) schemas.get(0);
        Assert.assertEquals("17", mediaManagement.getSequenceList("xapMM:VersionID").get(0));
    }
}
Also used : XMPSchemaDublinCore(org.apache.jempbox.xmp.XMPSchemaDublinCore) BibEntry(org.jabref.model.entry.BibEntry) XMPSchemaBasic(org.apache.jempbox.xmp.XMPSchemaBasic) XMPSchema(org.apache.jempbox.xmp.XMPSchema) Calendar(java.util.Calendar) IOException(java.io.IOException) PDMetadata(org.apache.pdfbox.pdmodel.common.PDMetadata) PDDocumentCatalog(org.apache.pdfbox.pdmodel.PDDocumentCatalog) XMPSchemaMediaManagement(org.apache.jempbox.xmp.XMPSchemaMediaManagement) XMPMetadata(org.apache.jempbox.xmp.XMPMetadata) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) AuthorList(org.jabref.model.entry.AuthorList) List(java.util.List) LinkedList(java.util.LinkedList) Test(org.junit.Test)

Example 2 with XMPSchemaMediaManagement

use of org.apache.jempbox.xmp.XMPSchemaMediaManagement in project tika by apache.

the class JempboxExtractor method extractXMPMM.

/**
 * Extracts Media Management metadata from XMP.
 *
 * <p>Adds the document id, the document and instance ids of the DerivedFrom
 * reference, and at most {@code MAX_EVENT_HISTORY_IN_XMPMM} history events to
 * {@code metadata}. A history event is only added when it has a non-blank
 * instance id.
 *
 * <p>Silently swallows exceptions: extraction is best effort, so a value that
 * cannot be read is skipped (or recorded as an empty string inside a history
 * event) instead of failing the whole extraction.
 *
 * @param xmp the XMP packet to read from; nothing happens when {@code null}
 * @param metadata the metadata object that receives the extracted values
 */
public static void extractXMPMM(XMPMetadata xmp, Metadata metadata) {
    if (xmp == null) {
        return;
    }
    XMPSchemaMediaManagement mmSchema;
    try {
        mmSchema = xmp.getMediaManagementSchema();
    } catch (IOException ignored) {
        // best effort: without a readable schema there is nothing to extract
        return;
    }
    if (mmSchema == null) {
        return;
    }

    addMetadata(metadata, XMPMM.DOCUMENTID, mmSchema.getDocumentID());
    //not currently supported by JempBox...
    //          metadata.set(XMPMM.INSTANCEID, mmSchema.getInstanceID());

    ResourceRef derivedFrom = mmSchema.getDerivedFrom();
    if (derivedFrom != null) {
        // Each id is read in its own try so that a failure on one does not
        // prevent the other from being recorded.
        try {
            addMetadata(metadata, XMPMM.DERIVED_FROM_DOCUMENTID, derivedFrom.getDocumentID());
        } catch (NullPointerException ignored) {
            // best effort: skip the document id
        }
        try {
            addMetadata(metadata, XMPMM.DERIVED_FROM_INSTANCEID, derivedFrom.getInstanceID());
        } catch (NullPointerException ignored) {
            // best effort: skip the instance id
        }
        //TODO: not yet supported by XMPBox...extract OriginalDocumentID
        //in DerivedFrom section
    }

    // Fetch the history once instead of once for the null check and again for the loop.
    Iterable<ResourceEvent> history = mmSchema.getHistory();
    if (history == null) {
        return;
    }
    int eventsAdded = 0;
    for (ResourceEvent event : history) {
        if (eventsAdded >= MAX_EVENT_HISTORY_IN_XMPMM) {
            break;
        }
        String instanceId = null;
        String action = null;
        String softwareAgent = null;
        try {
            instanceId = event.getInstanceID();
            action = event.getAction();
            softwareAgent = event.getSoftwareAgent();
        } catch (NullPointerException ignored) {
            //instanceid can throw npe; whatever was read before the failure is kept
        }
        // The date is read separately: getWhen can throw IOException, and that
        // must not discard the software agent of an otherwise readable event.
        Calendar when = null;
        try {
            when = event.getWhen();
        } catch (NullPointerException | IOException ignored) {
            // best effort: the event is recorded with an empty date
        }
        if (instanceId != null && instanceId.trim().length() > 0) {
            //for absent data elements, pass in empty strings so
            //that parallel arrays will have matching offsets
            //for absent data
            action = (action == null) ? "" : action;
            String dateString = (when == null) ? "" : DateUtils.formatDate(when);
            softwareAgent = (softwareAgent == null) ? "" : softwareAgent;
            metadata.add(XMPMM.HISTORY_EVENT_INSTANCEID, instanceId);
            metadata.add(XMPMM.HISTORY_ACTION, action);
            metadata.add(XMPMM.HISTORY_WHEN, dateString);
            metadata.add(XMPMM.HISTORY_SOFTWARE_AGENT, softwareAgent);
            eventsAdded++;
        }
    }
}
Also used : Calendar(java.util.Calendar) ResourceEvent(org.apache.jempbox.xmp.ResourceEvent) ResourceRef(org.apache.jempbox.xmp.ResourceRef) IOException(java.io.IOException) XMPSchemaMediaManagement(org.apache.jempbox.xmp.XMPSchemaMediaManagement)

Aggregations

IOException (java.io.IOException)2 Calendar (java.util.Calendar)2 XMPSchemaMediaManagement (org.apache.jempbox.xmp.XMPSchemaMediaManagement)2 LinkedList (java.util.LinkedList)1 List (java.util.List)1 ResourceEvent (org.apache.jempbox.xmp.ResourceEvent)1 ResourceRef (org.apache.jempbox.xmp.ResourceRef)1 XMPMetadata (org.apache.jempbox.xmp.XMPMetadata)1 XMPSchema (org.apache.jempbox.xmp.XMPSchema)1 XMPSchemaBasic (org.apache.jempbox.xmp.XMPSchemaBasic)1 XMPSchemaDublinCore (org.apache.jempbox.xmp.XMPSchemaDublinCore)1 PDDocument (org.apache.pdfbox.pdmodel.PDDocument)1 PDDocumentCatalog (org.apache.pdfbox.pdmodel.PDDocumentCatalog)1 PDMetadata (org.apache.pdfbox.pdmodel.common.PDMetadata)1 AuthorList (org.jabref.model.entry.AuthorList)1 BibEntry (org.jabref.model.entry.BibEntry)1 Test (org.junit.Test)1