Use of org.apache.jempbox.xmp.XMPSchema in project jabref by JabRef.
Class XMPUtil, method readXMP.
/**
 * Reads the BibTeX entries stored in the metadata of a PDF file supplied as a stream.
 * <p>
 * The sources are consulted in order, each one only if nothing has been found so far:
 * the BibTeX XMP schemas, the Dublin Core XMP schemas, and finally the non-XMP document
 * information of the PDF.
 *
 * @param inputStream    the stream containing the PDF file to read from
 * @param xmpPreferences preferences handed to the Dublin Core conversion
 * @return the entries retrieved from the stream; may be empty, but never null
 * @throws IOException if the file cannot be read, so that the user can remove a lock or cancel
 *                     the operation
 */
public static List<BibEntry> readXMP(InputStream inputStream, XMPPreferences xmpPreferences) throws IOException {
    List<BibEntry> entries = new LinkedList<>();

    try (PDDocument document = loadWithAutomaticDecryption(inputStream)) {
        Optional<XMPMetadata> metadata = XMPUtil.getXMPMetadata(document);
        if (metadata.isPresent()) {
            XMPMetadata xmp = metadata.get();

            // First choice: entries stored in the dedicated BibTeX schema
            List<XMPSchema> bibtexSchemas = xmp.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
            for (XMPSchema schema : bibtexSchemas) {
                BibEntry bibtexEntry = ((XMPSchemaBibtex) schema).getBibtexEntry();
                if (bibtexEntry.getType() == null) {
                    bibtexEntry.setType(BibEntry.DEFAULT_TYPE);
                }
                entries.add(bibtexEntry);
            }

            // Second choice: fall back to Dublin Core if the BibTeX schema yielded nothing
            if (entries.isEmpty()) {
                List<XMPSchema> dublinCoreSchemas = xmp.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
                for (XMPSchema schema : dublinCoreSchemas) {
                    Optional<BibEntry> converted =
                            XMPUtil.getBibtexEntryFromDublinCore((XMPSchemaDublinCore) schema, xmpPreferences);
                    if (!converted.isPresent()) {
                        continue;
                    }
                    BibEntry dublinCoreEntry = converted.get();
                    if (dublinCoreEntry.getType() == null) {
                        dublinCoreEntry.setType(BibEntry.DEFAULT_TYPE);
                    }
                    entries.add(dublinCoreEntry);
                }
            }
        }

        // Last resort: no XMP metadata produced an entry, so search the non-XMP metadata
        if (entries.isEmpty()) {
            XMPUtil.getBibtexEntryFromDocumentInformation(document.getDocumentInformation())
                    .ifPresent(entries::add);
        }
    }

    // Hand back the shared empty list if no metadata was found
    return entries.isEmpty() ? Collections.<BibEntry>emptyList() : entries;
}
Use of org.apache.jempbox.xmp.XMPSchema in project jabref by JabRef.
Class XMPUtilTest, method testReadWriteDC.
/**
 * Writes a single entry with {@code XMPUtil.writeXMP} and verifies that both the PDF document
 * information and the Dublin Core schema of the resulting file carry the entry's data.
 *
 * @throws IOException          if thrown by the PDF or XMP calls used in this test
 * @throws TransformerException if thrown by the XMP calls used in this test
 */
@Test
public void testReadWriteDC() throws IOException, TransformerException {
    List<BibEntry> entries = new LinkedList<>();
    entries.add(t3BibtexEntry());
    XMPUtil.writeXMP(pdfFile, entries, null, true, xmpPreferences);

    try (PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile())) {
        if (document.isEncrypted()) {
            Assert.fail("Cannot add metadata to encrypted document.");
        }

        // Check document information
        Assert.assertEquals("Kelly Clarkson and Ozzy Osbourne", document.getDocumentInformation().getAuthor());
        Assert.assertEquals("Hypersonic ultra-sound", document.getDocumentInformation().getTitle());
        Assert.assertEquals("Huey Duck and Dewey Duck and Louie Duck", document.getDocumentInformation().getCustomMetadataValue("bibtex/editor"));
        Assert.assertEquals("Clarkson06", document.getDocumentInformation().getCustomMetadataValue("bibtex/bibtexkey"));
        Assert.assertEquals("peanut, butter, jelly", document.getDocumentInformation().getKeywords());
        assertEqualsBibtexEntry(t3BibtexEntry(), XMPUtil.getBibtexEntryFromDocumentInformation(document.getDocumentInformation()).get());

        PDMetadata metaRaw = document.getDocumentCatalog().getMetadata();
        Assert.assertNotNull("PDF carries no metadata stream", metaRaw);

        XMPMetadata meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream()));
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);

        // Check Dublin Core
        List<XMPSchema> schemas = meta.getSchemasByNamespaceURI("http://purl.org/dc/elements/1.1/");
        Assert.assertEquals(1, schemas.size());
        XMPSchemaDublinCore dcSchema = (XMPSchemaDublinCore) schemas.iterator().next();
        Assert.assertNotNull(dcSchema);

        Assert.assertEquals("Hypersonic ultra-sound", dcSchema.getTitle());
        Assert.assertEquals("1982-07", dcSchema.getSequenceList("dc:date").get(0));

        Assert.assertEquals("Kelly Clarkson", dcSchema.getCreators().get(0));
        Assert.assertEquals("Ozzy Osbourne", dcSchema.getCreators().get(1));

        Assert.assertEquals("Huey Duck", dcSchema.getContributors().get(0));
        Assert.assertEquals("Dewey Duck", dcSchema.getContributors().get(1));
        Assert.assertEquals("Louie Duck", dcSchema.getContributors().get(2));

        // Case-insensitive comparison that does not depend on the default locale
        // (String.toLowerCase() maps 'I' differently under e.g. a Turkish locale).
        String dcType = dcSchema.getTypes().get(0);
        Assert.assertTrue("Unexpected Dublin Core type: " + dcType, "InProceedings".equalsIgnoreCase(dcType));

        Assert.assertTrue(dcSchema.getRelationships().contains("bibtex/bibtexkey/Clarkson06"));

        Assert.assertEquals("peanut", dcSchema.getSubjects().get(0));
        Assert.assertEquals("butter", dcSchema.getSubjects().get(1));
        Assert.assertEquals("jelly", dcSchema.getSubjects().get(2));

        // Expected relationships: Bibtexkey, Journal, pdf, booktitle
        Assert.assertEquals(4, dcSchema.getRelationships().size());

        assertEqualsBibtexEntry(t3BibtexEntry(), XMPUtil.getBibtexEntryFromDublinCore(dcSchema, xmpPreferences).get());
    }
}
Use of org.apache.jempbox.xmp.XMPSchema in project jabref by JabRef.
Class XMPUtilTest, method testResolveStrings2.
/**
 * A more thorough test case for resolveStrings which also makes sure that the document
 * information and the Dublin Core schema are written correctly.
 * <p>
 * Data was contributed by Philip K.F. Hölzenspies (p.k.f.holzenspies [at] utwente.nl).
 *
 * @throws IOException          if thrown while reading the fixture or by the PDF/XMP calls used here
 * @throws TransformerException if thrown by the XMP calls used here
 */
@Test
public void testResolveStrings2() throws IOException, TransformerException {
    try (BufferedReader reader = Files.newBufferedReader(Paths.get("src/test/resources/org/jabref/util/twente.bib"), StandardCharsets.UTF_8)) {
        ParserResult parsed = new BibtexParser(importFormatPreferences).parse(reader);
        Assert.assertEquals("Arvind", parsed.getDatabase().resolveForStrings("#Arvind#"));

        AuthorList expectedAuthors = AuthorList.parse("Patterson, David and Arvind and Asanov\\'\\i{}c, Krste and Chiou, Derek and Hoe, James and Kozyrakis, Christos and Lu, S{hih-Lien} and Oskin, Mark and Rabaey, Jan and Wawrzynek, John");

        try {
            XMPUtil.writeXMP(pdfFile, parsed.getDatabase().getEntryByKey("Patterson06").get(), parsed.getDatabase(), xmpPreferences);

            // 1) The main read function must load the entry correctly
            BibEntry fromXmp = XMPUtil.readXMP(pdfFile, xmpPreferences).get(0);
            Assert.assertNotNull(fromXmp);
            Assert.assertEquals(expectedAuthors, AuthorList.parse(fromXmp.getField("author").get()));

            try (PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile())) {
                // 2) The document information must carry the same authors
                Assert.assertEquals(expectedAuthors, AuthorList.parse(document.getDocumentInformation().getAuthor()));
                BibEntry fromDocumentInformation = XMPUtil.getBibtexEntryFromDocumentInformation(document.getDocumentInformation()).get();
                Assert.assertEquals(expectedAuthors, AuthorList.parse(fromDocumentInformation.getField("author").get()));

                // 3) The Dublin Core schema must carry the same authors
                PDMetadata rawMetadata = document.getDocumentCatalog().getMetadata();
                if (rawMetadata == null) {
                    Assert.fail();
                    // To avoid warnings
                    return;
                }

                XMPMetadata xmp = new XMPMetadata(XMLUtil.parse(rawMetadata.createInputStream()));
                xmp.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);

                List<XMPSchema> dublinCoreSchemas = xmp.getSchemasByNamespaceURI("http://purl.org/dc/elements/1.1/");
                Assert.assertEquals(1, dublinCoreSchemas.size());

                XMPSchemaDublinCore dublinCore = (XMPSchemaDublinCore) dublinCoreSchemas.iterator().next();
                Assert.assertNotNull(dublinCore);
                Assert.assertEquals("David Patterson", dublinCore.getCreators().get(0));
                Assert.assertEquals("Arvind", dublinCore.getCreators().get(1));
                Assert.assertEquals("Krste Asanov\\'\\i{}c", dublinCore.getCreators().get(2));

                BibEntry fromDublinCore = XMPUtil.getBibtexEntryFromDublinCore(dublinCore, xmpPreferences).get();
                Assert.assertNotNull(fromDublinCore);
                Assert.assertEquals(expectedAuthors, AuthorList.parse(fromDublinCore.getField("author").get()));
            }
        } finally {
            boolean deleted = pdfFile.delete();
            if (!deleted) {
                System.err.println("Cannot delete temporary file");
            }
        }
    }
}
Use of org.apache.jempbox.xmp.XMPSchema in project jabref by JabRef.
Class XMPUtilTest, method testSimpleUpdate.
/**
 * Tests whether writing BibTeX XMP preserves existing XMP descriptions.
 * <p>
 * A packet with XMP basic, media management and Dublin Core descriptions is written by hand.
 * A BibTeX entry is then written twice (the second time with a changed author), and after each
 * write the hand-written descriptions are checked to still be present and unchanged.
 *
 * @throws Exception (indicating a failure)
 */
@Test
public void testSimpleUpdate() throws Exception {
    // XMP basic and media management descriptions plus the opening of a Dublin Core description;
    // shared by both hand-written packets below.
    String existingDescriptions = " <rdf:Description rdf:about=''" + " xmlns:xmp='http://ns.adobe.com/xap/1.0/'>" + " <xmp:CreatorTool>Acrobat PDFMaker 7.0.7</xmp:CreatorTool>" + " <xmp:ModifyDate>2006-08-07T18:50:24+02:00</xmp:ModifyDate>" + " <xmp:CreateDate>2006-08-07T14:44:24+02:00</xmp:CreateDate>" + " <xmp:MetadataDate>2006-08-07T18:50:24+02:00</xmp:MetadataDate>" + " </rdf:Description>"
            + " <rdf:Description rdf:about=''" + " xmlns:xapMM='http://ns.adobe.com/xap/1.0/mm/'>" + " <xapMM:DocumentID>uuid:843cd67d-495e-4c1e-a4cd-64178f6b3299</xapMM:DocumentID>" + " <xapMM:InstanceID>uuid:1e56b4c0-6782-440d-ba76-d2b3d87547d1</xapMM:InstanceID>" + " <xapMM:VersionID>" + " <rdf:Seq>" + " <rdf:li>17</rdf:li>" + " </rdf:Seq>" + " </xapMM:VersionID>" + " </rdf:Description>"
            + " <rdf:Description rdf:about=''" + " xmlns:dc='http://purl.org/dc/elements/1.1/'>" + " <dc:format>application/pdf</dc:format>";
    String dublinCoreTitle = " <dc:title>" + " <rdf:Alt>" + " <rdf:li xml:lang='x-default'>Questionnaire.pdf</rdf:li>" + " </rdf:Alt>" + " </dc:title>";
    String descriptionEnd = "</rdf:Description>";

    writeManually(pdfFile, XMPUtilTest.bibtexXPacket(existingDescriptions + descriptionEnd));
    // Nothing there yet, but should not crash
    Assert.assertEquals(Collections.emptyList(), XMPUtil.readXMP(pdfFile, xmpPreferences));

    writeManually(pdfFile, XMPUtilTest.bibtexXPacket(existingDescriptions + dublinCoreTitle + descriptionEnd));
    // Title is Questionnaire.pdf so the Dublin Core fallback should kick in
    Assert.assertEquals(1, XMPUtil.readXMP(pdfFile, xmpPreferences).size());

    // Now write a new packet and check that it was written correctly
    XMPUtil.writeXMP(pdfFile, t1BibtexEntry(), null, xmpPreferences);
    List<BibEntry> afterFirstWrite = XMPUtil.readXMP(pdfFile.getAbsoluteFile(), xmpPreferences);
    Assert.assertEquals(1, afterFirstWrite.size());
    assertEqualsBibtexEntry(t1BibtexEntry(), afterFirstWrite.get(0));
    assertExistingSchemasPreserved();

    // Now alter the BibTeX entry, write it and do all the checks again
    BibEntry toSet = t1BibtexEntry();
    toSet.setField("author", "Pokemon!");
    XMPUtil.writeXMP(pdfFile, toSet, null, xmpPreferences);
    List<BibEntry> afterSecondWrite = XMPUtil.readXMP(pdfFile.getAbsoluteFile(), xmpPreferences);
    Assert.assertEquals(1, afterSecondWrite.size());
    assertEqualsBibtexEntry(toSet, afterSecondWrite.get(0));
    assertExistingSchemasPreserved();
}

/**
 * Asserts that the PDF under test holds exactly four schemas: one BibTeX schema plus the
 * hand-written Dublin Core, XMP basic and media management descriptions with their original
 * values.
 *
 * @throws Exception if the PDF or its XMP metadata cannot be read
 */
private void assertExistingSchemasPreserved() throws Exception {
    try (PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile())) {
        if (document.isEncrypted()) {
            throw new IOException("Error: Cannot read metadata from encrypted document.");
        }
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();
        XMPMetadata meta;
        if (metaRaw == null) {
            meta = new XMPMetadata();
        } else {
            meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream()));
        }
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);

        List<XMPSchema> schemas = meta.getSchemas();
        Assert.assertEquals(4, schemas.size());

        schemas = meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
        Assert.assertEquals(1, schemas.size());

        schemas = meta.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
        Assert.assertEquals(1, schemas.size());
        XMPSchemaDublinCore dc = (XMPSchemaDublinCore) schemas.get(0);
        Assert.assertEquals("application/pdf", dc.getFormat());

        schemas = meta.getSchemasByNamespaceURI(XMPSchemaBasic.NAMESPACE);
        Assert.assertEquals(1, schemas.size());
        XMPSchemaBasic bs = (XMPSchemaBasic) schemas.get(0);
        Assert.assertEquals("Acrobat PDFMaker 7.0.7", bs.getCreatorTool());

        // Expected xmp:CreateDate: 2006-08-07T14:44:24+02:00
        Calendar expected = Calendar.getInstance();
        expected.clear();
        expected.set(Calendar.YEAR, 2006);
        expected.set(Calendar.MONTH, Calendar.AUGUST);
        expected.set(Calendar.DATE, 7);
        // HOUR_OF_DAY rather than HOUR: HOUR is the 12-hour field and would not
        // distinguish 02:44 from 14:44.
        expected.set(Calendar.HOUR_OF_DAY, 14);
        expected.set(Calendar.MINUTE, 44);
        expected.set(Calendar.SECOND, 24);
        expected.setTimeZone(TimeZone.getTimeZone("GMT+2"));

        Calendar actual = bs.getCreateDate();
        Assert.assertEquals(expected.get(Calendar.YEAR), actual.get(Calendar.YEAR));
        Assert.assertEquals(expected.get(Calendar.MONTH), actual.get(Calendar.MONTH));
        Assert.assertEquals(expected.get(Calendar.DATE), actual.get(Calendar.DATE));
        Assert.assertEquals(expected.get(Calendar.HOUR_OF_DAY), actual.get(Calendar.HOUR_OF_DAY));
        Assert.assertEquals(expected.get(Calendar.MINUTE), actual.get(Calendar.MINUTE));
        Assert.assertEquals(expected.get(Calendar.SECOND), actual.get(Calendar.SECOND));
        Assert.assertTrue(expected.getTimeZone().hasSameRules(actual.getTimeZone()));

        schemas = meta.getSchemasByNamespaceURI(XMPSchemaMediaManagement.NAMESPACE);
        Assert.assertEquals(1, schemas.size());
        XMPSchemaMediaManagement mm = (XMPSchemaMediaManagement) schemas.get(0);
        Assert.assertEquals("17", mm.getSequenceList("xapMM:VersionID").get(0));
    }
}
Use of org.apache.jempbox.xmp.XMPSchema in project jabref by JabRef.
Class XMPUtilTest, method testReadRawXMP.
/**
 * Writes a parsed entry to the PDF and inspects the raw XMP metadata read back from it:
 * the number of schemas and the individual properties stored in the BibTeX schema.
 *
 * @throws IOException          if thrown by the parser or the XMP calls used here
 * @throws TransformerException if thrown by the XMP calls used here
 */
@Test
public void testReadRawXMP() throws IOException, TransformerException {
    String bibtexSource = "@article{canh05," + " author = {Crowston, K. and Annabi, H. and Howison, J. and Masango, C.},\n" + " title = {Effective work practices for floss development: A model and propositions},\n" + " booktitle = {Hawaii International Conference On System Sciences (HICSS)},\n" + " year = {2005},\n" + " owner = {oezbek},\n" + " timestamp = {2006.05.29},\n" + " url = {http://james.howison.name/publications.html}}";
    ParserResult parsed = BibtexParser.parse(new StringReader(bibtexSource), importFormatPreferences);

    Collection<BibEntry> parsedEntries = parsed.getDatabase().getEntries();
    Assert.assertEquals(1, parsedEntries.size());
    BibEntry onlyEntry = parsedEntries.iterator().next();

    XMPUtil.writeXMP(pdfFile, onlyEntry, null, xmpPreferences);

    Optional<XMPMetadata> rawMetadata = XMPUtil.readRawXMP(pdfFile);
    Assert.assertTrue(rawMetadata.isPresent());
    XMPMetadata xmp = rawMetadata.get();

    List<XMPSchema> allSchemas = xmp.getSchemas();
    Assert.assertEquals(2, allSchemas.size());

    List<XMPSchema> bibtexSchemas = xmp.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
    Assert.assertEquals(1, bibtexSchemas.size());
    XMPSchemaBibtex bibtex = (XMPSchemaBibtex) bibtexSchemas.get(0);

    // Authors are stored as a sequence, one element per person
    String[] expectedAuthors = {"K. Crowston", "H. Annabi", "J. Howison", "C. Masango"};
    List<String> authors = bibtex.getSequenceList("author");
    Assert.assertEquals(4, authors.size());
    for (int i = 0; i < expectedAuthors.length; i++) {
        Assert.assertEquals(expectedAuthors[i], authors.get(i));
    }

    // Remaining fields are stored as plain text properties
    Assert.assertEquals("article", bibtex.getTextProperty("entrytype"));
    Assert.assertEquals("Effective work practices for floss development: A model and propositions", bibtex.getTextProperty("title"));
    Assert.assertEquals("Hawaii International Conference On System Sciences (HICSS)", bibtex.getTextProperty("booktitle"));
    Assert.assertEquals("2005", bibtex.getTextProperty("year"));
    Assert.assertEquals("oezbek", bibtex.getTextProperty("owner"));
    Assert.assertEquals("http://james.howison.name/publications.html", bibtex.getTextProperty("url"));
}
Aggregations