use of org.apache.jempbox.xmp.XMPMetadata in project tika by apache.
the class ImageMetadataExtractor method parseRawXMP.
/**
 * Parses a raw XMP packet and hands the result to {@link JempboxExtractor} so that
 * its Dublin Core and XMP Media Management extraction can populate {@code metadata}.
 * <p>
 * Extraction is best-effort: a packet that is missing, empty, or cannot be parsed
 * as XML is skipped and leaves {@code metadata} untouched.
 *
 * @param xmpData the raw bytes of the XMP packet; {@code null} or empty input is ignored
 * @throws IOException   declared for interface compatibility; I/O failures during parsing are swallowed here
 * @throws SAXException  declared for interface compatibility; XML parse failures are swallowed here
 * @throws TikaException presumably raised when the document builder cannot be obtained — confirm against ParseContext
 */
public void parseRawXMP(byte[] xmpData) throws IOException, SAXException, TikaException {
    // Guard first: ByteArrayInputStream would throw a NullPointerException on null input,
    // and an empty packet can never yield a document.
    if (xmpData == null || xmpData.length == 0) {
        return;
    }

    XMPMetadata xmp = null;
    try (InputStream decoded = new ByteArrayInputStream(xmpData)) {
        Document dom = new ParseContext().getDocumentBuilder().parse(decoded);
        if (dom != null) {
            xmp = new XMPMetadata(dom);
        }
    } catch (IOException | SAXException ignored) {
        // Deliberately ignored: a malformed XMP packet must not abort extraction of
        // the remaining image metadata, so xmp simply stays null.
    }

    if (xmp != null) {
        JempboxExtractor.extractDublinCore(xmp, metadata);
        JempboxExtractor.extractXMPMM(xmp, metadata);
    }
}
use of org.apache.jempbox.xmp.XMPMetadata in project jabref by JabRef.
the class XMPUtil method readXMP.
/**
 * Extracts BibTeX entries from the metadata of a PDF file supplied as a stream.
 * <p>
 * Three sources are tried in order, each only if the previous one produced nothing:
 * the BibTeX XMP schema, the Dublin Core XMP schema, and finally the PDF's
 * document information. Entries from the XMP schemas that have no type are given
 * {@link BibEntry#DEFAULT_TYPE}.
 *
 * @param inputStream    the stream containing the PDF file
 * @param xmpPreferences preferences handed to the Dublin Core conversion
 * @return the entries found in the stream; may be empty, but never null
 * @throws IOException if the file cannot be read, so that the user can remove a lock or cancel
 *                     the operation
 */
public static List<BibEntry> readXMP(InputStream inputStream, XMPPreferences xmpPreferences) throws IOException {
    List<BibEntry> entries = new LinkedList<>();

    try (PDDocument pdf = loadWithAutomaticDecryption(inputStream)) {
        Optional<XMPMetadata> xmp = XMPUtil.getXMPMetadata(pdf);
        if (xmp.isPresent()) {
            XMPMetadata metadata = xmp.get();

            // First choice: entries stored in the dedicated BibTeX schema.
            List<XMPSchema> bibtexSchemas = metadata.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
            for (XMPSchema candidate : bibtexSchemas) {
                BibEntry parsed = ((XMPSchemaBibtex) candidate).getBibtexEntry();
                if (parsed.getType() == null) {
                    parsed.setType(BibEntry.DEFAULT_TYPE);
                }
                entries.add(parsed);
            }

            // Second choice: fall back to Dublin Core when the BibTeX schema gave nothing.
            if (entries.isEmpty()) {
                List<XMPSchema> dublinCoreSchemas = metadata.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
                for (XMPSchema candidate : dublinCoreSchemas) {
                    XMPUtil.getBibtexEntryFromDublinCore((XMPSchemaDublinCore) candidate, xmpPreferences)
                            .ifPresent(converted -> {
                                if (converted.getType() == null) {
                                    converted.setType(BibEntry.DEFAULT_TYPE);
                                }
                                entries.add(converted);
                            });
                }
            }
        }

        // Last resort: no usable XMP metadata, so look at the non-XMP document information.
        if (entries.isEmpty()) {
            XMPUtil.getBibtexEntryFromDocumentInformation(pdf.getDocumentInformation())
                    .ifPresent(entries::add);
        }
    }

    // Hand back the shared empty list when no metadata was found at all.
    return entries.isEmpty() ? Collections.emptyList() : entries;
}
use of org.apache.jempbox.xmp.XMPMetadata in project jabref by JabRef.
the class XMPUtil method getXMPMetadata.
/**
 * Reads the XMP metadata stream from the document catalog, parses it, and registers
 * the BibTeX schema class for the BibTeX namespace on the result.
 *
 * @param document the PDF document whose catalog metadata is read
 * @return the parsed metadata, or an empty Optional if no metadata has been found
 * @throws IOException if the metadata stream cannot be opened, parsed, or closed
 */
private static Optional<XMPMetadata> getXMPMetadata(PDDocument document) throws IOException {
    PDMetadata rawMetadata = document.getDocumentCatalog().getMetadata();
    if (rawMetadata != null) {
        Document xml;
        try (InputStream stream = rawMetadata.createInputStream()) {
            xml = XMLUtil.parse(stream);
        }
        XMPMetadata xmp = new XMPMetadata(xml);
        // Map the BibTeX namespace to its schema class on this metadata object.
        xmp.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);
        return Optional.of(xmp);
    }
    return Optional.empty();
}
use of org.apache.jempbox.xmp.XMPMetadata in project jabref by JabRef.
the class XMPUtilTest method testReadWriteDC.
/**
 * Writes one entry to the PDF with {@code XMPUtil.writeXMP} and verifies that it can be
 * read back both from the document information and from the Dublin Core XMP schema.
 *
 * @throws IOException          if the PDF or its metadata stream cannot be read
 * @throws TransformerException if writing the XMP metadata fails
 */
@Test
public void testReadWriteDC() throws IOException, TransformerException {
    List<BibEntry> entries = new LinkedList<>();
    entries.add(t3BibtexEntry());
    XMPUtil.writeXMP(pdfFile, entries, null, true, xmpPreferences);
    try (PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile())) {
        if (document.isEncrypted()) {
            Assert.fail("Cannot add metadata to encrypted document.");
        }
        // Check the document information
        Assert.assertEquals("Kelly Clarkson and Ozzy Osbourne", document.getDocumentInformation().getAuthor());
        Assert.assertEquals("Hypersonic ultra-sound", document.getDocumentInformation().getTitle());
        Assert.assertEquals("Huey Duck and Dewey Duck and Louie Duck", document.getDocumentInformation().getCustomMetadataValue("bibtex/editor"));
        Assert.assertEquals("Clarkson06", document.getDocumentInformation().getCustomMetadataValue("bibtex/bibtexkey"));
        Assert.assertEquals("peanut, butter, jelly", document.getDocumentInformation().getKeywords());
        assertEqualsBibtexEntry(t3BibtexEntry(), XMPUtil.getBibtexEntryFromDocumentInformation(document.getDocumentInformation()).get());
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();
        if (metaRaw == null) {
            Assert.fail();
            // To avoid warnings
            return;
        }
        // Close the metadata stream once parsed instead of leaking it. The type is
        // fully qualified so this block does not rely on an additional import.
        XMPMetadata meta;
        try (java.io.InputStream metaStream = metaRaw.createInputStream()) {
            meta = new XMPMetadata(XMLUtil.parse(metaStream));
        }
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);
        // Check Dublin Core
        List<XMPSchema> schemas = meta.getSchemasByNamespaceURI("http://purl.org/dc/elements/1.1/");
        Assert.assertEquals(1, schemas.size());
        XMPSchemaDublinCore dcSchema = (XMPSchemaDublinCore) schemas.iterator().next();
        Assert.assertNotNull(dcSchema);
        Assert.assertEquals("Hypersonic ultra-sound", dcSchema.getTitle());
        Assert.assertEquals("1982-07", dcSchema.getSequenceList("dc:date").get(0));
        Assert.assertEquals("Kelly Clarkson", dcSchema.getCreators().get(0));
        Assert.assertEquals("Ozzy Osbourne", dcSchema.getCreators().get(1));
        Assert.assertEquals("Huey Duck", dcSchema.getContributors().get(0));
        Assert.assertEquals("Dewey Duck", dcSchema.getContributors().get(1));
        Assert.assertEquals("Louie Duck", dcSchema.getContributors().get(2));
        Assert.assertEquals("InProceedings".toLowerCase(), dcSchema.getTypes().get(0).toLowerCase());
        Assert.assertTrue(dcSchema.getRelationships().contains("bibtex/bibtexkey/Clarkson06"));
        Assert.assertEquals("peanut", dcSchema.getSubjects().get(0));
        Assert.assertEquals("butter", dcSchema.getSubjects().get(1));
        Assert.assertEquals("jelly", dcSchema.getSubjects().get(2));
        // Four relationships are expected: Bibtexkey, Journal, pdf, booktitle
        Assert.assertEquals(4, dcSchema.getRelationships().size());
        assertEqualsBibtexEntry(t3BibtexEntry(), XMPUtil.getBibtexEntryFromDublinCore(dcSchema, xmpPreferences).get());
    }
}
use of org.apache.jempbox.xmp.XMPMetadata in project jabref by JabRef.
the class XMPUtilTest method testResolveStrings2.
/**
 * A better testcase for resolveStrings. Makes sure that the document information and
 * Dublin Core are also written correctly.
 * <p>
 * Data was contributed by Philip K.F. Hölzenspies (p.k.f.holzenspies [at] utwente.nl).
 *
 * @throws IOException          if the bib file, the PDF, or its metadata stream cannot be read
 * @throws TransformerException if writing the XMP metadata fails
 */
@Test
public void testResolveStrings2() throws IOException, TransformerException {
    try (BufferedReader fr = Files.newBufferedReader(Paths.get("src/test/resources/org/jabref/util/twente.bib"), StandardCharsets.UTF_8)) {
        ParserResult result = new BibtexParser(importFormatPreferences).parse(fr);
        Assert.assertEquals("Arvind", result.getDatabase().resolveForStrings("#Arvind#"));
        AuthorList originalAuthors = AuthorList.parse("Patterson, David and Arvind and Asanov\\'\\i{}c, Krste and Chiou, Derek and Hoe, James and Kozyrakis, Christos and Lu, S{hih-Lien} and Oskin, Mark and Rabaey, Jan and Wawrzynek, John");
        try {
            XMPUtil.writeXMP(pdfFile, result.getDatabase().getEntryByKey("Patterson06").get(), result.getDatabase(), xmpPreferences);
            // Test whether the main function can load the bibtex correctly
            BibEntry b = XMPUtil.readXMP(pdfFile, xmpPreferences).get(0);
            Assert.assertNotNull(b);
            Assert.assertEquals(originalAuthors, AuthorList.parse(b.getField("author").get()));
            // Next check from Document Information
            try (PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile())) {
                Assert.assertEquals(originalAuthors, AuthorList.parse(document.getDocumentInformation().getAuthor()));
                b = XMPUtil.getBibtexEntryFromDocumentInformation(document.getDocumentInformation()).get();
                Assert.assertEquals(originalAuthors, AuthorList.parse(b.getField("author").get()));
                // Now check from Dublin Core
                PDDocumentCatalog catalog = document.getDocumentCatalog();
                PDMetadata metaRaw = catalog.getMetadata();
                if (metaRaw == null) {
                    Assert.fail();
                    // To avoid warnings
                    return;
                }
                // Close the metadata stream once parsed instead of leaking it. The type is
                // fully qualified so this block does not rely on an additional import.
                XMPMetadata meta;
                try (java.io.InputStream metaStream = metaRaw.createInputStream()) {
                    meta = new XMPMetadata(XMLUtil.parse(metaStream));
                }
                meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);
                List<XMPSchema> schemas = meta.getSchemasByNamespaceURI("http://purl.org/dc/elements/1.1/");
                Assert.assertEquals(1, schemas.size());
                XMPSchemaDublinCore dcSchema = (XMPSchemaDublinCore) schemas.iterator().next();
                Assert.assertNotNull(dcSchema);
                Assert.assertEquals("David Patterson", dcSchema.getCreators().get(0));
                Assert.assertEquals("Arvind", dcSchema.getCreators().get(1));
                Assert.assertEquals("Krste Asanov\\'\\i{}c", dcSchema.getCreators().get(2));
                b = XMPUtil.getBibtexEntryFromDublinCore(dcSchema, xmpPreferences).get();
                Assert.assertNotNull(b);
                Assert.assertEquals(originalAuthors, AuthorList.parse(b.getField("author").get()));
            }
        } finally {
            // Clean up the PDF written by this test, whether or not the assertions passed.
            if (!pdfFile.delete()) {
                System.err.println("Cannot delete temporary file");
            }
        }
    }
}
Aggregations