use of org.apache.pdfbox.pdmodel.PDDocumentCatalog in project mustangproject by ZUGFeRD.
the class ZUGFeRDExporterFromA1Factory method makePDFA3compliant.
/**
 * Attaches PDF/A part 3 identification metadata to the given document.
 *
 * <p>A new metadata stream is registered on the document catalog, the document
 * information dictionary is replaced with one carrying producer and author, and
 * an XMP packet (PDF/A identification, Dublin Core, XMP Basic, Adobe PDF and,
 * when {@code attachZugferdHeaders} is set, the ZUGFeRD extension) is serialized
 * into the metadata stream.
 *
 * @param doc the document to modify
 * @throws IOException declared by the signature for the PDFBox calls made here
 * @throws ZUGFeRDExportException if the XMP packet cannot be serialized
 */
private void makePDFA3compliant(PDDocument doc) throws IOException {
    String producerLabel = producer + " (via mustangproject.org " + Version.VERSION + ")";

    // Register an (initially empty) metadata stream; it is filled at the very end.
    PDDocumentCatalog catalog = doc.getDocumentCatalog();
    PDMetadata metadataStream = new PDMetadata(doc);
    catalog.setMetadata(metadataStream);

    // Replace the document information dictionary instead of amending the existing one.
    PDDocumentInformation info = new PDDocumentInformation();
    info.setProducer(producerLabel);
    info.setAuthor(creator);
    doc.setDocumentInformation(info);

    // Build the XMP packet. Schemas are added in a fixed order.
    XMPMetadata xmpPacket = XMPMetadata.createXMPMetadata();
    PDFAIdentificationSchema pdfaId = new PDFAIdentificationSchema(xmpPacket);
    xmpPacket.addSchema(pdfaId);

    DublinCoreSchema dublinCore = xmpPacket.createAndAddDublinCoreSchema();
    dublinCore.addCreator(creator);

    XMPBasicSchema xmpBasic = xmpPacket.createAndAddXMPBasicSchema();
    xmpBasic.setCreatorTool(creator);
    xmpBasic.setCreateDate(GregorianCalendar.getInstance());

    AdobePDFSchema adobePdf = xmpPacket.createAndAddAdobePDFSchema();
    adobePdf.setProducer(producerLabel);

    // To be on the safe side, level B without MarkInfo is used, because it cannot
    // be guaranteed that the user tagged the PDF templates correctly.
    try {
        pdfaId.setConformance(conformanceLevel.getLetter());
    } catch (BadFieldValueException ex) {
        // The enum already enforces a valid conformance level, so this is not expected.
        throw new Error(ex);
    }
    pdfaId.setPart(3);

    if (attachZugferdHeaders) {
        // The only ZUGFeRD-specific step: adds PDF metadata specifically for
        // ZUGFeRD rather than generically for an embedded file.
        addZugferdXMP(xmpPacket);
    }

    try {
        metadataStream.importXMPMetadata(serializeXmpMetadata(xmpPacket));
    } catch (TransformerException e) {
        throw new ZUGFeRDExportException("Could not export XmpMetadata", e);
    }
}
use of org.apache.pdfbox.pdmodel.PDDocumentCatalog in project Gargoyle by callakrsos.
the class PDFUtil method toImage.
/**
 * Renders a range of pages of a PDF file to images and passes each image to the handler.
 *
 * <p>The PDF file, the page range and the output directory are all obtained from
 * {@code handler}. A start page of {@code -1} means "from the first page" and an end
 * page of {@code -1} means "through the last page". The end page is inclusive and is
 * clamped to the number of pages in the document. The output directory is created
 * when it does not exist yet.
 *
 * <p>Author: KYJ, written 2017-02-20.
 *
 * @param handler supplies the PDF file, page range and output directory, and receives
 *                every rendered page through {@code write}
 * @throws IOException if the PDF cannot be loaded or a page cannot be rendered
 * @throws RuntimeException if the start page lies after the end page, or if the
 *                          output location is not (and cannot be made) a directory
 */
public static void toImage(PdfToImageHandler handler) throws IOException {
    File pdfFile = handler.getPdfFile();
    try (PDDocument doc = PDDocument.load(pdfFile)) {
        PDFRenderer pdfRenderer = new PDFRenderer(doc);
        int totalPageCount = doc.getDocumentCatalog().getPages().getCount();

        int start = handler.getStartPage();
        int end = handler.getEndPage();

        // Resolve the -1 sentinels BEFORE validating the range; otherwise a request
        // such as "start = 0, end = -1" is rejected as start > end.
        if (start == -1) {
            start = 0;
        }
        if (end == -1) {
            end = totalPageCount;
        }

        // Validate the page range.
        if (start > end) {
            throw new RuntimeException(String.format("Invalide page index start : %d end : %d", start, end));
        }
        if (end > totalPageCount) {
            end = totalPageCount;
        }

        // Create the output directory if it is missing, then verify that it really is
        // a directory (this also catches a failed mkdirs() and a plain file at that path).
        File outputDir = handler.getOutputDir();
        if (!outputDir.exists()) {
            outputDir.mkdirs();
        }
        if (!outputDir.isDirectory()) {
            throw new RuntimeException("OutputDir is not Directory.");
        }

        // The end page is inclusive, but can never exceed the last valid page index.
        int lastPage = Math.min(end, totalPageCount - 1);
        for (int currentPage = start; currentPage <= lastPage; currentPage++) {
            BufferedImage renderImage = pdfRenderer.renderImage(currentPage);
            handler.write(outputDir, currentPage, renderImage);
        }
    }
}
use of org.apache.pdfbox.pdmodel.PDDocumentCatalog in project tika by apache.
the class AbstractPDF2XHTML method extractAcroForm.
/**
 * Writes the form content of the given PDF to the XHTML output.
 *
 * <p>If the AcroForm carries an XFA resource, extraction from the XFA is attempted
 * first and the method returns as soon as that succeeds. If the XFA bytes cannot be
 * read, or the XFA cannot be parsed as XML, the problem is recorded in the metadata
 * and the traditional AcroForm fields are emitted instead as an ordered list inside
 * a {@code div} of class {@code acroform}.
 *
 * <p>This code derives from Ben Litchfield's
 * {@code org.apache.pdfbox.examples.fdf.PrintFields}.
 *
 * @param pdf the document whose form is extracted
 */
void extractAcroForm(PDDocument pdf) throws IOException, SAXException, TikaException {
    PDDocumentCatalog docCatalog = pdf.getDocumentCatalog();
    if (docCatalog == null) {
        return;
    }
    PDAcroForm acroForm = docCatalog.getAcroForm();
    if (acroForm == null) {
        return;
    }

    // Prefer XFA when present; fall through to the AcroForm on any failure.
    PDXFAResource xfaResource = acroForm.getXFA();
    if (xfaResource != null) {
        XFAExtractor extractor = new XFAExtractor();
        InputStream xfaStream = null;
        try {
            xfaStream = new BufferedInputStream(new ByteArrayInputStream(xfaResource.getBytes()));
        } catch (IOException e) {
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
        }
        if (xfaStream != null) {
            try {
                extractor.extract(xfaStream, xhtml, metadata, context);
                // XFA extraction succeeded, nothing more to do.
                return;
            } catch (XMLStreamException e) {
                // XML parse problem in the XFA: record it and try the AcroForm instead.
                EmbeddedDocumentUtil.recordException(e, metadata);
            } finally {
                IOUtils.closeQuietly(xfaStream);
            }
        }
    }

    @SuppressWarnings("rawtypes") List formFields = acroForm.getFields();
    if (formFields == null) {
        return;
    }
    @SuppressWarnings("rawtypes") ListIterator fieldIterator = formFields.listIterator();
    if (fieldIterator == null) {
        return;
    }

    xhtml.startElement("div", "class", "acroform");
    xhtml.startElement("ol");
    while (fieldIterator.hasNext()) {
        Object candidate = fieldIterator.next();
        // instanceof is false for null, so no separate null check is needed.
        if (candidate instanceof PDField) {
            processAcroField((PDField) candidate, 0);
        }
    }
    xhtml.endElement("ol");
    xhtml.endElement("div");
}
use of org.apache.pdfbox.pdmodel.PDDocumentCatalog in project jabref by JabRef.
the class XMPUtil method getXMPMetadata.
/**
 * Reads the metadata stream from the document catalog, parses it as XML and wraps
 * it in an {@link XMPMetadata} with the BibTeX schema namespace registered.
 *
 * @param document the PDF document to read the metadata from
 * @return the parsed metadata, or an empty Optional if the catalog has no metadata
 * @throws IOException if the metadata stream cannot be read or parsed
 */
private static Optional<XMPMetadata> getXMPMetadata(PDDocument document) throws IOException {
    PDMetadata rawMetadata = document.getDocumentCatalog().getMetadata();
    if (rawMetadata == null) {
        return Optional.empty();
    }

    Document xmlDocument;
    try (InputStream metadataStream = rawMetadata.createInputStream()) {
        xmlDocument = XMLUtil.parse(metadataStream);
    }

    XMPMetadata xmp = new XMPMetadata(xmlDocument);
    xmp.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);
    return Optional.of(xmp);
}
use of org.apache.pdfbox.pdmodel.PDDocumentCatalog in project jabref by JabRef.
the class XMPUtilTest method testReadWriteDC.
/**
 * Writes one BibTeX entry as XMP into the test PDF and verifies that it can be read
 * back both from the document information dictionary and from the Dublin Core
 * schema of the XMP metadata stream.
 */
@Test
public void testReadWriteDC() throws IOException, TransformerException {
    List<BibEntry> entries = new LinkedList<>();
    entries.add(t3BibtexEntry());
    XMPUtil.writeXMP(pdfFile, entries, null, true, xmpPreferences);

    try (PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile())) {
        if (document.isEncrypted()) {
            Assert.fail("Cannot add metadata to encrypted document.");
        }

        // Check the document information dictionary.
        Assert.assertEquals("Kelly Clarkson and Ozzy Osbourne", document.getDocumentInformation().getAuthor());
        Assert.assertEquals("Hypersonic ultra-sound", document.getDocumentInformation().getTitle());
        Assert.assertEquals("Huey Duck and Dewey Duck and Louie Duck", document.getDocumentInformation().getCustomMetadataValue("bibtex/editor"));
        Assert.assertEquals("Clarkson06", document.getDocumentInformation().getCustomMetadataValue("bibtex/bibtexkey"));
        Assert.assertEquals("peanut, butter, jelly", document.getDocumentInformation().getKeywords());
        assertEqualsBibtexEntry(t3BibtexEntry(), XMPUtil.getBibtexEntryFromDocumentInformation(document.getDocumentInformation()).get());

        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();
        Assert.assertNotNull("Document catalog has no metadata stream", metaRaw);

        // Close the metadata stream once it has been parsed. The type is written
        // fully qualified so this block does not depend on an extra file-level import.
        XMPMetadata meta;
        try (java.io.InputStream metadataStream = metaRaw.createInputStream()) {
            meta = new XMPMetadata(XMLUtil.parse(metadataStream));
        }
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);

        // Check Dublin Core
        List<XMPSchema> schemas = meta.getSchemasByNamespaceURI("http://purl.org/dc/elements/1.1/");
        Assert.assertEquals(1, schemas.size());
        XMPSchemaDublinCore dcSchema = (XMPSchemaDublinCore) schemas.iterator().next();
        Assert.assertNotNull(dcSchema);
        Assert.assertEquals("Hypersonic ultra-sound", dcSchema.getTitle());
        Assert.assertEquals("1982-07", dcSchema.getSequenceList("dc:date").get(0));
        Assert.assertEquals("Kelly Clarkson", dcSchema.getCreators().get(0));
        Assert.assertEquals("Ozzy Osbourne", dcSchema.getCreators().get(1));
        Assert.assertEquals("Huey Duck", dcSchema.getContributors().get(0));
        Assert.assertEquals("Dewey Duck", dcSchema.getContributors().get(1));
        Assert.assertEquals("Louie Duck", dcSchema.getContributors().get(2));
        Assert.assertEquals("InProceedings".toLowerCase(), dcSchema.getTypes().get(0).toLowerCase());
        Assert.assertTrue(dcSchema.getRelationships().contains("bibtex/bibtexkey/Clarkson06"));
        Assert.assertEquals("peanut", dcSchema.getSubjects().get(0));
        Assert.assertEquals("butter", dcSchema.getSubjects().get(1));
        Assert.assertEquals("jelly", dcSchema.getSubjects().get(2));

        // Expected relationships: bibtexkey, journal, pdf, booktitle
        Assert.assertEquals(4, dcSchema.getRelationships().size());
        assertEqualsBibtexEntry(t3BibtexEntry(), XMPUtil.getBibtexEntryFromDublinCore(dcSchema, xmpPreferences).get());
    }
}
Aggregations