use of org.apache.pdfbox.pdmodel.encryption.AccessPermission in project camel by apache.
the class PdfAppendTest method testAppendEncrypted.
@Test
public void testAppendEncrypted() throws Exception {
final String originalText = "Test";
final String textToAppend = "Append";
PDDocument document = new PDDocument();
PDPage page = new PDPage(PDPage.PAGE_SIZE_A4);
document.addPage(page);
PDPageContentStream contentStream = new PDPageContentStream(document, page);
contentStream.setFont(PDType1Font.HELVETICA, 12);
contentStream.beginText();
contentStream.moveTextPositionByAmount(20, 400);
contentStream.drawString(originalText);
contentStream.endText();
contentStream.close();
final String ownerPass = "ownerPass";
final String userPass = "userPass";
AccessPermission accessPermission = new AccessPermission();
accessPermission.setCanExtractContent(false);
StandardProtectionPolicy protectionPolicy = new StandardProtectionPolicy(ownerPass, userPass, accessPermission);
protectionPolicy.setEncryptionKeyLength(128);
document.protect(protectionPolicy);
ByteArrayOutputStream output = new ByteArrayOutputStream();
document.save(output);
// Encryption happens after saving.
PDDocument encryptedDocument = PDDocument.load(new ByteArrayInputStream(output.toByteArray()));
Map<String, Object> headers = new HashMap<String, Object>();
headers.put(PdfHeaderConstants.PDF_DOCUMENT_HEADER_NAME, encryptedDocument);
headers.put(PdfHeaderConstants.DECRYPTION_MATERIAL_HEADER_NAME, new StandardDecryptionMaterial(userPass));
template.sendBodyAndHeaders("direct:start", textToAppend, headers);
resultEndpoint.setExpectedMessageCount(1);
resultEndpoint.expectedMessagesMatches(new Predicate() {
@Override
public boolean matches(Exchange exchange) {
Object body = exchange.getIn().getBody();
assertThat(body, instanceOf(ByteArrayOutputStream.class));
try {
PDDocument doc = PDDocument.load(new ByteArrayInputStream(((ByteArrayOutputStream) body).toByteArray()));
PDFTextStripper pdfTextStripper = new PDFTextStripper();
String text = pdfTextStripper.getText(doc);
assertEquals(2, doc.getNumberOfPages());
assertThat(text, containsString(originalText));
assertThat(text, containsString(textToAppend));
} catch (IOException e) {
throw new RuntimeException(e);
}
return true;
}
});
resultEndpoint.assertIsSatisfied();
}
use of org.apache.pdfbox.pdmodel.encryption.AccessPermission in project camel by apache.
the class PdfTextExtractionTest method testExtractTextFromEncrypted.
@Test
public void testExtractTextFromEncrypted() throws Exception {
final String ownerPass = "ownerPass";
final String userPass = "userPass";
AccessPermission accessPermission = new AccessPermission();
accessPermission.setCanExtractContent(false);
StandardProtectionPolicy protectionPolicy = new StandardProtectionPolicy(ownerPass, userPass, accessPermission);
protectionPolicy.setEncryptionKeyLength(128);
PDDocument document = new PDDocument();
final String expectedText = "Test string";
PDPage page = new PDPage(PDPage.PAGE_SIZE_A4);
document.addPage(page);
PDPageContentStream contentStream = new PDPageContentStream(document, page);
contentStream.setFont(PDType1Font.HELVETICA, 12);
contentStream.beginText();
contentStream.moveTextPositionByAmount(20, 400);
contentStream.drawString(expectedText);
contentStream.endText();
contentStream.close();
document.protect(protectionPolicy);
ByteArrayOutputStream output = new ByteArrayOutputStream();
document.save(output);
// Encryption happens after saving.
PDDocument encryptedDocument = PDDocument.load(new ByteArrayInputStream(output.toByteArray()));
template.sendBodyAndHeader("direct:start", encryptedDocument, PdfHeaderConstants.DECRYPTION_MATERIAL_HEADER_NAME, new StandardDecryptionMaterial(userPass));
resultEndpoint.setExpectedMessageCount(1);
resultEndpoint.expectedMessagesMatches(new Predicate() {
@Override
public boolean matches(Exchange exchange) {
Object body = exchange.getIn().getBody();
assertThat(body, instanceOf(String.class));
assertThat((String) body, containsString(expectedText));
return true;
}
});
resultEndpoint.assertIsSatisfied();
document.isEncrypted();
}
use of org.apache.pdfbox.pdmodel.encryption.AccessPermission in project camel by apache.
the class PdfCreationTest method testPdfCreationWithEncryption.
@Test
public void testPdfCreationWithEncryption() throws Exception {
final String ownerPass = "ownerPass";
final String userPass = "userPass";
final String expectedText = "expectedText";
AccessPermission accessPermission = new AccessPermission();
accessPermission.setCanPrint(false);
StandardProtectionPolicy protectionPolicy = new StandardProtectionPolicy(ownerPass, userPass, accessPermission);
protectionPolicy.setEncryptionKeyLength(128);
template.sendBodyAndHeader("direct:start", expectedText, PdfHeaderConstants.PROTECTION_POLICY_HEADER_NAME, protectionPolicy);
resultEndpoint.setExpectedMessageCount(1);
resultEndpoint.expectedMessagesMatches(new Predicate() {
@Override
public boolean matches(Exchange exchange) {
Object body = exchange.getIn().getBody();
assertThat(body, instanceOf(ByteArrayOutputStream.class));
try {
PDDocument doc = PDDocument.load(new ByteArrayInputStream(((ByteArrayOutputStream) body).toByteArray()));
assertTrue("Expected encrypted document", doc.isEncrypted());
doc.decrypt(userPass);
assertFalse("Printing should not be permitted", doc.getCurrentAccessPermission().canPrint());
PDFTextStripper pdfTextStripper = new PDFTextStripper();
String text = pdfTextStripper.getText(doc);
assertEquals(1, doc.getNumberOfPages());
assertThat(text, containsString(expectedText));
} catch (Exception e) {
throw new RuntimeException(e);
}
return true;
}
});
resultEndpoint.assertIsSatisfied();
}
use of org.apache.pdfbox.pdmodel.encryption.AccessPermission in project tika by apache.
the class PDFParser method extractMetadata.
private void extractMetadata(PDDocument document, Metadata metadata, ParseContext context) throws TikaException {
//first extract AccessPermissions
AccessPermission ap = document.getCurrentAccessPermission();
metadata.set(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY, Boolean.toString(ap.canExtractForAccessibility()));
metadata.set(AccessPermissions.EXTRACT_CONTENT, Boolean.toString(ap.canExtractContent()));
metadata.set(AccessPermissions.ASSEMBLE_DOCUMENT, Boolean.toString(ap.canAssembleDocument()));
metadata.set(AccessPermissions.FILL_IN_FORM, Boolean.toString(ap.canFillInForm()));
metadata.set(AccessPermissions.CAN_MODIFY, Boolean.toString(ap.canModify()));
metadata.set(AccessPermissions.CAN_MODIFY_ANNOTATIONS, Boolean.toString(ap.canModifyAnnotations()));
metadata.set(AccessPermissions.CAN_PRINT, Boolean.toString(ap.canPrint()));
metadata.set(AccessPermissions.CAN_PRINT_DEGRADED, Boolean.toString(ap.canPrintDegraded()));
//now go for the XMP
Document dom = loadDOM(document.getDocumentCatalog().getMetadata(), metadata, context);
XMPMetadata xmp = null;
if (dom != null) {
xmp = new XMPMetadata(dom);
}
XMPSchemaDublinCore dcSchema = null;
if (xmp != null) {
try {
dcSchema = xmp.getDublinCoreSchema();
} catch (IOException e) {
}
JempboxExtractor.extractXMPMM(xmp, metadata);
}
PDDocumentInformation info = document.getDocumentInformation();
metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
extractMultilingualItems(metadata, TikaCoreProperties.TITLE, info.getTitle(), dcSchema);
addMetadata(metadata, PDF.DOC_INFO_TITLE, info.getTitle());
extractDublinCoreListItems(metadata, TikaCoreProperties.CREATOR, info.getAuthor(), dcSchema);
addMetadata(metadata, PDF.DOC_INFO_CREATOR, info.getAuthor());
extractDublinCoreListItems(metadata, TikaCoreProperties.CONTRIBUTOR, null, dcSchema);
addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator());
addMetadata(metadata, PDF.DOC_INFO_CREATOR_TOOL, info.getCreator());
addMetadata(metadata, TikaCoreProperties.KEYWORDS, info.getKeywords());
addMetadata(metadata, PDF.DOC_INFO_KEY_WORDS, info.getKeywords());
addMetadata(metadata, "producer", info.getProducer());
addMetadata(metadata, PDF.DOC_INFO_PRODUCER, info.getProducer());
extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema);
addMetadata(metadata, PDF.DOC_INFO_SUBJECT, info.getSubject());
// TODO: Move to description in Tika 2.0
addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject());
addMetadata(metadata, "trapped", info.getTrapped());
addMetadata(metadata, PDF.DOC_INFO_TRAPPED, info.getTrapped());
// TODO Remove these in Tika 2.0
addMetadata(metadata, "created", info.getCreationDate());
addMetadata(metadata, PDF.DOC_INFO_CREATED, info.getCreationDate());
addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate());
Calendar modified = info.getModificationDate();
addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
addMetadata(metadata, TikaCoreProperties.MODIFIED, modified);
addMetadata(metadata, PDF.DOC_INFO_MODIFICATION_DATE, info.getModificationDate());
// All remaining metadata is custom
// Copy this over as-is
List<String> handledMetadata = Arrays.asList("Author", "Creator", "CreationDate", "ModDate", "Keywords", "Producer", "Subject", "Title", "Trapped");
for (COSName key : info.getCOSObject().keySet()) {
String name = key.getName();
if (!handledMetadata.contains(name)) {
addMetadata(metadata, name, info.getCOSObject().getDictionaryObject(key));
addMetadata(metadata, PDF.PDF_DOC_INFO_CUSTOM_PREFIX + name, info.getCOSObject().getDictionaryObject(key));
}
}
//try to get the various versions
//Caveats:
// there is currently a fair amount of redundancy
// TikaCoreProperties.FORMAT can be multivalued
// There are also three potential pdf specific version keys: pdf:PDFVersion, pdfa:PDFVersion, pdf:PDFExtensionVersion
metadata.set(PDF.PDF_VERSION, Float.toString(document.getDocument().getVersion()));
metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=" + Float.toString(document.getDocument().getVersion()));
try {
if (xmp != null) {
xmp.addXMLNSMapping(XMPSchemaPDFAId.NAMESPACE, XMPSchemaPDFAId.class);
XMPSchemaPDFAId pdfaxmp = (XMPSchemaPDFAId) xmp.getSchemaByClass(XMPSchemaPDFAId.class);
if (pdfaxmp != null) {
if (pdfaxmp.getPart() != null) {
metadata.set(PDF.PDFAID_PART, Integer.toString(pdfaxmp.getPart()));
}
if (pdfaxmp.getConformance() != null) {
metadata.set(PDF.PDFAID_CONFORMANCE, pdfaxmp.getConformance());
String version = "A-" + pdfaxmp.getPart() + pdfaxmp.getConformance().toLowerCase(Locale.ROOT);
metadata.set(PDF.PDFA_VERSION, version);
metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=\"" + version + "\"");
}
}
// TODO WARN if this XMP version is inconsistent with document header version?
}
} catch (IOException e) {
metadata.set(TikaCoreProperties.TIKA_META_PREFIX + "pdf:metadata-xmp-parse-failed", "" + e);
}
//TODO: Let's try to move this into PDFBox.
//Attempt to determine Adobe extension level, if present:
COSDictionary root = document.getDocumentCatalog().getCOSObject();
COSDictionary extensions = (COSDictionary) root.getDictionaryObject(COSName.getPDFName("Extensions"));
if (extensions != null) {
for (COSName extName : extensions.keySet()) {
// If it's an Adobe one, interpret it to determine the extension level:
if (extName.equals(COSName.getPDFName("ADBE"))) {
COSDictionary adobeExt = (COSDictionary) extensions.getDictionaryObject(extName);
if (adobeExt != null) {
String baseVersion = adobeExt.getNameAsString(COSName.getPDFName("BaseVersion"));
int el = adobeExt.getInt(COSName.getPDFName("ExtensionLevel"));
//-1 is sentinel value that something went wrong in getInt
if (el != -1) {
metadata.set(PDF.PDF_EXTENSION_VERSION, baseVersion + " Adobe Extension Level " + el);
metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=\"" + baseVersion + " Adobe Extension Level " + el + "\"");
}
}
} else {
// WARN that there is an Extension, but it's not Adobe's, and so is a 'new' format'.
metadata.set("pdf:foundNonAdobeExtensionName", extName.getName());
}
}
}
}
Aggregations