use of org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode in project pdfbox by apache.
the class ExtractEmbeddedFiles method main.
/**
 * Command line entry point: loads the PDF named by the single argument and
 * passes every embedded file it finds to the extraction helpers.
 *
 * Two sources are scanned: the embedded-files name tree of the document
 * (its own names, or the names of its direct kids) and the file attachment
 * annotations of every page.
 *
 * @param args The command line arguments; exactly one is expected, the
 * path of the PDF file. Any other count prints the usage and exits with
 * status 1.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    if (args.length != 1) {
        usage();
        System.exit(1);
    } else {
        PDDocument document = null;
        try {
            File pdfFile = new File(args[0]);
            // getParent() is null for a bare file name such as "doc.pdf";
            // concatenating it would yield the literal prefix "null/".
            // Fall back to the parent of the absolute path in that case.
            String parent = pdfFile.getParent();
            if (parent == null) {
                parent = pdfFile.getAbsoluteFile().getParent();
            }
            String filePath = parent + System.getProperty("file.separator");
            document = PDDocument.load(pdfFile);
            PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
            PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
            if (efTree != null) {
                Map<String, PDComplexFileSpecification> names = efTree.getNames();
                if (names != null) {
                    extractFiles(names, filePath);
                } else {
                    // A node without names may also have no kids, and a kid
                    // may itself carry no names; skip those instead of
                    // failing with a NullPointerException.
                    List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids();
                    if (kids != null) {
                        for (PDNameTreeNode<PDComplexFileSpecification> node : kids) {
                            Map<String, PDComplexFileSpecification> kidNames = node.getNames();
                            if (kidNames != null) {
                                extractFiles(kidNames, filePath);
                            }
                        }
                    }
                }
            }
            // extract files from annotations
            for (PDPage page : document.getPages()) {
                for (PDAnnotation annotation : page.getAnnotations()) {
                    if (annotation instanceof PDAnnotationFileAttachment) {
                        PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation;
                        PDFileSpecification fileSpec = annotationFileAttachment.getFile();
                        if (fileSpec instanceof PDComplexFileSpecification) {
                            PDComplexFileSpecification complexFileSpec = (PDComplexFileSpecification) fileSpec;
                            // NOTE(review): whether getEmbeddedFile can return null and whether
                            // extractFile tolerates that is not visible here - confirm.
                            PDEmbeddedFile embeddedFile = getEmbeddedFile(complexFileSpec);
                            extractFile(filePath, complexFileSpec.getFilename(), embeddedFile);
                        }
                    }
                }
            }
        } finally {
            // always release the document, even when extraction failed
            if (document != null) {
                document.close();
            }
        }
    }
}
use of org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode in project pdfbox by apache.
the class EndstreamOutputStreamTest method testPDFBox2079EmbeddedFile.
/**
 * Loads embedded_zip.pdf, copies its single attachment to
 * target/test-output and checks the size of the written file.
 *
 * @throws IOException if the document cannot be loaded or the attachment
 * cannot be copied.
 */
@Test
public void testPDFBox2079EmbeddedFile() throws IOException {
    // /Length entry removed to force usage of EndstreamOutputStream
    try (PDDocument doc = PDDocument.load(new File("src/test/resources/org/apache/pdfbox/pdfparser", "embedded_zip.pdf"))) {
        PDDocumentCatalog catalog = doc.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        PDEmbeddedFilesNameTreeNode node = names.getEmbeddedFiles();
        Map<String, PDComplexFileSpecification> map = node.getNames();
        Assert.assertEquals(1, map.size());
        PDComplexFileSpecification spec = map.get("My first attachment");
        PDEmbeddedFile file = spec.getEmbeddedFile();
        File d = new File("target/test-output");
        d.mkdirs();
        File f = new File(d, spec.getFile());
        // Both streams are managed here so the attachment's input stream is
        // closed as well, not only the output file.
        try (InputStream input = file.createInputStream();
                OutputStream os = new FileOutputStream(f)) {
            IOUtils.copy(input, os);
        }
        Assert.assertEquals(17660, f.length());
    }
}
use of org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode in project pdfbox by apache.
the class TestSymmetricKeyEncryption method extractEmbeddedFile.
/**
 * Extracts the single embedded file of a PDF, saves it into the test
 * results directory and returns the saved file.
 *
 * @param pdfInputStream stream of the PDF holding exactly one embedded
 * file; it is not closed by this method itself.
 * @param name file name to save the extracted content under.
 * @return the file the embedded content was written to.
 * @throws IOException if the PDF cannot be loaded or the content cannot
 * be copied.
 */
private File extractEmbeddedFile(InputStream pdfInputStream, String name) throws IOException {
    // The document is closed once everything read from it has been used.
    try (PDDocument docWithEmbeddedFile = PDDocument.load(pdfInputStream)) {
        PDDocumentCatalog catalog = docWithEmbeddedFile.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
        Map<String, PDComplexFileSpecification> embeddedFileNames = embeddedFiles.getNames();
        Assert.assertEquals(1, embeddedFileNames.size());
        Map.Entry<String, PDComplexFileSpecification> entry = embeddedFileNames.entrySet().iterator().next();
        LOG.info("Processing embedded file " + entry.getKey() + ":");
        PDComplexFileSpecification complexFileSpec = entry.getValue();
        PDEmbeddedFile embeddedFile = complexFileSpec.getEmbeddedFile();
        File resultFile = new File(testResultsDir, name);
        // Declared in this order so the output is closed before the input,
        // and both are closed even when the copy fails.
        try (InputStream is = embeddedFile.createInputStream();
                FileOutputStream fos = new FileOutputStream(resultFile)) {
            IOUtils.copy(is, fos);
        }
        LOG.info(" size: " + embeddedFile.getSize());
        assertEquals(embeddedFile.getSize(), resultFile.length());
        return resultFile;
    }
}
use of org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode in project pdfbox by apache.
the class TestEmbeddedFiles method testOSSpecificAttachments.
/**
 * Checks that the generic, Mac, DOS and Unix variants of the attachment
 * "My first attachment" each contain their expected marker text.
 *
 * @throws IOException if the document cannot be loaded or an embedded
 * file cannot be read.
 */
@Test
public void testOSSpecificAttachments() throws IOException {
    PDEmbeddedFile nonOSFile = null;
    PDEmbeddedFile macFile = null;
    PDEmbeddedFile dosFile = null;
    PDEmbeddedFile unixFile = null;
    // The assertions stay inside the try block: the embedded files are read
    // while the document is still open.
    try (PDDocument doc = PDDocument.load(TestEmbeddedFiles.class.getResourceAsStream("testPDF_multiFormatEmbFiles.pdf"))) {
        PDDocumentCatalog catalog = doc.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        PDEmbeddedFilesNameTreeNode treeNode = names.getEmbeddedFiles();
        List<PDNameTreeNode<PDComplexFileSpecification>> kids = treeNode.getKids();
        // Typed loop variable: getNames() then yields the parameterized map
        // directly, so no unchecked assignment or cast is needed.
        for (PDNameTreeNode<PDComplexFileSpecification> kid : kids) {
            Map<String, PDComplexFileSpecification> tmpNames = kid.getNames();
            PDComplexFileSpecification spec = tmpNames.get("My first attachment");
            // Report a missing attachment as a test failure, not a bare NPE.
            assertNotNull("attachment exists", spec);
            nonOSFile = spec.getEmbeddedFile();
            macFile = spec.getEmbeddedFileMac();
            dosFile = spec.getEmbeddedFileDos();
            unixFile = spec.getEmbeddedFileUnix();
        }
        assertTrue("non os specific", byteArrayContainsLC("non os specific", nonOSFile.toByteArray(), "ISO-8859-1"));
        assertTrue("mac", byteArrayContainsLC("mac embedded", macFile.toByteArray(), "ISO-8859-1"));
        assertTrue("dos", byteArrayContainsLC("dos embedded", dosFile.toByteArray(), "ISO-8859-1"));
        assertTrue("unix", byteArrayContainsLC("unix embedded", unixFile.toByteArray(), "ISO-8859-1"));
    }
}
use of org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode in project pdfbox by apache.
the class TestEmbeddedFiles method testNullEmbeddedFile.
/**
 * Checks that a file specification without an embedded file yields
 * {@code null} instead of an exception, while a real attachment in the
 * same document is still readable.
 *
 * @throws IOException if the document cannot be loaded.
 */
@Test
public void testNullEmbeddedFile() throws IOException {
    PDEmbeddedFile embeddedFile = null;
    boolean ok = false;
    try (PDDocument doc = PDDocument.load(TestEmbeddedFiles.class.getResourceAsStream("null_PDComplexFileSpecification.pdf"))) {
        PDDocumentCatalog catalog = doc.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        assertEquals("expected two files", 2, names.getEmbeddedFiles().getNames().size());
        PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
        PDComplexFileSpecification spec = embeddedFiles.getNames().get("non-existent-file.docx");
        if (spec != null) {
            embeddedFile = spec.getEmbeddedFile();
            ok = true;
        }
        // now test for actual attachment
        spec = embeddedFiles.getNames().get("My first attachment");
        assertNotNull("one attachment actually exists", spec);
        assertEquals("existing file length", 17660, spec.getEmbeddedFile().getLength());
    } catch (NullPointerException e) {
        // Fail with the same message as before, but keep the NPE as the
        // cause so its stack trace shows up in the test report.
        throw new AssertionError("null pointer exception", e);
    }
    assertTrue("Was able to get file without exception", ok);
    assertNull("EmbeddedFile was correctly null", embeddedFile);
}
Aggregations