use of com.tom_roush.pdfbox.pdmodel.PDDocumentNameDictionary in project PdfBox-Android by TomRoush.
the class TestEmbeddedFiles method testNullEmbeddedFile.
/**
 * Loads {@code null_PDComplexFileSpecification.pdf} and checks that asking a
 * file specification for its embedded file yields {@code null} rather than a
 * {@link NullPointerException} when the specification carries no embedded
 * file, while a real attachment in the same name tree is still readable.
 *
 * @throws IOException if the test document cannot be loaded or closed
 */
@Test
public void testNullEmbeddedFile() throws IOException {
    PDEmbeddedFile embeddedFile = null;
    boolean ok = false;
    PDDocument doc = null;
    try {
        doc = PDDocument.load(getClass().getResourceAsStream("/pdfbox/com/tom_roush/pdfbox/pdmodel/common/null_PDComplexFileSpecification.pdf"));
        PDDocumentCatalog catalog = doc.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        assertEquals("expected two files", 2, names.getEmbeddedFiles().getNames().size());
        PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
        PDComplexFileSpecification spec = embeddedFiles.getNames().get("non-existent-file.docx");
        if (spec != null) {
            // the call under test: must return without throwing
            embeddedFile = spec.getEmbeddedFile();
            ok = true;
        }
        // now test for actual attachment
        spec = embeddedFiles.getNames().get("My first attachment");
        assertNotNull("one attachment actually exists", spec);
        assertEquals("existing file length", 17660, spec.getEmbeddedFile().getLength());
    } catch (NullPointerException e) {
        // asserting that null is non-null always fails: an NPE anywhere above fails the test
        assertNotNull("null pointer exception", null);
    } finally {
        // release the document even when an assertion above fails
        if (doc != null) {
            doc.close();
        }
    }
    assertTrue("Was able to get file without exception", ok);
    assertNull("EmbeddedFile was correctly null", embeddedFile);
}
use of com.tom_roush.pdfbox.pdmodel.PDDocumentNameDictionary in project PdfBox-Android by TomRoush.
the class TestSymmetricKeyEncryption method extractEmbeddedFile.
/**
 * Extracts the single embedded file of a PDF, saves it under
 * {@code testResultsDir} and returns the saved file.
 *
 * The document is expected to contain exactly one embedded file; this is
 * asserted, as is that the number of bytes written matches the size
 * reported by the embedded file.
 *
 * @param pdfInputStream stream the PDF is loaded from
 * @param name file name to save the extracted content under
 * @return the file the embedded content was written to
 * @throws IOException if the PDF cannot be loaded or the content cannot be copied
 */
private File extractEmbeddedFile(InputStream pdfInputStream, String name) throws IOException {
    PDDocument docWithEmbeddedFile = PDDocument.load(pdfInputStream);
    try {
        PDDocumentCatalog catalog = docWithEmbeddedFile.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
        Map<String, PDComplexFileSpecification> embeddedFileNames = embeddedFiles.getNames();
        assertEquals(1, embeddedFileNames.size());
        Map.Entry<String, PDComplexFileSpecification> entry = embeddedFileNames.entrySet().iterator().next();
        Log.i("PdfBox-Android", "Processing embedded file " + entry.getKey() + ":");
        PDComplexFileSpecification complexFileSpec = entry.getValue();
        PDEmbeddedFile embeddedFile = complexFileSpec.getEmbeddedFile();
        File resultFile = new File(testResultsDir, name);
        FileOutputStream fos = new FileOutputStream(resultFile);
        try {
            InputStream is = embeddedFile.createInputStream();
            try {
                IOUtils.copy(is, fos);
            } finally {
                is.close();
            }
        } finally {
            // closed before the length check below so all bytes are on disk
            fos.close();
        }
        Log.i("PdfBox-Android", " size: " + embeddedFile.getSize());
        assertEquals(embeddedFile.getSize(), resultFile.length());
        return resultFile;
    } finally {
        // the extracted copy lives on disk, so the document is no longer needed
        docWithEmbeddedFile.close();
    }
}
use of com.tom_roush.pdfbox.pdmodel.PDDocumentNameDictionary in project PdfBox-Android by TomRoush.
the class EndstreamOutputStreamTest method testPDFBox2079EmbeddedFile.
/**
 * Regression test for PDFBOX-2079: extracts the attachment of
 * {@code embedded_zip.pdf} and checks that exactly 17660 bytes are written.
 *
 * @throws IOException if the document cannot be loaded or the attachment cannot be written
 */
@Test
public void testPDFBox2079EmbeddedFile() throws IOException {
    // there should be 17660 bytes in the zip file.
    // in PDFBox 1.8.5, windows newline is appended to the byte stream
    // yielding 17662 bytes, which causes a problem for ZipFile in Java 1.6
    // Modification of embedded_zip.pdf for 2.0:
    // /Length entry removed to force usage of EndstreamOutputStream
    PDDocument doc = PDDocument.load(new File("src/test/resources/pdfbox/com/tom_roush/pdfbox/pdfparser", "embedded_zip.pdf"));
    try {
        PDDocumentCatalog catalog = doc.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        PDEmbeddedFilesNameTreeNode node = names.getEmbeddedFiles();
        Map<String, PDComplexFileSpecification> map = node.getNames();
        Assert.assertEquals(1, map.size());
        PDComplexFileSpecification spec = map.get("My first attachment");
        PDEmbeddedFile file = spec.getEmbeddedFile();
        InputStream input = file.createInputStream();
        try {
            File d = new File("target/test-output");
            d.mkdirs();
            File f = new File(d, spec.getFile());
            OutputStream os = new FileOutputStream(f);
            try {
                IOUtils.copy(input, os);
            } finally {
                // closed before measuring so the length reflects everything written
                os.close();
            }
            Assert.assertEquals(17660, f.length());
        } finally {
            input.close();
        }
    } finally {
        // close the document even when an assertion above fails
        doc.close();
    }
}
use of com.tom_roush.pdfbox.pdmodel.PDDocumentNameDictionary in project PdfBox-Android by TomRoush.
the class PDFMergerUtility method appendDocument.
/**
 * Append all pages from source to destination.
 *
 * Besides the pages themselves, document-level entries are merged into the
 * destination: document information, version, open action, AcroForm,
 * article threads, names, named destinations, outline, page mode, page
 * labels, metadata, optional content properties, output intents and, where
 * both documents allow it, the logical structure tree.
 *
 * @param destination the document to receive the pages
 * @param source the document originating the new pages
 *
 * @throws IOException If either document is closed, if the source contains
 * dynamic XFA form content, or if there is an error accessing data from
 * either document.
 */
public void appendDocument(PDDocument destination, PDDocument source) throws IOException {
    if (source.getDocument().isClosed()) {
        throw new IOException("Error: source PDF is closed.");
    }
    if (destination.getDocument().isClosed()) {
        throw new IOException("Error: destination PDF is closed.");
    }
    PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
    PDDocumentCatalog srcCatalog = source.getDocumentCatalog();
    if (isDynamicXfa(srcCatalog.getAcroForm())) {
        throw new IOException("Error: can't merge source document containing dynamic XFA form content.");
    }
    PDDocumentInformation destInfo = destination.getDocumentInformation();
    PDDocumentInformation srcInfo = source.getDocumentInformation();
    mergeInto(srcInfo.getCOSObject(), destInfo.getCOSObject(), Collections.<COSName>emptySet());
    // use the highest version number for the resulting pdf
    float destVersion = destination.getVersion();
    float srcVersion = source.getVersion();
    if (destVersion < srcVersion) {
        destination.setVersion(srcVersion);
    }
    int pageIndexOpenActionDest = -1;
    if (destCatalog.getOpenAction() == null) {
        // PDFBOX-3972: get local dest page index, it must be reassigned after the page cloning
        PDDestinationOrAction openAction = null;
        try {
            openAction = srcCatalog.getOpenAction();
        } catch (IOException ex) {
            // PDFBOX-4223
            Log.e("PdfBox-Android", "Invalid OpenAction ignored", ex);
        }
        PDDestination openActionDestination = null;
        if (openAction instanceof PDActionGoTo) {
            openActionDestination = ((PDActionGoTo) openAction).getDestination();
        } else if (openAction instanceof PDDestination) {
            openActionDestination = (PDDestination) openAction;
        }
        if (openActionDestination instanceof PDPageDestination) {
            PDPage page = ((PDPageDestination) openActionDestination).getPage();
            if (page != null) {
                pageIndexOpenActionDest = srcCatalog.getPages().indexOf(page);
            }
        }
        destCatalog.setOpenAction(openAction);
    }
    PDFCloneUtility cloner = new PDFCloneUtility(destination);
    mergeAcroForm(cloner, destCatalog, srcCatalog);
    COSArray destThreads = (COSArray) destCatalog.getCOSObject().getDictionaryObject(COSName.THREADS);
    // the threads to append come from the SOURCE catalog; cloning the destination's
    // own entry would drop the source threads and re-add the destination's to itself
    COSArray srcThreads = (COSArray) cloner.cloneForNewDocument(srcCatalog.getCOSObject().getDictionaryObject(COSName.THREADS));
    if (srcThreads != null) {
        if (destThreads == null) {
            destCatalog.getCOSObject().setItem(COSName.THREADS, srcThreads);
        } else {
            destThreads.addAll(srcThreads);
        }
    }
    PDDocumentNameDictionary destNames = destCatalog.getNames();
    PDDocumentNameDictionary srcNames = srcCatalog.getNames();
    if (srcNames != null) {
        if (destNames == null) {
            destCatalog.getCOSObject().setItem(COSName.NAMES, cloner.cloneForNewDocument(srcNames));
        } else {
            cloner.cloneMerge(srcNames, destNames);
        }
    }
    // only remove (and report) the entry when it is actually present
    if (destNames != null && destNames.getCOSObject().containsKey(COSName.ID_TREE)) {
        // found in 054080.pdf from PDFBOX-4417 and doesn't belong there
        destNames.getCOSObject().removeItem(COSName.ID_TREE);
        Log.w("PdfBox-Android", "Removed /IDTree from /Names dictionary, doesn't belong there");
    }
    PDDocumentNameDestinationDictionary destDests = destCatalog.getDests();
    PDDocumentNameDestinationDictionary srcDests = srcCatalog.getDests();
    if (srcDests != null) {
        if (destDests == null) {
            destCatalog.getCOSObject().setItem(COSName.DESTS, cloner.cloneForNewDocument(srcDests));
        } else {
            cloner.cloneMerge(srcDests, destDests);
        }
    }
    PDDocumentOutline destOutline = destCatalog.getDocumentOutline();
    PDDocumentOutline srcOutline = srcCatalog.getDocumentOutline();
    if (srcOutline != null) {
        if (destOutline == null || destOutline.getFirstChild() == null) {
            PDDocumentOutline cloned = new PDDocumentOutline((COSDictionary) cloner.cloneForNewDocument(srcOutline));
            destCatalog.setDocumentOutline(cloned);
        } else {
            // search last sibling for dest, because /Last entry is sometimes wrong
            PDOutlineItem destLastOutlineItem = destOutline.getFirstChild();
            while (destLastOutlineItem.getNextSibling() != null) {
                destLastOutlineItem = destLastOutlineItem.getNextSibling();
            }
            for (PDOutlineItem item : srcOutline.children()) {
                // get each child, clone its dictionary, remove siblings info,
                // append outline item created from there
                COSDictionary clonedDict = (COSDictionary) cloner.cloneForNewDocument(item);
                clonedDict.removeItem(COSName.PREV);
                clonedDict.removeItem(COSName.NEXT);
                PDOutlineItem clonedItem = new PDOutlineItem(clonedDict);
                destLastOutlineItem.insertSiblingAfter(clonedItem);
                destLastOutlineItem = destLastOutlineItem.getNextSibling();
            }
        }
    }
    PageMode destPageMode = destCatalog.getPageMode();
    PageMode srcPageMode = srcCatalog.getPageMode();
    if (destPageMode == null) {
        destCatalog.setPageMode(srcPageMode);
    }
    COSDictionary destLabels = destCatalog.getCOSObject().getCOSDictionary(COSName.PAGE_LABELS);
    COSDictionary srcLabels = srcCatalog.getCOSObject().getCOSDictionary(COSName.PAGE_LABELS);
    if (srcLabels != null) {
        // source label indices are shifted by the number of pages already in the destination
        int destPageCount = destination.getNumberOfPages();
        COSArray destNums;
        if (destLabels == null) {
            destLabels = new COSDictionary();
            destNums = new COSArray();
            destLabels.setItem(COSName.NUMS, destNums);
            destCatalog.getCOSObject().setItem(COSName.PAGE_LABELS, destLabels);
        } else {
            destNums = (COSArray) destLabels.getDictionaryObject(COSName.NUMS);
        }
        COSArray srcNums = (COSArray) srcLabels.getDictionaryObject(COSName.NUMS);
        if (srcNums != null) {
            int startSize = destNums.size();
            // entries are consumed as (index, label) pairs
            for (int i = 0; i < srcNums.size(); i += 2) {
                COSBase base = srcNums.getObject(i);
                if (!(base instanceof COSNumber)) {
                    Log.e("PdfBox-Android", "page labels ignored, index " + i + " should be a number, but is " + base);
                    // remove what we added
                    while (destNums.size() > startSize) {
                        destNums.remove(startSize);
                    }
                    break;
                }
                COSNumber labelIndex = (COSNumber) base;
                long labelIndexValue = labelIndex.intValue();
                destNums.add(COSInteger.get(labelIndexValue + destPageCount));
                destNums.add(cloner.cloneForNewDocument(srcNums.getObject(i + 1)));
            }
        }
    }
    COSStream destMetadata = destCatalog.getCOSObject().getCOSStream(COSName.METADATA);
    COSStream srcMetadata = srcCatalog.getCOSObject().getCOSStream(COSName.METADATA);
    if (destMetadata == null && srcMetadata != null) {
        try {
            PDStream newStream = new PDStream(destination, srcMetadata.createInputStream(), (COSName) null);
            mergeInto(srcMetadata, newStream.getCOSObject(), new HashSet<COSName>(Arrays.asList(COSName.FILTER, COSName.LENGTH)));
            destCatalog.getCOSObject().setItem(COSName.METADATA, newStream);
        } catch (IOException ex) {
            // PDFBOX-4227 cleartext XMP stream with /Flate
            Log.e("PdfBox-Android", "Metadata skipped because it could not be read", ex);
        }
    }
    COSDictionary destOCP = destCatalog.getCOSObject().getCOSDictionary(COSName.OCPROPERTIES);
    COSDictionary srcOCP = srcCatalog.getCOSObject().getCOSDictionary(COSName.OCPROPERTIES);
    if (destOCP == null && srcOCP != null) {
        destCatalog.getCOSObject().setItem(COSName.OCPROPERTIES, cloner.cloneForNewDocument(srcOCP));
    } else if (destOCP != null && srcOCP != null) {
        cloner.cloneMerge(srcOCP, destOCP);
    }
    mergeOutputIntents(cloner, srcCatalog, destCatalog);
    // merge logical structure hierarchy
    boolean mergeStructTree = false;
    int destParentTreeNextKey = -1;
    Map<Integer, COSObjectable> srcNumberTreeAsMap = null;
    Map<Integer, COSObjectable> destNumberTreeAsMap = null;
    PDStructureTreeRoot srcStructTree = srcCatalog.getStructureTreeRoot();
    PDStructureTreeRoot destStructTree = destCatalog.getStructureTreeRoot();
    if (destStructTree == null && srcStructTree != null) {
        // create a dummy structure tree in the destination, so that the source
        // tree is cloned. (We can't just copy the tree reference due to PDFBOX-3999)
        destStructTree = new PDStructureTreeRoot();
        destCatalog.setStructureTreeRoot(destStructTree);
        destStructTree.setParentTree(new PDNumberTreeNode(PDParentTreeValue.class));
        // PDFBOX-4429: remove bogus StructParent(s)
        for (PDPage page : destCatalog.getPages()) {
            page.getCOSObject().removeItem(COSName.STRUCT_PARENTS);
            for (PDAnnotation ann : page.getAnnotations()) {
                ann.getCOSObject().removeItem(COSName.STRUCT_PARENT);
            }
        }
    }
    if (destStructTree != null) {
        PDNumberTreeNode destParentTree = destStructTree.getParentTree();
        destParentTreeNextKey = destStructTree.getParentTreeNextKey();
        if (destParentTree != null) {
            destNumberTreeAsMap = getNumberTreeAsMap(destParentTree);
            if (destParentTreeNextKey < 0) {
                // no usable next key stored: derive it from the keys present in the tree
                if (destNumberTreeAsMap.isEmpty()) {
                    destParentTreeNextKey = 0;
                } else {
                    destParentTreeNextKey = Collections.max(destNumberTreeAsMap.keySet()) + 1;
                }
            }
            if (destParentTreeNextKey >= 0 && srcStructTree != null) {
                PDNumberTreeNode srcParentTree = srcStructTree.getParentTree();
                if (srcParentTree != null) {
                    srcNumberTreeAsMap = getNumberTreeAsMap(srcParentTree);
                    if (!srcNumberTreeAsMap.isEmpty()) {
                        mergeStructTree = true;
                    }
                }
            }
        }
    }
    Map<COSDictionary, COSDictionary> objMapping = new HashMap<COSDictionary, COSDictionary>();
    int pageIndex = 0;
    for (PDPage page : srcCatalog.getPages()) {
        PDPage newPage = new PDPage((COSDictionary) cloner.cloneForNewDocument(page.getCOSObject()));
        if (!mergeStructTree) {
            // PDFBOX-4429: remove bogus StructParent(s)
            newPage.getCOSObject().removeItem(COSName.STRUCT_PARENTS);
            for (PDAnnotation ann : newPage.getAnnotations()) {
                ann.getCOSObject().removeItem(COSName.STRUCT_PARENT);
            }
        }
        newPage.setCropBox(page.getCropBox());
        newPage.setMediaBox(page.getMediaBox());
        newPage.setRotation(page.getRotation());
        PDResources resources = page.getResources();
        if (resources != null) {
            // this is smart enough to just create references for resources that are used on multiple pages
            newPage.setResources(new PDResources((COSDictionary) cloner.cloneForNewDocument(resources)));
        } else {
            newPage.setResources(new PDResources());
        }
        if (mergeStructTree) {
            // add the value of the destination ParentTreeNextKey to every source element
            // StructParent(s) value so that these don't overlap with the existing values
            updateStructParentEntries(newPage, destParentTreeNextKey);
            objMapping.put(page.getCOSObject(), newPage.getCOSObject());
            List<PDAnnotation> oldAnnots = page.getAnnotations();
            List<PDAnnotation> newAnnots = newPage.getAnnotations();
            for (int i = 0; i < oldAnnots.size(); i++) {
                objMapping.put(oldAnnots.get(i).getCOSObject(), newAnnots.get(i).getCOSObject());
            }
            // TODO update mapping for XObjects
        }
        destination.addPage(newPage);
        if (pageIndex == pageIndexOpenActionDest) {
            // PDFBOX-3972: reassign the page.
            // The openAction is either a PDActionGoTo or a PDPageDestination
            PDDestinationOrAction openAction = destCatalog.getOpenAction();
            PDPageDestination pageDestination;
            if (openAction instanceof PDActionGoTo) {
                pageDestination = (PDPageDestination) ((PDActionGoTo) openAction).getDestination();
            } else {
                pageDestination = (PDPageDestination) openAction;
            }
            pageDestination.setPage(newPage);
        }
        ++pageIndex;
    }
    if (mergeStructTree) {
        updatePageReferences(cloner, srcNumberTreeAsMap, objMapping);
        int maxSrcKey = -1;
        for (Map.Entry<Integer, COSObjectable> entry : srcNumberTreeAsMap.entrySet()) {
            int srcKey = entry.getKey();
            maxSrcKey = Math.max(srcKey, maxSrcKey);
            destNumberTreeAsMap.put(destParentTreeNextKey + srcKey, cloner.cloneForNewDocument(entry.getValue()));
        }
        destParentTreeNextKey += maxSrcKey + 1;
        PDNumberTreeNode newParentTreeNode = new PDNumberTreeNode(PDParentTreeValue.class);
        // Note that all elements are stored flatly. This could become a problem for large files
        // when these are opened in a viewer that uses the tagging information.
        // If this happens, then PDNumberTreeNode should be improved with a convenience method that
        // stores the map into a B+Tree, see https://en.wikipedia.org/wiki/B+_tree
        newParentTreeNode.setNumbers(destNumberTreeAsMap);
        destStructTree.setParentTree(newParentTreeNode);
        destStructTree.setParentTreeNextKey(destParentTreeNextKey);
        mergeKEntries(cloner, srcStructTree, destStructTree);
        mergeRoleMap(srcStructTree, destStructTree);
        mergeIDTree(cloner, srcStructTree, destStructTree);
        mergeMarkInfo(destCatalog, srcCatalog);
        mergeLanguage(destCatalog, srcCatalog);
        mergeViewerPreferences(destCatalog, srcCatalog);
    }
}
use of com.tom_roush.pdfbox.pdmodel.PDDocumentNameDictionary in project PdfBox-Android by TomRoush.
the class TestEmbeddedFiles method testOSSpecificAttachments.
/**
 * Loads {@code testPDF_multiFormatEmbFiles.pdf} and checks that the
 * attachment named "My first attachment" exposes four embedded files
 * (generic, Mac, DOS and Unix), each containing its expected marker text.
 *
 * @throws IOException if the document cannot be loaded or an embedded file cannot be read
 */
@Test
public void testOSSpecificAttachments() throws IOException {
    PDEmbeddedFile nonOSFile = null;
    PDEmbeddedFile macFile = null;
    PDEmbeddedFile dosFile = null;
    PDEmbeddedFile unixFile = null;
    PDDocument doc = PDDocument.load(TestEmbeddedFiles.class.getResourceAsStream("/pdfbox/com/tom_roush/pdfbox/pdmodel/common/testPDF_multiFormatEmbFiles.pdf"));
    try {
        PDDocumentCatalog catalog = doc.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        PDEmbeddedFilesNameTreeNode treeNode = names.getEmbeddedFiles();
        List<PDNameTreeNode<PDComplexFileSpecification>> kids = treeNode.getKids();
        for (PDNameTreeNode<PDComplexFileSpecification> kid : kids) {
            Map<String, PDComplexFileSpecification> tmpNames = kid.getNames();
            // the map is already typed, so no intermediate variable or downcast is needed
            PDComplexFileSpecification spec = tmpNames.get("My first attachment");
            nonOSFile = spec.getEmbeddedFile();
            macFile = spec.getEmbeddedFileMac();
            dosFile = spec.getEmbeddedFileDos();
            unixFile = spec.getEmbeddedFileUnix();
        }
        // the assertions read the embedded streams, so they run while the document is still open
        assertTrue("non os specific", byteArrayContainsLC("non os specific", nonOSFile.toByteArray(), "ISO-8859-1"));
        assertTrue("mac", byteArrayContainsLC("mac embedded", macFile.toByteArray(), "ISO-8859-1"));
        assertTrue("dos", byteArrayContainsLC("dos embedded", dosFile.toByteArray(), "ISO-8859-1"));
        assertTrue("unix", byteArrayContainsLC("unix embedded", unixFile.toByteArray(), "ISO-8859-1"));
    } finally {
        // close the document even when an assertion above fails
        doc.close();
    }
}
Aggregations