Search in sources :

Example 6 with PDEmbeddedFile

use of org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile in project mustangproject by ZUGFeRD.

the class ZUGFeRDImporter method extractFiles.

/**
 * Scans the given embedded file specifications for invoice data and additional data.
 * <p>
 * A specification whose filename is one of the known invoice names sets
 * {@code containsMeta} and hands its bytes to {@code setRawXML}. A specification whose
 * filename starts with {@code additional_data} is stored in {@code additionalXMLs} under
 * that filename. Specifications without a filename are skipped.
 *
 * @param names embedded file specifications keyed by their alias in the PDF name tree
 * @throws IOException propagated from reading or processing an embedded file
 */
private void extractFiles(Map<String, PDComplexFileSpecification> names) throws IOException {
    // Only the specifications are needed; the alias keys are never used.
    for (final PDComplexFileSpecification fileSpec : names.values()) {
        final String filename = fileSpec.getFilename();
        if (filename == null) {
            // A specification without any filename entry cannot match; skip it rather
            // than fail with a NullPointerException on the comparisons below.
            continue;
        }
        // filenames for invoice data (ZUGFeRD v1 and v2, Factur-X, XRechnung, Order-X)
        final boolean isInvoiceData = "ZUGFeRD-invoice.xml".equals(filename)
                || "zugferd-invoice.xml".equals(filename)
                || "factur-x.xml".equals(filename)
                || "xrechnung.xml".equals(filename)
                || "order-x.xml".equals(filename);
        if (isInvoiceData) {
            containsMeta = true;
            final PDEmbeddedFile embeddedFile = fileSpec.getEmbeddedFile();
            setRawXML(embeddedFile.toByteArray());
        }
        if (filename.startsWith("additional_data")) {
            final PDEmbeddedFile embeddedFile = fileSpec.getEmbeddedFile();
            additionalXMLs.put(filename, embeddedFile.toByteArray());
        }
    }
}
Also used : PDEmbeddedFile(org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile) PDComplexFileSpecification(org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification)

Example 7 with PDEmbeddedFile

use of org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile in project mustangproject by ZUGFeRD.

the class ZUGFeRDExporter method PDFAttachGenericFile.

/**
 * Embeds an external file (generic - any type allowed) in the PDF.
 * <p>
 * The file is registered in the document's embedded files name tree under
 * {@code filename} and appended to the catalog's {@code /AF} array; entries already
 * present in either place are kept.
 *
 * @param doc
 *            PDDocument to attach the file to.
 * @param filename
 *            name of the file that will become attachment name in the PDF
 * @param relationship
 *            how the file relates to the content, e.g. "Alternative"
 * @param description
 *            Human-readable description of the file content
 * @param subType
 *            type of the data e.g. could be "text/xml" - mime like
 * @param data
 *            the binary data of the file/attachment
 * @throws java.io.IOException
 *             if the embedded file stream cannot be created from the data
 */
public void PDFAttachGenericFile(PDDocument doc, String filename, String relationship, String description, String subType, byte[] data) throws IOException {
    PDComplexFileSpecification fs = new PDComplexFileSpecification();
    fs.setFile(filename);
    COSDictionary dict = fs.getCOSObject();
    dict.setName("AFRelationship", relationship);
    dict.setString("UF", filename);
    dict.setString("Desc", description);

    // Wrap the raw bytes in a stream so they can be stored as an embedded file.
    PDEmbeddedFile ef = new PDEmbeddedFile(doc, new ByteArrayInputStream(data));
    ef.setSubtype(subType);
    ef.setSize(data.length);
    ef.setCreationDate(new GregorianCalendar());
    ef.setModDate(GregorianCalendar.getInstance());
    fs.setEmbeddedFile(ef);

    // In addition make sure the embedded file is set under /UF
    COSDictionary efDict = (COSDictionary) dict.getDictionaryObject(COSName.EF);
    COSBase lowerLevelFile = efDict.getItem(COSName.F);
    efDict.setItem(COSName.UF, lowerLevelFile);

    // now add the entry to the embedded file tree and set in the document.
    PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog());
    PDEmbeddedFilesNameTreeNode efTree = names.getEmbeddedFiles();
    if (efTree == null) {
        efTree = new PDEmbeddedFilesNameTreeNode();
    }
    // Copy the existing entries so attachments already in the tree are preserved.
    Map<String, PDComplexFileSpecification> namesMap = new HashMap<String, PDComplexFileSpecification>();
    Map<String, PDComplexFileSpecification> oldNamesMap = efTree.getNames();
    if (oldNamesMap != null) {
        namesMap.putAll(oldNamesMap);
    }
    namesMap.put(filename, fs);
    efTree.setNames(namesMap);
    names.setEmbeddedFiles(efTree);
    doc.getDocumentCatalog().setNames(names);

    // AF entry (Array) in catalog with the FileSpec.
    // getDictionaryObject resolves an indirect reference, so an /AF array stored as an
    // indirect object is reused instead of causing a ClassCastException.
    COSDictionary catalogDict = doc.getDocumentCatalog().getCOSObject();
    COSName afKey = COSName.getPDFName("AF");
    COSBase existingAF = catalogDict.getDictionaryObject(afKey);
    COSArray afArray = existingAF instanceof COSArray ? (COSArray) existingAF : new COSArray();
    afArray.add(fs);
    catalogDict.setItem(afKey, afArray);
}
Also used : COSDictionary(org.apache.pdfbox.cos.COSDictionary) COSArray(org.apache.pdfbox.cos.COSArray) ByteArrayInputStream(java.io.ByteArrayInputStream) PDEmbeddedFilesNameTreeNode(org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode) HashMap(java.util.HashMap) PDEmbeddedFile(org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile) GregorianCalendar(java.util.GregorianCalendar) COSBase(org.apache.pdfbox.cos.COSBase) PDComplexFileSpecification(org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification) PDDocumentNameDictionary(org.apache.pdfbox.pdmodel.PDDocumentNameDictionary)

Example 8 with PDEmbeddedFile

use of org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile in project pdfbox by apache.

the class ExtractEmbeddedFiles method main.

/**
 * This is the main method.
 * <p>
 * Expects exactly one argument, the path of the PDF to read; otherwise prints the usage
 * and exits with status 1. Embedded files are collected from the document's embedded
 * files name tree and from file attachment annotations on every page, and are handed to
 * {@code extractFiles} / {@code extractFile} together with the PDF's parent directory
 * prefix.
 *
 * @param args The command line arguments.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    if (args.length != 1) {
        usage();
        System.exit(1);
    } else {
        PDDocument document = null;
        try {
            File pdfFile = new File(args[0]);
            // getParent() is null for a bare file name such as "a.pdf"; use an empty
            // prefix then so the output path does not start with the text "null".
            String parentDir = pdfFile.getParent();
            String filePath = parentDir == null ? "" : parentDir + System.getProperty("file.separator");
            document = PDDocument.load(pdfFile);
            PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
            PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
            if (efTree != null) {
                extractFilesFromNameTreeNode(efTree, filePath);
            }
            // extract files from annotations
            for (PDPage page : document.getPages()) {
                for (PDAnnotation annotation : page.getAnnotations()) {
                    if (annotation instanceof PDAnnotationFileAttachment) {
                        PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation;
                        PDFileSpecification fileSpec = annotationFileAttachment.getFile();
                        if (fileSpec instanceof PDComplexFileSpecification) {
                            PDComplexFileSpecification complexFileSpec = (PDComplexFileSpecification) fileSpec;
                            PDEmbeddedFile embeddedFile = getEmbeddedFile(complexFileSpec);
                            // NOTE(review): getEmbeddedFile is assumed to return null when the
                            // specification carries no embedded stream; nothing to extract then.
                            if (embeddedFile != null) {
                                extractFile(filePath, complexFileSpec.getFilename(), embeddedFile);
                            }
                        }
                    }
                }
            }
        } finally {
            if (document != null) {
                document.close();
            }
        }
    }
}

/**
 * Extracts the files of a name tree node, descending into its kids recursively.
 * <p>
 * A node either carries names directly or delegates to kid nodes; kids may themselves
 * have kids, so the whole subtree is visited.
 *
 * @param node the name tree node to process
 * @param filePath directory prefix handed on to {@code extractFiles}
 * @throws IOException If extracting one of the files fails.
 */
private static void extractFilesFromNameTreeNode(PDNameTreeNode<PDComplexFileSpecification> node, String filePath) throws IOException {
    Map<String, PDComplexFileSpecification> names = node.getNames();
    if (names != null) {
        extractFiles(names, filePath);
        return;
    }
    List<PDNameTreeNode<PDComplexFileSpecification>> kids = node.getKids();
    if (kids == null) {
        // Neither names nor kids: an empty node, nothing to extract.
        return;
    }
    for (PDNameTreeNode<PDComplexFileSpecification> kid : kids) {
        extractFilesFromNameTreeNode(kid, filePath);
    }
}
Also used : PDPage(org.apache.pdfbox.pdmodel.PDPage) PDAnnotationFileAttachment(org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment) PDEmbeddedFilesNameTreeNode(org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode) PDEmbeddedFile(org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile) PDAnnotation(org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation) PDComplexFileSpecification(org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification) PDDocumentNameDictionary(org.apache.pdfbox.pdmodel.PDDocumentNameDictionary) PDFileSpecification(org.apache.pdfbox.pdmodel.common.filespecification.PDFileSpecification) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) PDEmbeddedFile(org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile) File(java.io.File) PDNameTreeNode(org.apache.pdfbox.pdmodel.common.PDNameTreeNode)

Example 9 with PDEmbeddedFile

use of org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile in project pdfbox by apache.

the class EndstreamOutputStreamTest method testPDFBox2079EmbeddedFile.

/**
 * Loads a PDF whose embedded file stream has no /Length entry, writes the single
 * attachment to {@code target/test-output} and checks the size of the written file.
 *
 * @throws IOException if the PDF cannot be loaded or the attachment cannot be written
 */
@Test
public void testPDFBox2079EmbeddedFile() throws IOException {
    // /Length entry removed to force usage of EndstreamOutputStream
    try (PDDocument doc = PDDocument.load(new File("src/test/resources/org/apache/pdfbox/pdfparser", "embedded_zip.pdf"))) {
        PDDocumentCatalog catalog = doc.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        PDEmbeddedFilesNameTreeNode node = names.getEmbeddedFiles();
        Map<String, PDComplexFileSpecification> map = node.getNames();
        Assert.assertEquals(1, map.size());
        PDComplexFileSpecification spec = map.get("My first attachment");
        PDEmbeddedFile file = spec.getEmbeddedFile();
        File d = new File("target/test-output");
        d.mkdirs();
        File f = new File(d, spec.getFile());
        // Both streams are managed here so the embedded file's input stream is closed
        // as well, even if the copy fails.
        try (InputStream input = file.createInputStream();
             OutputStream os = new FileOutputStream(f)) {
            IOUtils.copy(input, os);
        }
        Assert.assertEquals(17660, f.length());
    }
}
Also used : PDEmbeddedFilesNameTreeNode(org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode) PDEmbeddedFile(org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile) InputStream(java.io.InputStream) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) OutputStream(java.io.OutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) FileOutputStream(java.io.FileOutputStream) FileOutputStream(java.io.FileOutputStream) PDEmbeddedFile(org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile) File(java.io.File) PDComplexFileSpecification(org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification) PDDocumentCatalog(org.apache.pdfbox.pdmodel.PDDocumentCatalog) PDDocumentNameDictionary(org.apache.pdfbox.pdmodel.PDDocumentNameDictionary) Test(org.junit.Test)

Example 10 with PDEmbeddedFile

use of org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile in project pdfbox by apache.

the class TestSymmetricKeyEncryption method extractEmbeddedFile.

/**
 * Extracts the single embedded file of a PDF, saves it to {@code testResultsDir} and
 * returns the saved file.
 * <p>
 * Asserts that the PDF contains exactly one embedded file and that the saved file has
 * the size recorded for the embedded file.
 *
 * @param pdfInputStream stream of the PDF that contains the embedded file
 * @param name file name to save the embedded file under
 * @return the saved file
 * @throws IOException if the PDF cannot be loaded or the file cannot be written
 */
private File extractEmbeddedFile(InputStream pdfInputStream, String name) throws IOException {
    PDDocument docWithEmbeddedFile = PDDocument.load(pdfInputStream);
    try {
        PDDocumentCatalog catalog = docWithEmbeddedFile.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
        Map<String, PDComplexFileSpecification> embeddedFileNames = embeddedFiles.getNames();
        Assert.assertEquals(1, embeddedFileNames.size());
        Map.Entry<String, PDComplexFileSpecification> entry = embeddedFileNames.entrySet().iterator().next();
        LOG.info("Processing embedded file " + entry.getKey() + ":");
        PDComplexFileSpecification complexFileSpec = entry.getValue();
        PDEmbeddedFile embeddedFile = complexFileSpec.getEmbeddedFile();
        File resultFile = new File(testResultsDir, name);
        // Nested try/finally so both streams are released even if the copy fails.
        FileOutputStream fos = new FileOutputStream(resultFile);
        try {
            InputStream is = embeddedFile.createInputStream();
            try {
                IOUtils.copy(is, fos);
            } finally {
                is.close();
            }
        } finally {
            fos.close();
        }
        LOG.info("  size: " + embeddedFile.getSize());
        assertEquals(embeddedFile.getSize(), resultFile.length());
        return resultFile;
    } finally {
        // The document is only needed while reading the embedded stream; close it so
        // its resources are not leaked by the test.
        docWithEmbeddedFile.close();
    }
}
Also used : PDEmbeddedFilesNameTreeNode(org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode) PDEmbeddedFile(org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) PDComplexFileSpecification(org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification) PDDocumentCatalog(org.apache.pdfbox.pdmodel.PDDocumentCatalog) PDDocumentNameDictionary(org.apache.pdfbox.pdmodel.PDDocumentNameDictionary) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) FileOutputStream(java.io.FileOutputStream) Map(java.util.Map) PDEmbeddedFile(org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile) File(java.io.File)

Aggregations

PDEmbeddedFile (org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile)13 PDComplexFileSpecification (org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification)12 PDDocumentNameDictionary (org.apache.pdfbox.pdmodel.PDDocumentNameDictionary)8 PDEmbeddedFilesNameTreeNode (org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode)8 PDDocument (org.apache.pdfbox.pdmodel.PDDocument)7 File (java.io.File)5 PDDocumentCatalog (org.apache.pdfbox.pdmodel.PDDocumentCatalog)5 FileOutputStream (java.io.FileOutputStream)4 ByteArrayInputStream (java.io.ByteArrayInputStream)3 InputStream (java.io.InputStream)3 Test (org.junit.Test)3 GregorianCalendar (java.util.GregorianCalendar)2 Map (java.util.Map)2 PDPage (org.apache.pdfbox.pdmodel.PDPage)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1 OutputStreamWriter (java.io.OutputStreamWriter)1 Writer (java.io.Writer)1