Use of org.apache.pdfbox.pdmodel.common.PDNameTreeNode in project mustangproject by ZUGFeRD.
Class ZUGFeRDImporter, method extractLowLevel.
/**
 * Extracts a ZUGFeRD invoice from a PDF document represented by an input stream.
 * Errors are reported via exception handling.
 *
 * <p>The document's XMP metadata is read into {@code xmpString}; afterwards the
 * embedded-files name tree is walked (the root's own names, or otherwise the names of
 * its direct kids) and every name map found is handed to {@code extractFiles}.
 * If the document has no catalog metadata, "no-xmlpart" is logged and nothing is extracted.
 *
 * @param pdfStream an input stream of a PDF file
 * @throws IOException if the PDF cannot be loaded or its content cannot be read
 */
private void extractLowLevel(InputStream pdfStream) throws IOException {
    try (PDDocument doc = PDDocument.load(pdfStream)) {
        // Validate the catalog before anything dereferences it.
        if (doc.getDocumentCatalog() == null || doc.getDocumentCatalog().getMetadata() == null) {
            Logger.getLogger(ZUGFeRDImporter.class.getName()).log(Level.INFO, "no-xmlpart");
            return;
        }

        // Close the exported metadata stream once it has been converted to text.
        try (InputStream xmpStream = doc.getDocumentCatalog().getMetadata().exportXMPMetadata()) {
            xmpString = convertStreamToString(xmpStream);
        }

        final PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog());
        final PDEmbeddedFilesNameTreeNode etn = names.getEmbeddedFiles();
        if (etn == null) {
            return;
        }

        // see https://memorynotfound.com/apache-pdfbox-extract-embedded-file-pdf-document/
        final Map<String, PDComplexFileSpecification> efMap = etn.getNames();
        if (efMap != null) {
            extractFiles(efMap);
            return;
        }

        // The root carries no names of its own: look at its direct kids instead.
        final List<PDNameTreeNode<PDComplexFileSpecification>> kids = etn.getKids();
        if (kids == null) {
            // Neither names nor kids present: there is nothing to extract.
            return;
        }
        for (final PDNameTreeNode<PDComplexFileSpecification> node : kids) {
            final Map<String, PDComplexFileSpecification> kidNames = node.getNames();
            // A kid without a names map is skipped instead of passing null on.
            if (kidNames != null) {
                extractFiles(kidNames);
            }
        }
    }
}
Use of org.apache.pdfbox.pdmodel.common.PDNameTreeNode in project tika by Apache.
Class AbstractPDF2XHTML, method extractEmbeddedDocuments.
/**
 * Hands the embedded-file name maps of the given document to
 * {@code processEmbeddedDocNames}.
 *
 * <p>If the root of the embedded-files name tree has a names map, only that map is
 * processed. Otherwise each direct kid of the root is inspected and every non-null
 * names map found there is processed. A missing tree or missing kids list ends the
 * method without doing anything.
 *
 * @param document the PDF document whose embedded files are looked up
 */
private void extractEmbeddedDocuments(PDDocument document) throws IOException, SAXException, TikaException {
    PDEmbeddedFilesNameTreeNode root =
            new PDDocumentNameDictionary(document.getDocumentCatalog()).getEmbeddedFiles();
    if (root == null) {
        return;
    }

    // Names stored directly on the root take precedence over the kids.
    Map<String, PDComplexFileSpecification> rootNames = root.getNames();
    if (rootNames != null) {
        processEmbeddedDocNames(rootNames);
        return;
    }

    List<PDNameTreeNode<PDComplexFileSpecification>> children = root.getKids();
    if (children != null) {
        for (PDNameTreeNode<PDComplexFileSpecification> child : children) {
            Map<String, PDComplexFileSpecification> childNames = child.getNames();
            if (childNames == null) {
                continue;
            }
            processEmbeddedDocNames(childNames);
        }
    }
}
Use of org.apache.pdfbox.pdmodel.common.PDNameTreeNode in project pdfbox by Apache.
Class ExtractEmbeddedFiles, method main.
/**
 * This is the main method.
 *
 * <p>Expects exactly one argument, the path of a PDF file; otherwise prints the usage
 * and exits with status 1. Files found in the document's embedded-files name tree
 * (root names, or the names of the root's direct kids) and in file-attachment
 * annotations on the pages are passed to the extraction helpers together with the
 * directory prefix of the input file.
 *
 * @param args The command line arguments.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    if (args.length != 1) {
        usage();
        System.exit(1);
    } else {
        File pdfFile = new File(args[0]);
        // File.getParent() is null for a bare file name such as "doc.pdf"; concatenating
        // it would yield the literal prefix "null/". Use an empty prefix in that case.
        String parent = pdfFile.getParent();
        String filePath = parent == null ? "" : parent + System.getProperty("file.separator");

        try (PDDocument document = PDDocument.load(pdfFile)) {
            PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
            PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
            if (efTree != null) {
                Map<String, PDComplexFileSpecification> names = efTree.getNames();
                if (names != null) {
                    extractFiles(names, filePath);
                } else {
                    List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids();
                    // A node may have neither names nor kids; skip missing entries.
                    if (kids != null) {
                        for (PDNameTreeNode<PDComplexFileSpecification> node : kids) {
                            names = node.getNames();
                            if (names != null) {
                                extractFiles(names, filePath);
                            }
                        }
                    }
                }
            }

            // extract files from annotations
            for (PDPage page : document.getPages()) {
                for (PDAnnotation annotation : page.getAnnotations()) {
                    if (annotation instanceof PDAnnotationFileAttachment) {
                        PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation;
                        PDFileSpecification fileSpec = annotationFileAttachment.getFile();
                        if (fileSpec instanceof PDComplexFileSpecification) {
                            PDComplexFileSpecification complexFileSpec = (PDComplexFileSpecification) fileSpec;
                            PDEmbeddedFile embeddedFile = getEmbeddedFile(complexFileSpec);
                            // NOTE(review): assumes getEmbeddedFile can return null when the
                            // specification carries no embedded stream - confirm against the helper.
                            // NOTE(review): the file name comes from the PDF itself; extractFile
                            // should be checked for path-traversal handling.
                            if (embeddedFile != null) {
                                extractFile(filePath, complexFileSpec.getFilename(), embeddedFile);
                            }
                        }
                    }
                }
            }
        }
    }
}
Aggregations