Search in sources :

Example 1 with COSNumber

use of org.apache.pdfbox.cos.COSNumber in project pdfbox by apache.

the class PDFMergerUtility method appendDocument.

/**
 * Append all pages from source to destination and merge the document level
 * data of the source catalog into the destination catalog.
 *
 * Besides the pages, the following source entries are handled: document
 * information, PDF version (the higher one wins), open action, AcroForm,
 * article threads, names, named destinations, outline, page mode, page labels,
 * metadata, optional content properties, output intents and, when both
 * documents provide a parent tree, the logical structure tree.
 *
 * @param destination the document to receive the pages
 * @param source the document originating the new pages
 *
 * @throws IOException If either document is closed, if the source contains
 * dynamic XFA form content, or if there is an error accessing data from either
 * document.
 */
public void appendDocument(PDDocument destination, PDDocument source) throws IOException {
    if (source.getDocument().isClosed()) {
        throw new IOException("Error: source PDF is closed.");
    }
    if (destination.getDocument().isClosed()) {
        throw new IOException("Error: destination PDF is closed.");
    }
    PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
    PDDocumentCatalog srcCatalog = source.getDocumentCatalog();
    if (isDynamicXfa(srcCatalog.getAcroForm())) {
        throw new IOException("Error: can't merge source document containing dynamic XFA form content.");
    }
    PDDocumentInformation destInfo = destination.getDocumentInformation();
    PDDocumentInformation srcInfo = source.getDocumentInformation();
    mergeInto(srcInfo.getCOSObject(), destInfo.getCOSObject(), Collections.<COSName>emptySet());
    // use the highest version number for the resulting pdf
    float destVersion = destination.getVersion();
    float srcVersion = source.getVersion();
    if (destVersion < srcVersion) {
        destination.setVersion(srcVersion);
    }
    // index (within the source page tree) of the page targeted by the source open action, -1 if none
    int pageIndexOpenActionDest = -1;
    if (destCatalog.getOpenAction() == null) {
        // PDFBOX-3972: get local dest page index, it must be reassigned after the page cloning
        PDDestinationOrAction openAction = srcCatalog.getOpenAction();
        PDDestination openActionDestination = null;
        if (openAction instanceof PDActionGoTo) {
            openActionDestination = ((PDActionGoTo) openAction).getDestination();
        } else if (openAction instanceof PDDestination) {
            openActionDestination = (PDDestination) openAction;
        }
        if (openActionDestination instanceof PDPageDestination) {
            PDPage page = ((PDPageDestination) openActionDestination).getPage();
            if (page != null) {
                pageIndexOpenActionDest = srcCatalog.getPages().indexOf(page);
            }
        }
        destCatalog.setOpenAction(srcCatalog.getOpenAction());
    }
    PDFCloneUtility cloner = new PDFCloneUtility(destination);
    mergeAcroForm(cloner, destCatalog, srcCatalog);
    COSArray destThreads = (COSArray) destCatalog.getCOSObject().getDictionaryObject(COSName.THREADS);
    // the threads to merge must be read from the SOURCE catalog; reading them from the
    // destination would drop the source threads and duplicate the destination ones
    COSArray srcThreads = (COSArray) cloner.cloneForNewDocument(srcCatalog.getCOSObject().getDictionaryObject(COSName.THREADS));
    if (srcThreads != null) {
        if (destThreads == null) {
            destCatalog.getCOSObject().setItem(COSName.THREADS, srcThreads);
        } else {
            destThreads.addAll(srcThreads);
        }
    }
    PDDocumentNameDictionary destNames = destCatalog.getNames();
    PDDocumentNameDictionary srcNames = srcCatalog.getNames();
    if (srcNames != null) {
        if (destNames == null) {
            destCatalog.getCOSObject().setItem(COSName.NAMES, cloner.cloneForNewDocument(srcNames));
        } else {
            cloner.cloneMerge(srcNames, destNames);
        }
    }
    PDDocumentNameDestinationDictionary destDests = destCatalog.getDests();
    PDDocumentNameDestinationDictionary srcDests = srcCatalog.getDests();
    if (srcDests != null) {
        if (destDests == null) {
            destCatalog.getCOSObject().setItem(COSName.DESTS, cloner.cloneForNewDocument(srcDests));
        } else {
            cloner.cloneMerge(srcDests, destDests);
        }
    }
    PDDocumentOutline destOutline = destCatalog.getDocumentOutline();
    PDDocumentOutline srcOutline = srcCatalog.getDocumentOutline();
    if (srcOutline != null) {
        if (destOutline == null || destOutline.getFirstChild() == null) {
            // destination has no usable outline: take over a clone of the whole source outline
            PDDocumentOutline cloned = new PDDocumentOutline((COSDictionary) cloner.cloneForNewDocument(srcOutline));
            destCatalog.setDocumentOutline(cloned);
        } else {
            // search last sibling for dest, because /Last entry is sometimes wrong
            PDOutlineItem destLastOutlineItem = destOutline.getFirstChild();
            while (destLastOutlineItem.getNextSibling() != null) {
                destLastOutlineItem = destLastOutlineItem.getNextSibling();
            }
            for (PDOutlineItem item : srcOutline.children()) {
                // get each child, clone its dictionary, remove siblings info,
                // append outline item created from there
                COSDictionary clonedDict = (COSDictionary) cloner.cloneForNewDocument(item);
                clonedDict.removeItem(COSName.PREV);
                clonedDict.removeItem(COSName.NEXT);
                PDOutlineItem clonedItem = new PDOutlineItem(clonedDict);
                destLastOutlineItem.insertSiblingAfter(clonedItem);
                destLastOutlineItem = destLastOutlineItem.getNextSibling();
            }
        }
    }
    // the page mode of the source is only used if the destination has none
    PageMode destPageMode = destCatalog.getPageMode();
    PageMode srcPageMode = srcCatalog.getPageMode();
    if (destPageMode == null) {
        destCatalog.setPageMode(srcPageMode);
    }
    COSDictionary destLabels = (COSDictionary) destCatalog.getCOSObject().getDictionaryObject(COSName.PAGE_LABELS);
    COSDictionary srcLabels = (COSDictionary) srcCatalog.getCOSObject().getDictionaryObject(COSName.PAGE_LABELS);
    if (srcLabels != null) {
        // must be read before the source pages are added below: it is the offset of the appended pages
        int destPageCount = destination.getNumberOfPages();
        COSArray destNums;
        if (destLabels == null) {
            destLabels = new COSDictionary();
            destNums = new COSArray();
            destLabels.setItem(COSName.NUMS, destNums);
            destCatalog.getCOSObject().setItem(COSName.PAGE_LABELS, destLabels);
        } else {
            destNums = (COSArray) destLabels.getDictionaryObject(COSName.NUMS);
        }
        COSArray srcNums = (COSArray) srcLabels.getDictionaryObject(COSName.NUMS);
        if (srcNums != null) {
            // the array is read as (page index, label) pairs; shift each index by the destination page count
            for (int i = 0; i < srcNums.size(); i += 2) {
                COSNumber labelIndex = (COSNumber) srcNums.getObject(i);
                long labelIndexValue = labelIndex.intValue();
                destNums.add(COSInteger.get(labelIndexValue + destPageCount));
                destNums.add(cloner.cloneForNewDocument(srcNums.getObject(i + 1)));
            }
        }
    }
    // source metadata is only copied if the destination has none
    COSStream destMetadata = (COSStream) destCatalog.getCOSObject().getDictionaryObject(COSName.METADATA);
    COSStream srcMetadata = (COSStream) srcCatalog.getCOSObject().getDictionaryObject(COSName.METADATA);
    if (destMetadata == null && srcMetadata != null) {
        PDStream newStream = new PDStream(destination, srcMetadata.createInputStream(), (COSName) null);
        mergeInto(srcMetadata, newStream.getCOSObject(), new HashSet<>(Arrays.asList(COSName.FILTER, COSName.LENGTH)));
        destCatalog.getCOSObject().setItem(COSName.METADATA, newStream);
    }
    // source optional content properties are only copied if the destination has none
    COSDictionary destOCP = (COSDictionary) destCatalog.getCOSObject().getDictionaryObject(COSName.OCPROPERTIES);
    COSDictionary srcOCP = (COSDictionary) srcCatalog.getCOSObject().getDictionaryObject(COSName.OCPROPERTIES);
    if (destOCP == null && srcOCP != null) {
        destCatalog.getCOSObject().setItem(COSName.OCPROPERTIES, cloner.cloneForNewDocument(srcOCP));
    }
    mergeOutputIntents(cloner, srcCatalog, destCatalog);
    // merge logical structure hierarchy if logical structure information is available in both source pdf and
    // destination pdf
    boolean mergeStructTree = false;
    int destParentTreeNextKey = -1;
    COSDictionary destParentTreeDict = null;
    COSDictionary srcParentTreeDict;
    COSArray destNumbersArray = null;
    COSArray srcNumbersArray = null;
    PDMarkInfo destMark = destCatalog.getMarkInfo();
    PDStructureTreeRoot destStructTree = destCatalog.getStructureTreeRoot();
    PDMarkInfo srcMark = srcCatalog.getMarkInfo();
    PDStructureTreeRoot srcStructTree = srcCatalog.getStructureTreeRoot();
    if (destStructTree != null) {
        PDNumberTreeNode destParentTree = destStructTree.getParentTree();
        destParentTreeNextKey = destStructTree.getParentTreeNextKey();
        if (destParentTree != null) {
            destParentTreeDict = destParentTree.getCOSObject();
            destNumbersArray = (COSArray) destParentTreeDict.getDictionaryObject(COSName.NUMS);
            if (destNumbersArray != null) {
                if (destParentTreeNextKey < 0) {
                    // no usable next key: fall back to the number of (key, value) pairs
                    destParentTreeNextKey = destNumbersArray.size() / 2;
                }
                if (destParentTreeNextKey > 0 && srcStructTree != null) {
                    PDNumberTreeNode srcParentTree = srcStructTree.getParentTree();
                    if (srcParentTree != null) {
                        srcParentTreeDict = srcParentTree.getCOSObject();
                        srcNumbersArray = (COSArray) srcParentTreeDict.getDictionaryObject(COSName.NUMS);
                        if (srcNumbersArray != null) {
                            mergeStructTree = true;
                        }
                    }
                }
            }
        }
        // the structure trees can't be merged: the destination is no longer marked and loses its tree
        if (destMark != null && destMark.isMarked() && !mergeStructTree) {
            destMark.setMarked(false);
        }
        if (!mergeStructTree) {
            destCatalog.setStructureTreeRoot(null);
        }
    }
    // maps source page / annotation dictionaries to their clones in the destination
    Map<COSDictionary, COSDictionary> objMapping = new HashMap<>();
    int pageIndex = 0;
    for (PDPage page : srcCatalog.getPages()) {
        PDPage newPage = new PDPage((COSDictionary) cloner.cloneForNewDocument(page.getCOSObject()));
        newPage.setCropBox(page.getCropBox());
        newPage.setMediaBox(page.getMediaBox());
        newPage.setRotation(page.getRotation());
        PDResources resources = page.getResources();
        if (resources != null) {
            // this is smart enough to just create references for resources that are used on multiple pages
            newPage.setResources(new PDResources((COSDictionary) cloner.cloneForNewDocument(resources)));
        } else {
            newPage.setResources(new PDResources());
        }
        if (mergeStructTree) {
            updateStructParentEntries(newPage, destParentTreeNextKey);
            objMapping.put(page.getCOSObject(), newPage.getCOSObject());
            List<PDAnnotation> oldAnnots = page.getAnnotations();
            List<PDAnnotation> newAnnots = newPage.getAnnotations();
            for (int i = 0; i < oldAnnots.size(); i++) {
                objMapping.put(oldAnnots.get(i).getCOSObject(), newAnnots.get(i).getCOSObject());
            }
        // TODO update mapping for XObjects
        }
        destination.addPage(newPage);
        if (pageIndex == pageIndexOpenActionDest) {
            // PDFBOX-3972: reassign the page.
            // The openAction is either a PDActionGoTo or a PDPageDestination
            PDDestinationOrAction openAction = destCatalog.getOpenAction();
            PDPageDestination pageDestination;
            if (openAction instanceof PDActionGoTo) {
                pageDestination = (PDPageDestination) ((PDActionGoTo) openAction).getDestination();
            } else {
                pageDestination = (PDPageDestination) openAction;
            }
            pageDestination.setPage(newPage);
        }
        ++pageIndex;
    }
    if (mergeStructTree) {
        updatePageReferences(srcNumbersArray, objMapping);
        // append the source parent tree values, renumbered consecutively from the destination's next key
        for (int i = 0; i < srcNumbersArray.size() / 2; i++) {
            destNumbersArray.add(COSInteger.get(destParentTreeNextKey + i));
            destNumbersArray.add(srcNumbersArray.getObject(i * 2 + 1));
        }
        destParentTreeNextKey += srcNumbersArray.size() / 2;
        destParentTreeDict.setItem(COSName.NUMS, destNumbersArray);
        PDNumberTreeNode newParentTreeNode = new PDNumberTreeNode(destParentTreeDict, COSBase.class);
        destStructTree.setParentTree(newParentTreeNode);
        destStructTree.setParentTreeNextKey(destParentTreeNextKey);
        // new top level /Document element that takes the kids of both structure trees
        COSDictionary kDictLevel0 = new COSDictionary();
        COSArray newKArray = new COSArray();
        COSArray destKArray = destStructTree.getKArray();
        COSArray srcKArray = srcStructTree.getKArray();
        if (destKArray != null && srcKArray != null) {
            updateParentEntry(destKArray, kDictLevel0);
            newKArray.addAll(destKArray);
            updateParentEntry(srcKArray, kDictLevel0);
            newKArray.addAll(srcKArray);
        }
        kDictLevel0.setItem(COSName.K, newKArray);
        kDictLevel0.setItem(COSName.P, destStructTree);
        kDictLevel0.setItem(COSName.S, COSName.DOCUMENT);
        destStructTree.setK(kDictLevel0);
    }
}
Also used : COSStream(org.apache.pdfbox.cos.COSStream) PDPage(org.apache.pdfbox.pdmodel.PDPage) HashMap(java.util.HashMap) PDDestinationOrAction(org.apache.pdfbox.pdmodel.common.PDDestinationOrAction) PDStructureTreeRoot(org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot) PDPageDestination(org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination) PDMarkInfo(org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDMarkInfo) PDNumberTreeNode(org.apache.pdfbox.pdmodel.common.PDNumberTreeNode) COSArray(org.apache.pdfbox.cos.COSArray) COSNumber(org.apache.pdfbox.cos.COSNumber) PDDocumentInformation(org.apache.pdfbox.pdmodel.PDDocumentInformation) PDActionGoTo(org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo) PDDocumentNameDestinationDictionary(org.apache.pdfbox.pdmodel.PDDocumentNameDestinationDictionary) PageMode(org.apache.pdfbox.pdmodel.PageMode) COSDictionary(org.apache.pdfbox.cos.COSDictionary) PDDocumentOutline(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline) PDAnnotation(org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation) PDResources(org.apache.pdfbox.pdmodel.PDResources) IOException(java.io.IOException) PDDestination(org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDDestination) PDStream(org.apache.pdfbox.pdmodel.common.PDStream) PDDocumentCatalog(org.apache.pdfbox.pdmodel.PDDocumentCatalog) PDDocumentNameDictionary(org.apache.pdfbox.pdmodel.PDDocumentNameDictionary) PDOutlineItem(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem)

Example 2 with COSNumber

use of org.apache.pdfbox.cos.COSNumber in project pdfbox by apache.

the class COSParser method parseCOSStream.

/**
 * Reads a COSStream from the input, using the length entry of the given dictionary. If the
 * length entry is an indirect reference it is resolved first. When a valid length is
 * available the stream data is copied without looking for 'endstream' or 'endobj', so these
 * keywords may occur within the data. Otherwise the data is read until the end of the stream
 * is detected. In both cases 'endstream' is expected after the data.
 *
 * @param dic dictionary that goes with this stream.
 *
 * @return parsed pdf stream.
 *
 * @throws IOException if an error occurred reading the stream, like problems with reading
 * length attribute, stream does not end with 'endstream' after data read, stream too short etc.
 */
protected COSStream parseCOSStream(COSDictionary dic) throws IOException {
    COSStream result = document.createCOSStream(dic);
    // consume the 'stream' keyword; its presence was already tested in parseObjectsDynamically()
    readString();
    skipWhiteSpaces();
    // dic.getItem is required here: while parsing, the underlying object might still be null
    COSNumber declaredLength = getLength(dic.getItem(COSName.LENGTH), dic.getCOSName(COSName.TYPE));
    if (declaredLength == null) {
        if (!isLenient) {
            throw new IOException("Missing length for stream.");
        }
        LOG.warn("The stream doesn't provide any stream length, using fallback readUntilEnd, at offset " + source.getPosition());
    }
    // copy the stream data into the raw output of the new stream
    try (OutputStream rawOut = result.createRawOutputStream()) {
        boolean lengthUsable = declaredLength != null && validateStreamLength(declaredLength.longValue());
        if (lengthUsable) {
            readValidStream(rawOut, declaredLength);
        } else {
            readUntilEndStream(new EndstreamOutputStream(rawOut));
        }
    }
    String keyword = readString();
    if (isLenient && keyword.equals("endobj")) {
        LOG.warn("stream ends with 'endobj' instead of 'endstream' at offset " + source.getPosition());
        // avoid follow-up warning about missing endobj
        source.rewind(ENDOBJ.length);
        return result;
    }
    if (isLenient && keyword.length() > 9 && keyword.substring(0, 9).equals(ENDSTREAM_STRING)) {
        LOG.warn("stream ends with '" + keyword + "' instead of 'endstream' at offset " + source.getPosition());
        // unread the "extra" bytes
        String surplus = keyword.substring(9);
        source.rewind(surplus.getBytes(ISO_8859_1).length);
        return result;
    }
    if (!keyword.equals(ENDSTREAM_STRING)) {
        throw new IOException("Error reading stream, expected='endstream' actual='" + keyword + "' at offset " + source.getPosition());
    }
    return result;
}
Also used : COSStream(org.apache.pdfbox.cos.COSStream) OutputStream(java.io.OutputStream) COSNumber(org.apache.pdfbox.cos.COSNumber) IOException(java.io.IOException)

Example 3 with COSNumber

use of org.apache.pdfbox.cos.COSNumber in project pdfbox by apache.

the class COSWriter method prepareIncrement.

/**
 * Registers the objects of an existing document: every pooled object that is not a
 * COSNumber is recorded together with its key in both lookup maps, and the highest
 * object number found is passed to setNumber. Does nothing if doc is null.
 *
 * @param doc the document to take the xref table and object pool from, may be null
 */
private void prepareIncrement(PDDocument doc) {
    if (doc == null) {
        return;
    }
    COSDocument cosDocument = doc.getDocument();
    Map<COSObjectKey, Long> xref = cosDocument.getXrefTable();
    long maxObjectNumber = cosDocument.getHighestXRefObjectNumber();
    for (COSObjectKey key : xref.keySet()) {
        COSBase pooled = cosDocument.getObjectFromPool(key).getObject();
        if (key == null) {
            // without a key there is nothing to register and no object number to compare
            continue;
        }
        if (pooled != null && !(pooled instanceof COSNumber)) {
            objectKeys.put(pooled, key);
            keyObject.put(key, pooled);
        }
        maxObjectNumber = Math.max(maxObjectNumber, key.getNumber());
    }
    setNumber(maxObjectNumber);
}
Also used : COSObjectKey(org.apache.pdfbox.cos.COSObjectKey) COSNumber(org.apache.pdfbox.cos.COSNumber) COSDocument(org.apache.pdfbox.cos.COSDocument) COSBase(org.apache.pdfbox.cos.COSBase)

Example 4 with COSNumber

use of org.apache.pdfbox.cos.COSNumber in project pdfbox by apache.

the class COSArrayList method convertFloatCOSArrayToList.

/**
 * Converts an array of COSNumbers into a COSArrayList of java.lang.Float values.
 * Entries that are not a COSNumber are represented by a null element, so the
 * resulting list has the same size as the array.
 *
 * @param floatArray The existing float Array, may be null.
 *
 * @return The list of Float objects, or null if floatArray is null.
 */
public static List<Float> convertFloatCOSArrayToList(COSArray floatArray) {
    if (floatArray == null) {
        return null;
    }
    List<Float> values = new ArrayList<>(floatArray.size());
    for (int index = 0; index < floatArray.size(); index++) {
        COSBase element = floatArray.getObject(index);
        Float value = null;
        if (element instanceof COSNumber) {
            value = ((COSNumber) element).floatValue();
        }
        values.add(value);
    }
    return new COSArrayList<>(values, floatArray);
}
Also used : COSFloat(org.apache.pdfbox.cos.COSFloat) ArrayList(java.util.ArrayList) COSNumber(org.apache.pdfbox.cos.COSNumber) COSBase(org.apache.pdfbox.cos.COSBase)

Example 5 with COSNumber

use of org.apache.pdfbox.cos.COSNumber in project pdfbox by apache.

the class FDFAnnotation method getPage.

/**
 * This will get the page number or null if it does not exist.
 *
 * An entry that is present but is not a number is treated like a missing
 * entry instead of failing with a ClassCastException.
 *
 * @return The page number, or null if there is no numeric page entry.
 */
public Integer getPage() {
    // typed as Object so that a malformed (non-numeric) entry can be checked before casting
    Object page = annot.getDictionaryObject(COSName.PAGE);
    if (page instanceof COSNumber) {
        return ((COSNumber) page).intValue();
    }
    return null;
}
Also used : COSNumber(org.apache.pdfbox.cos.COSNumber)

Aggregations

COSNumber (org.apache.pdfbox.cos.COSNumber)61 COSBase (org.apache.pdfbox.cos.COSBase)29 MissingOperandException (org.apache.pdfbox.contentstream.operator.MissingOperandException)18 COSArray (org.apache.pdfbox.cos.COSArray)18 COSInteger (org.apache.pdfbox.cos.COSInteger)10 Point2D (java.awt.geom.Point2D)6 IOException (java.io.IOException)6 COSObject (org.apache.pdfbox.cos.COSObject)5 PDBorderStyleDictionary (org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary)5 ArrayList (java.util.ArrayList)4 COSDictionary (org.apache.pdfbox.cos.COSDictionary)4 COSName (org.apache.pdfbox.cos.COSName)4 COSObjectKey (org.apache.pdfbox.cos.COSObjectKey)4 PDFont (org.apache.pdfbox.pdmodel.font.PDFont)4 COSFloat (org.apache.pdfbox.cos.COSFloat)3 COSStream (org.apache.pdfbox.cos.COSStream)2 PDRectangle (org.apache.pdfbox.pdmodel.common.PDRectangle)2 ValidationError (org.apache.pdfbox.preflight.ValidationResult.ValidationError)2 Matrix (org.apache.pdfbox.util.Matrix)2 Paint (java.awt.Paint)1