Search in sources :

Example 16 with COSObjectKey

use of com.tom_roush.pdfbox.cos.COSObjectKey in project PdfBox-Android by TomRoush.

the class COSParser method parseDictObjects.

/**
 * Parses all objects that are referenced, directly or indirectly, from the given dictionary,
 * which is what is needed to load a single page of the document. Outstanding references are
 * resolved in ascending order of their file offset, aiming to keep seek operations low.
 *
 * @param dict the dictionary whose referenced objects are to be parsed
 * @param excludeObjects dictionary object reference entries with these names will not be parsed
 *
 * @throws IOException if an object stream reference cannot be resolved, if a compressed object
 *             would share its offset with an uncompressed one, or if parsing an object fails
 */
protected void parseDictObjects(COSDictionary dict, COSName... excludeObjects) throws IOException {
    // work queue of COS values that still have to be inspected for object references
    final Queue<COSBase> pending = new LinkedList<COSBase>();
    // unresolved references keyed by file offset (lowest first); a compressed object is
    // filed under the offset of the object stream that contains it
    final TreeMap<Long, List<COSObject>> byOffset = new TreeMap<Long, List<COSObject>>();
    final Set<Long> parsedIds = new HashSet<Long>();
    final Set<Long> queuedIds = new HashSet<Long>();

    addExcludedToList(excludeObjects, dict, parsedIds);
    addNewToList(pending, dict.getValues(), queuedIds);

    while (!pending.isEmpty() || !byOffset.isEmpty()) {
        // ---- phase 1: drain the queue; containers are expanded, object references are
        // ---- sorted into byOffset. The queue is empty once this loop ends.
        for (COSBase current = pending.poll(); current != null; current = pending.poll()) {
            if (current instanceof COSDictionary) {
                addNewToList(pending, ((COSDictionary) current).getValues(), queuedIds);
                continue;
            }
            if (current instanceof COSArray) {
                for (COSBase element : (COSArray) current) {
                    addNewToList(pending, element, queuedIds);
                }
                continue;
            }
            if (!(current instanceof COSObject)) {
                // plain values carry no references
                continue;
            }

            COSObject reference = (COSObject) current;
            long referenceId = getObjectId(reference);
            COSObjectKey referenceKey = new COSObjectKey(reference.getObjectNumber(), reference.getGenerationNumber());
            if (parsedIds.contains(referenceId)) {
                continue;
            }

            Long offset = document.getXrefTable().get(referenceKey);
            if (offset == null && isLenient && bfSearchCOSObjectKeyOffsets != null) {
                // fall back to the brute force search result and remember it in the xref table
                offset = bfSearchCOSObjectKeyOffsets.get(referenceKey);
                if (offset != null) {
                    Log.d("PdfBox-Android", "Set missing " + offset + " for object " + referenceKey);
                    document.getXrefTable().put(referenceKey, offset);
                }
            }

            // a reference without an offset (or with offset 0) resolves to the NULL object
            if (offset == null || offset == 0) {
                document.getObjectFromPool(referenceKey).setObject(COSNull.NULL);
                continue;
            }

            // positive offset: the object is stored uncompressed at that position
            if (offset > 0) {
                byOffset.put(offset, Collections.singletonList(reference));
                continue;
            }

            // negative offset: the object lives inside an object stream whose object
            // number is the negated value => look up the offset of that stream instead
            COSObjectKey streamKey = new COSObjectKey((int) -offset, 0);
            offset = document.getXrefTable().get(streamKey);
            if (offset == null || offset <= 0) {
                if (isLenient && bfSearchCOSObjectKeyOffsets != null) {
                    offset = bfSearchCOSObjectKeyOffsets.get(streamKey);
                    if (offset == null) {
                        Log.w("PdfBox-Android", "Invalid object stream xref object reference for key '" + referenceKey + "': " + offset);
                        continue;
                    }
                    Log.d("PdfBox-Android", "Set missing " + offset + " for object " + streamKey);
                    document.getXrefTable().put(streamKey, offset);
                } else {
                    String msg = "Invalid object stream xref object reference for key '" + referenceKey + "': " + offset;
                    if (isLenient && offset == null) {
                        Log.w("PdfBox-Android", msg);
                        continue;
                    }
                    throw new IOException(msg);
                }
            }

            List<COSObject> streamMembers = byOffset.get(offset);
            if (streamMembers == null) {
                streamMembers = new ArrayList<COSObject>();
                byOffset.put(offset, streamMembers);
            } else if (!(streamMembers instanceof ArrayList)) {
                // a non-ArrayList entry is the singleton list of an uncompressed object,
                // which cannot take further members
                throw new IOException(reference + " cannot be assigned to offset " + offset + ", this belongs to " + streamMembers.get(0));
            }
            streamMembers.add(reference);
        }

        // ---- phase 2: parse the objects at the lowest outstanding offset; whatever they
        // ---- contain is queued again for the next round
        if (byOffset.isEmpty()) {
            break;
        }
        List<COSObject> batch = byOffset.pollFirstEntry().getValue();
        for (COSObject target : batch) {
            COSBase content = parseObjectDynamically(target, false);
            if (content == null) {
                continue;
            }
            target.setObject(content);
            addNewToList(pending, content, queuedIds);
            parsedIds.add(getObjectId(target));
        }
    }
}
Also used : COSDictionary(com.tom_roush.pdfbox.cos.COSDictionary) ArrayList(java.util.ArrayList) IOException(java.io.IOException) TreeMap(java.util.TreeMap) LinkedList(java.util.LinkedList) COSObjectKey(com.tom_roush.pdfbox.cos.COSObjectKey) COSArray(com.tom_roush.pdfbox.cos.COSArray) COSObject(com.tom_roush.pdfbox.cos.COSObject) COSBase(com.tom_roush.pdfbox.cos.COSBase) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) HashSet(java.util.HashSet)

Example 17 with COSObjectKey

use of com.tom_roush.pdfbox.cos.COSObjectKey in project PdfBox-Android by TomRoush.

the class COSParser method bfSearchForObjStreams.

/**
 * Brute force search for all object streams.
 *
 * <p>The source is scanned from {@code MINIMUM_SEARCH_OFFSET} to its end for the
 * {@code OBJ_STREAM} marker. For every hit the start of the enclosing object
 * ("&lt;number&gt; &lt;generation&gt; obj") is searched backwards. Each object stream found
 * this way that is still the current version of its object (according to
 * {@code bfSearchCOSObjectKeyOffsets}) has its header parsed, and the objects it contains are
 * registered with the negated stream object number as offset, both in
 * {@code bfSearchCOSObjectKeyOffsets} and in the xref table of {@code xrefTrailerResolver}.
 * The position of the source is restored before returning normally.
 *
 * @throws IOException if something went wrong
 */
private void bfSearchForObjStreams() throws IOException {
    HashMap<Long, COSObjectKey> bfSearchObjStreamsOffsets = new HashMap<Long, COSObjectKey>();
    long originOffset = source.getPosition();
    source.seek(MINIMUM_SEARCH_OFFSET);
    char[] objKeyword = " obj".toCharArray();
    while (!source.isEOF()) {
        // search for the object stream marker
        if (isString(OBJ_STREAM)) {
            long currentPosition = source.getPosition();
            // search backwards for the beginning of the object, stepping back in
            // windows of 10 bytes (at most 39 windows)
            long newOffset = -1;
            boolean objFound = false;
            for (int i = 1; i < 40 && !objFound; i++) {
                long currentOffset = currentPosition - (i * 10);
                if (currentOffset > 0) {
                    source.seek(currentOffset);
                    for (int j = 0; j < 10; j++) {
                        if (isString(objKeyword)) {
                            // the byte in front of " obj" is expected to be the generation number
                            long tempOffset = currentOffset - 1;
                            source.seek(tempOffset);
                            int genID = source.peek();
                            // is the next char a digit?
                            if (isDigit(genID)) {
                                tempOffset--;
                                source.seek(tempOffset);
                                if (isSpace()) {
                                    // walk backwards over the digits of the object number
                                    int length = 0;
                                    source.seek(--tempOffset);
                                    while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit()) {
                                        source.seek(--tempOffset);
                                        length++;
                                    }
                                    if (length > 0) {
                                        // step over the non-digit in front of the object number
                                        source.read();
                                        newOffset = source.getPosition();
                                        long objNumber = readObjectNumber();
                                        int genNumber = readGenerationNumber();
                                        COSObjectKey streamObjectKey = new COSObjectKey(objNumber, genNumber);
                                        bfSearchObjStreamsOffsets.put(newOffset, streamObjectKey);
                                    }
                                }
                            }
                            Log.d("PdfBox-Android", "Dictionary start for object stream -> " + newOffset);
                            objFound = true;
                            break;
                        } else {
                            currentOffset++;
                            source.read();
                        }
                    }
                }
            }
            // continue the forward scan right behind the marker
            source.seek(currentPosition + OBJ_STREAM.length);
        }
        source.read();
    }
    // add all found compressed objects to the brute force search result
    for (Map.Entry<Long, COSObjectKey> candidate : bfSearchObjStreamsOffsets.entrySet()) {
        Long offset = candidate.getKey();
        COSObjectKey candidateKey = candidate.getValue();
        Long bfOffset = bfSearchCOSObjectKeyOffsets.get(candidateKey);
        // incomplete object stream found?
        if (bfOffset == null) {
            Log.w("PdfBox-Android", "Skipped incomplete object stream:" + candidateKey + " at " + offset);
            continue;
        }
        // check if the object was overwritten
        if (offset.equals(bfOffset)) {
            source.seek(offset);
            long stmObjNumber = readObjectNumber();
            int stmGenNumber = readGenerationNumber();
            readExpectedString(OBJ_MARKER, true);
            int nrOfObjects = 0;
            byte[] numbersBytes = null;
            // number of header bytes that were actually read from the stream
            int numbersLength = 0;
            COSStream stream = null;
            COSInputStream is = null;
            try {
                COSDictionary dict = parseCOSDictionary();
                int offsetFirstStream = dict.getInt(COSName.FIRST);
                nrOfObjects = dict.getInt(COSName.N);
                // skip the stream if required values are missing (reported as -1) or
                // otherwise negative; a negative /First would fail the array allocation
                // below with an unchecked exception
                if (offsetFirstStream < 0 || nrOfObjects < 0) {
                    continue;
                }
                stream = parseCOSStream(dict);
                if (securityHandler != null) {
                    securityHandler.decryptStream(stream, stmObjNumber, stmGenNumber);
                }
                is = stream.createInputStream();
                numbersBytes = new byte[offsetFirstStream];
                // a single read() may deliver fewer bytes than requested, so keep
                // reading until the header is complete or the stream ends
                while (numbersLength < numbersBytes.length) {
                    int count = is.read(numbersBytes, numbersLength, numbersBytes.length - numbersLength);
                    if (count <= 0) {
                        break;
                    }
                    numbersLength += count;
                }
            } catch (IOException exception) {
                Log.d("PdfBox-Android", "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset);
                continue;
            } finally {
                if (is != null) {
                    is.close();
                }
                if (stream != null) {
                    stream.close();
                }
            }
            // the header consists of whitespace separated pairs "<object number> <offset>";
            // only the bytes actually read are decoded, surrounding whitespace is dropped and
            // any run of whitespace (space, CR, LF, tab, ...) counts as one separator
            String numbersStr = new String(numbersBytes, 0, numbersLength, "ISO-8859-1").trim();
            String[] numbers = numbersStr.split("\\s+");
            // compare pair counts instead of nrOfObjects * 2, which could overflow
            if (numbers.length / 2 < nrOfObjects) {
                Log.d("PdfBox-Android", "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset);
                continue;
            }
            Map<COSObjectKey, Long> xrefOffset = xrefTrailerResolver.getXrefTable();
            for (int i = 0; i < nrOfObjects; i++) {
                try {
                    long objNumber = Long.parseLong(numbers[i * 2]);
                    COSObjectKey objKey = new COSObjectKey(objNumber, 0);
                    Long existingOffset = bfSearchCOSObjectKeyOffsets.get(objKey);
                    if (existingOffset != null && existingOffset < 0) {
                        // translate stream object key to its offset
                        COSObjectKey objStmKey = new COSObjectKey(Math.abs(existingOffset), 0);
                        existingOffset = bfSearchCOSObjectKeyOffsets.get(objStmKey);
                    }
                    // register the object unless a version at a higher or equal offset is known
                    if (existingOffset == null || offset > existingOffset) {
                        bfSearchCOSObjectKeyOffsets.put(objKey, -stmObjNumber);
                        xrefOffset.put(objKey, -stmObjNumber);
                    }
                } catch (NumberFormatException exception) {
                    Log.d("PdfBox-Android", "Skipped corrupt object key in stream: " + stmObjNumber);
                }
            }
        }
    }
    source.seek(originOffset);
}
Also used : COSStream(com.tom_roush.pdfbox.cos.COSStream) COSInputStream(com.tom_roush.pdfbox.cos.COSInputStream) COSDictionary(com.tom_roush.pdfbox.cos.COSDictionary) HashMap(java.util.HashMap) IOException(java.io.IOException) COSObjectKey(com.tom_roush.pdfbox.cos.COSObjectKey)

Aggregations

COSObjectKey (com.tom_roush.pdfbox.cos.COSObjectKey)17 COSBase (com.tom_roush.pdfbox.cos.COSBase)7 COSObject (com.tom_roush.pdfbox.cos.COSObject)6 IOException (java.io.IOException)6 COSDictionary (com.tom_roush.pdfbox.cos.COSDictionary)4 COSArray (com.tom_roush.pdfbox.cos.COSArray)2 COSInteger (com.tom_roush.pdfbox.cos.COSInteger)2 COSNumber (com.tom_roush.pdfbox.cos.COSNumber)2 COSStream (com.tom_roush.pdfbox.cos.COSStream)2 COSDocument (com.tom_roush.pdfbox.cos.COSDocument)1 COSInputStream (com.tom_roush.pdfbox.cos.COSInputStream)1 COSString (com.tom_roush.pdfbox.cos.COSString)1 COSUpdateInfo (com.tom_roush.pdfbox.cos.COSUpdateInfo)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1 List (java.util.List)1 Entry (java.util.Map.Entry)1 TreeMap (java.util.TreeMap)1