Search in sources :

Example 1 with COSInputStream

use of com.tom_roush.pdfbox.cos.COSInputStream in project PdfBox-Android by TomRoush.

the class COSParser method bfSearchForObjStreams.

/**
 * Brute force search for all object streams.
 *
 * <p>The method works in two passes. First it scans the source from
 * {@code MINIMUM_SEARCH_OFFSET} to the end for the {@code OBJ_STREAM} marker and, for every
 * hit, walks backwards to find the "&lt;objNumber&gt; &lt;genNumber&gt; obj" header that
 * introduces the enclosing object. Second, for every object stream whose offset matches the
 * one already recorded in {@code bfSearchCOSObjectKeyOffsets}, it reads the stream header
 * (the first {@code /First} bytes) and registers each contained object number with the
 * negated object number of the stream as its "offset", both in
 * {@code bfSearchCOSObjectKeyOffsets} and in the xref table of {@code xrefTrailerResolver}.
 *
 * <p>Corrupt or incomplete object streams are logged and skipped. The position of
 * {@code source} is restored to its initial value when the method completes normally.
 *
 * @throws IOException if something went wrong
 */
private void bfSearchForObjStreams() throws IOException {
    Map<Long, COSObjectKey> bfSearchObjStreamsOffsets = new HashMap<Long, COSObjectKey>();
    long originOffset = source.getPosition();
    source.seek(MINIMUM_SEARCH_OFFSET);
    char[] objKeyword = " obj".toCharArray();
    while (!source.isEOF()) {
        // search for the object stream marker
        if (isString(OBJ_STREAM)) {
            long currentPosition = source.getPosition();
            // search backwards for the beginning of the object
            long newOffset = -1;
            boolean objFound = false;
            // look back in up to 39 windows of 10 bytes each
            for (int i = 1; i < 40 && !objFound; i++) {
                long currentOffset = currentPosition - (i * 10);
                if (currentOffset > 0) {
                    source.seek(currentOffset);
                    for (int j = 0; j < 10; j++) {
                        if (isString(objKeyword)) {
                            // the character right before " obj" should be the generation number
                            long tempOffset = currentOffset - 1;
                            source.seek(tempOffset);
                            int genID = source.peek();
                            // is the next char a digit?
                            if (isDigit(genID)) {
                                tempOffset--;
                                source.seek(tempOffset);
                                if (isSpace()) {
                                    // walk backwards over the digits of the object number
                                    int length = 0;
                                    source.seek(--tempOffset);
                                    while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit()) {
                                        source.seek(--tempOffset);
                                        length++;
                                    }
                                    if (length > 0) {
                                        // step over the character preceding the object number
                                        source.read();
                                        newOffset = source.getPosition();
                                        long objNumber = readObjectNumber();
                                        int genNumber = readGenerationNumber();
                                        COSObjectKey streamObjectKey = new COSObjectKey(objNumber, genNumber);
                                        bfSearchObjStreamsOffsets.put(newOffset, streamObjectKey);
                                    }
                                }
                            }
                            Log.d("PdfBox-Android", "Dictionary start for object stream -> " + newOffset);
                            objFound = true;
                            break;
                        } else {
                            currentOffset++;
                            source.read();
                        }
                    }
                }
            }
            // continue the forward scan right behind the marker that was just handled
            source.seek(currentPosition + OBJ_STREAM.length);
        }
        source.read();
    }
    // add all found compressed objects to the brute force search result
    for (Map.Entry<Long, COSObjectKey> entry : bfSearchObjStreamsOffsets.entrySet()) {
        Long offset = entry.getKey();
        COSObjectKey streamKey = entry.getValue();
        Long bfOffset = bfSearchCOSObjectKeyOffsets.get(streamKey);
        // incomplete object stream found?
        if (bfOffset == null) {
            Log.w("PdfBox-Android", "Skipped incomplete object stream:" + streamKey + " at " + offset);
            continue;
        }
        // check if the object was overwritten
        if (offset.equals(bfOffset)) {
            source.seek(offset);
            long stmObjNumber = readObjectNumber();
            int stmGenNumber = readGenerationNumber();
            readExpectedString(OBJ_MARKER, true);
            int nrOfObjects = 0;
            byte[] numbersBytes = null;
            // number of header bytes that were actually read from the stream
            int headerLength = 0;
            COSStream stream = null;
            COSInputStream is = null;
            try {
                COSDictionary dict = parseCOSDictionary();
                int offsetFirstStream = dict.getInt(COSName.FIRST);
                nrOfObjects = dict.getInt(COSName.N);
                // skip the stream if required values are missing (-1) or otherwise negative;
                // a negative /First would make the array allocation below throw
                if (offsetFirstStream < 0 || nrOfObjects < 0) {
                    continue;
                }
                stream = parseCOSStream(dict);
                if (securityHandler != null) {
                    securityHandler.decryptStream(stream, stmObjNumber, stmGenNumber);
                }
                is = stream.createInputStream();
                numbersBytes = new byte[offsetFirstStream];
                // a single read() may deliver fewer bytes than requested, so keep reading
                // until the header is complete or the stream is exhausted
                while (headerLength < numbersBytes.length) {
                    int count = is.read(numbersBytes, headerLength, numbersBytes.length - headerLength);
                    if (count <= 0) {
                        break;
                    }
                    headerLength += count;
                }
            } catch (IOException exception) {
                Log.d("PdfBox-Android", "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset);
                continue;
            } finally {
                if (is != null) {
                    is.close();
                }
                if (stream != null) {
                    stream.close();
                }
            }
            // the header holds whitespace separated "objNumber offset" pairs; only the bytes
            // really read are decoded, and any run of whitespace counts as one separator
            String numbersStr = new String(numbersBytes, 0, headerLength, "ISO-8859-1").trim();
            String[] numbers = numbersStr.split("\\s+");
            // compare as long so that a huge /N cannot overflow and bypass this guard
            if (numbers.length < (long) nrOfObjects * 2) {
                Log.d("PdfBox-Android", "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset);
                continue;
            }
            Map<COSObjectKey, Long> xrefOffset = xrefTrailerResolver.getXrefTable();
            for (int i = 0; i < nrOfObjects; i++) {
                try {
                    // every even token is an object number, the odd ones are offsets
                    long objNumber = Long.parseLong(numbers[i * 2]);
                    COSObjectKey objKey = new COSObjectKey(objNumber, 0);
                    Long existingOffset = bfSearchCOSObjectKeyOffsets.get(objKey);
                    if (existingOffset != null && existingOffset < 0) {
                        // translate stream object key to its offset
                        COSObjectKey objStmKey = new COSObjectKey(Math.abs(existingOffset), 0);
                        existingOffset = bfSearchCOSObjectKeyOffsets.get(objStmKey);
                    }
                    // register the object if it is unknown or this stream is located later in the file
                    if (existingOffset == null || offset > existingOffset) {
                        bfSearchCOSObjectKeyOffsets.put(objKey, -stmObjNumber);
                        xrefOffset.put(objKey, -stmObjNumber);
                    }
                } catch (NumberFormatException exception) {
                    Log.d("PdfBox-Android", "Skipped corrupt object key in stream: " + stmObjNumber);
                }
            }
        }
    }
    source.seek(originOffset);
}
Also used : COSStream(com.tom_roush.pdfbox.cos.COSStream) COSInputStream(com.tom_roush.pdfbox.cos.COSInputStream) COSDictionary(com.tom_roush.pdfbox.cos.COSDictionary) HashMap(java.util.HashMap) IOException(java.io.IOException) COSObjectKey(com.tom_roush.pdfbox.cos.COSObjectKey)

Aggregations

COSDictionary (com.tom_roush.pdfbox.cos.COSDictionary)1 COSInputStream (com.tom_roush.pdfbox.cos.COSInputStream)1 COSObjectKey (com.tom_roush.pdfbox.cos.COSObjectKey)1 COSStream (com.tom_roush.pdfbox.cos.COSStream)1 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1