Example use of com.tom_roush.pdfbox.cos.COSObjectKey in the project PdfBox-Android by TomRoush.
Taken from the class COSParser, method parseDictObjects.
/**
 * Parses every object that is reachable from the given dictionary, as needed to load a single
 * page. Indirect objects waiting to be read are kept in a map sorted by file offset and are
 * always taken from the lowest offset first in order to keep seek operations to a minimum.
 *
 * @param dict the dictionary whose values are the starting points of the traversal
 * @param excludeObjects dictionary entries with these names are not followed
 *
 * @throws IOException if something went wrong while resolving or parsing an object
 */
protected void parseDictObjects(COSDictionary dict, COSName... excludeObjects) throws IOException {
    // work list of values that still have to be scanned for indirect references
    final Queue<COSBase> pending = new LinkedList<COSBase>();
    // references waiting to be parsed, ordered by file offset; for a compressed object the
    // offset used is the one of the object stream that contains it
    final TreeMap<Long, List<COSObject>> byOffset = new TreeMap<Long, List<COSObject>>();
    final Set<Long> parsedObjects = new HashSet<Long>();
    final Set<Long> addedObjects = new HashSet<Long>();

    addExcludedToList(excludeObjects, dict, parsedObjects);
    addNewToList(pending, dict.getValues(), addedObjects);

    while (!pending.isEmpty() || !byOffset.isEmpty()) {
        // ---- phase 1: drain the work list; every COSObject found ends up in byOffset
        for (COSBase current = pending.poll(); current != null; current = pending.poll()) {
            if (current instanceof COSDictionary) {
                addNewToList(pending, ((COSDictionary) current).getValues(), addedObjects);
                continue;
            }
            if (current instanceof COSArray) {
                for (COSBase element : (COSArray) current) {
                    addNewToList(pending, element, addedObjects);
                }
                continue;
            }
            if (!(current instanceof COSObject)) {
                continue;
            }

            COSObject reference = (COSObject) current;
            long referenceId = getObjectId(reference);
            COSObjectKey referenceKey =
                    new COSObjectKey(reference.getObjectNumber(), reference.getGenerationNumber());
            if (parsedObjects.contains(referenceId)) {
                continue;
            }

            Long offset = document.getXrefTable().get(referenceKey);
            if (offset == null && isLenient && bfSearchCOSObjectKeyOffsets != null) {
                // fall back to the brute force search result and remember the hit
                offset = bfSearchCOSObjectKeyOffsets.get(referenceKey);
                if (offset != null) {
                    Log.d("PdfBox-Android", "Set missing " + offset + " for object " + referenceKey);
                    document.getXrefTable().put(referenceKey, offset);
                }
            }

            if (offset == null || offset == 0) {
                // no usable offset: the reference resolves to the NULL object
                document.getObjectFromPool(referenceKey).setObject(COSNull.NULL);
                continue;
            }

            if (offset > 0) {
                // plain object; the immutable singleton list marks the slot as "not a stream"
                byOffset.put(offset, Collections.singletonList(reference));
                continue;
            }

            // a negative offset denotes a compressed object: its absolute value is the object
            // number of the containing object stream, whose offset has to be looked up
            COSObjectKey streamKey = new COSObjectKey((int) -offset, 0);
            offset = document.getXrefTable().get(streamKey);
            if (offset == null || offset <= 0) {
                if (isLenient && bfSearchCOSObjectKeyOffsets != null) {
                    offset = bfSearchCOSObjectKeyOffsets.get(streamKey);
                    if (offset == null) {
                        Log.w("PdfBox-Android", "Invalid object stream xref object reference for key '" + referenceKey + "': " + offset);
                        continue;
                    }
                    Log.d("PdfBox-Android", "Set missing " + offset + " for object " + streamKey);
                    document.getXrefTable().put(streamKey, offset);
                } else {
                    String msg = "Invalid object stream xref object reference for key '" + referenceKey + "': " + offset;
                    if (isLenient && offset == null) {
                        Log.w("PdfBox-Android", msg);
                        continue;
                    }
                    throw new IOException(msg);
                }
            }

            List<COSObject> streamMembers = byOffset.get(offset);
            if (streamMembers == null) {
                streamMembers = new ArrayList<COSObject>();
                byOffset.put(offset, streamMembers);
            } else if (!(streamMembers instanceof ArrayList)) {
                // java does not have a test for immutable, so the list type is checked instead
                throw new IOException(reference + " cannot be assigned to offset " + offset + ", this belongs to " + streamMembers.get(0));
            }
            streamMembers.add(reference);
        }

        // ---- phase 2: parse the objects located at the lowest pending offset; whatever
        // they contain is queued again for phase 1
        if (byOffset.isEmpty()) {
            break;
        }
        List<COSObject> nextBatch = byOffset.pollFirstEntry().getValue();
        for (COSObject reference : nextBatch) {
            COSBase parsed = parseObjectDynamically(reference, false);
            if (parsed == null) {
                continue;
            }
            reference.setObject(parsed);
            addNewToList(pending, parsed, addedObjects);
            parsedObjects.add(getObjectId(reference));
        }
    }
}
Example use of com.tom_roush.pdfbox.cos.COSObjectKey in the project PdfBox-Android by TomRoush.
Taken from the class COSParser, method bfSearchForObjStreams.
/**
 * Brute force search for all object streams.
 * <p>
 * The source is scanned from {@code MINIMUM_SEARCH_OFFSET} for the {@code OBJ_STREAM} marker.
 * For every hit the preceding bytes are searched backwards for the " obj" header of the
 * enclosing object so that its offset and object key can be recorded. Afterwards the header of
 * each object stream that is still current according to {@code bfSearchCOSObjectKeyOffsets} is
 * read, and every object number listed there is registered (with the negated stream object
 * number as "offset") in both {@code bfSearchCOSObjectKeyOffsets} and the xref table of the
 * trailer resolver. The source position is restored before returning normally.
 *
 * @throws IOException if something went wrong
 */
private void bfSearchForObjStreams() throws IOException {
    HashMap<Long, COSObjectKey> bfSearchObjStreamsOffsets = new HashMap<Long, COSObjectKey>();
    long originOffset = source.getPosition();
    source.seek(MINIMUM_SEARCH_OFFSET);
    char[] objMarker = " obj".toCharArray();
    while (!source.isEOF()) {
        // search for the object stream marker
        if (isString(OBJ_STREAM)) {
            long currentPosition = source.getPosition();
            // search backwards for the beginning of the object
            long newOffset = -1;
            boolean objFound = false;
            for (int i = 1; i < 40 && !objFound; i++) {
                long currentOffset = currentPosition - (i * 10);
                if (currentOffset > 0) {
                    source.seek(currentOffset);
                    for (int j = 0; j < 10; j++) {
                        if (isString(objMarker)) {
                            long tempOffset = currentOffset - 1;
                            source.seek(tempOffset);
                            int genID = source.peek();
                            // is the next char a digit?
                            if (isDigit(genID)) {
                                tempOffset--;
                                source.seek(tempOffset);
                                if (isSpace()) {
                                    int length = 0;
                                    source.seek(--tempOffset);
                                    // walk back over the digits of the object number
                                    while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit()) {
                                        source.seek(--tempOffset);
                                        length++;
                                    }
                                    if (length > 0) {
                                        source.read();
                                        newOffset = source.getPosition();
                                        long objNumber = readObjectNumber();
                                        int genNumber = readGenerationNumber();
                                        COSObjectKey streamObjectKey = new COSObjectKey(objNumber, genNumber);
                                        bfSearchObjStreamsOffsets.put(newOffset, streamObjectKey);
                                    }
                                }
                            }
                            Log.d("PdfBox-Android", "Dictionary start for object stream -> " + newOffset);
                            objFound = true;
                            break;
                        } else {
                            currentOffset++;
                            source.read();
                        }
                    }
                }
            }
            source.seek(currentPosition + OBJ_STREAM.length);
        }
        source.read();
    }
    // add all found compressed objects to the brute force search result
    for (Map.Entry<Long, COSObjectKey> found : bfSearchObjStreamsOffsets.entrySet()) {
        Long offset = found.getKey();
        COSObjectKey foundStreamKey = found.getValue();
        Long bfOffset = bfSearchCOSObjectKeyOffsets.get(foundStreamKey);
        // incomplete object stream found?
        if (bfOffset == null) {
            Log.w("PdfBox-Android", "Skipped incomplete object stream:" + foundStreamKey + " at " + offset);
            continue;
        }
        // check if the object was overwritten
        if (!offset.equals(bfOffset)) {
            continue;
        }
        source.seek(offset);
        long stmObjNumber = readObjectNumber();
        int stmGenNumber = readGenerationNumber();
        readExpectedString(OBJ_MARKER, true);
        int nrOfObjects = 0;
        byte[] numbersBytes = null;
        // number of header bytes that were actually read from the stream
        int numbersLength = 0;
        COSStream stream = null;
        COSInputStream is = null;
        try {
            COSDictionary dict = parseCOSDictionary();
            int offsetFirstStream = dict.getInt(COSName.FIRST);
            nrOfObjects = dict.getInt(COSName.N);
            // skip the stream if required values are missing or invalid; any negative value
            // is rejected so that a damaged /First cannot trigger a NegativeArraySizeException
            if (offsetFirstStream < 0 || nrOfObjects < 0) {
                continue;
            }
            stream = parseCOSStream(dict);
            if (securityHandler != null) {
                securityHandler.decryptStream(stream, stmObjNumber, stmGenNumber);
            }
            is = stream.createInputStream();
            numbersBytes = new byte[offsetFirstStream];
            // a single read() may deliver fewer bytes than requested, so keep reading until
            // the header is complete or the stream ends
            while (numbersLength < numbersBytes.length) {
                int count = is.read(numbersBytes, numbersLength, numbersBytes.length - numbersLength);
                if (count <= 0) {
                    break;
                }
                numbersLength += count;
            }
        } catch (IOException exception) {
            Log.d("PdfBox-Android", "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset, exception);
            continue;
        } finally {
            if (is != null) {
                is.close();
            }
            if (stream != null) {
                stream.close();
            }
        }
        // only the bytes really read are decoded; the numbers may be separated by any run of
        // whitespace (spaces, CR, LF, tabs), so split on that instead of on a single space
        String numbersStr = new String(numbersBytes, 0, numbersLength, "ISO-8859-1").trim();
        String[] numbers = numbersStr.split("\\s+");
        // compare as long: nrOfObjects * 2 could overflow int for a damaged /N value
        if (numbers.length < 2L * nrOfObjects) {
            Log.d("PdfBox-Android", "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset);
            continue;
        }
        Map<COSObjectKey, Long> xrefOffset = xrefTrailerResolver.getXrefTable();
        for (int i = 0; i < nrOfObjects; i++) {
            try {
                // the header consists of (object number, offset) pairs; only the number is used
                long objNumber = Long.parseLong(numbers[i * 2]);
                COSObjectKey objKey = new COSObjectKey(objNumber, 0);
                Long existingOffset = bfSearchCOSObjectKeyOffsets.get(objKey);
                if (existingOffset != null && existingOffset < 0) {
                    // translate stream object key to its offset
                    COSObjectKey objStmKey = new COSObjectKey(Math.abs(existingOffset), 0);
                    existingOffset = bfSearchCOSObjectKeyOffsets.get(objStmKey);
                }
                // register the object unless a definition at a higher offset is already known
                if (existingOffset == null || offset > existingOffset) {
                    bfSearchCOSObjectKeyOffsets.put(objKey, -stmObjNumber);
                    xrefOffset.put(objKey, -stmObjNumber);
                }
            } catch (NumberFormatException exception) {
                Log.d("PdfBox-Android", "Skipped corrupt object key in stream: " + stmObjNumber);
            }
        }
    }
    source.seek(originOffset);
}
Aggregations