Example use of org.apache.pdfbox.cos.COSInputStream in the Apache PDFBox project:
class COSParser, method bfSearchForObjStreams.
/**
 * Brute force search for all object streams.
 *
 * <p>The method works in two phases:
 * <ol>
 * <li>Scan the source from {@code MINIMUM_SEARCH_OFFSET} to EOF for the {@code OBJ_STREAM} marker. For each
 * hit, look backwards in 10-byte windows (at most 39 of them) for the {@code " obj"} keyword and, if the
 * characters in front of it look like "&lt;digits&gt; &lt;digit&gt;", read the object/generation number found
 * there and remember the offset and key of that object stream.</li>
 * <li>For every remembered object stream whose offset equals the offset stored for the same key in
 * {@code bfSearchCOSObjectKeyOffsets}, read the first {@code /First} bytes of the stream (the table of
 * "object number / offset" pairs) and register each listed object number in
 * {@code bfSearchCOSObjectKeyOffsets}, mapped to the negated object number of the containing stream.</li>
 * </ol>
 *
 * <p>Streams with missing or negative {@code /First} or {@code /N} values, unreadable streams and malformed
 * number tables are skipped (with a log message) instead of aborting the search. On normal completion the
 * source is repositioned to where it was when the method was called.
 *
 * @throws IOException if something went wrong
 */
private void bfSearchForObjStreams() throws IOException {
    HashMap<Long, COSObjectKey> bfSearchObjStreamsOffsets = new HashMap<>();
    long originOffset = source.getPosition();
    source.seek(MINIMUM_SEARCH_OFFSET);
    char[] objKeyword = " obj".toCharArray();
    while (!source.isEOF()) {
        // search for the object stream marker
        if (isString(OBJ_STREAM)) {
            long currentPosition = source.getPosition();
            // search backwards for the beginning of the object
            long newOffset = -1;
            COSObjectKey streamObjectKey = null;
            boolean objFound = false;
            // step back in windows of 10 bytes until " obj" is found or 39 windows were inspected
            for (int i = 1; i < 40 && !objFound; i++) {
                long currentOffset = currentPosition - (i * 10);
                if (currentOffset > 0) {
                    source.seek(currentOffset);
                    // check every position inside the current window
                    for (int j = 0; j < 10; j++) {
                        if (isString(objKeyword)) {
                            long tempOffset = currentOffset - 1;
                            source.seek(tempOffset);
                            int genID = source.peek();
                            // is the char in front of " obj" a digit (presumably the generation number)?
                            if (isDigit(genID)) {
                                tempOffset--;
                                source.seek(tempOffset);
                                // the digit has to be preceded by a space ...
                                if (isSpace()) {
                                    int length = 0;
                                    source.seek(--tempOffset);
                                    // ... which in turn is preceded by the digits of the object number
                                    while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit()) {
                                        source.seek(--tempOffset);
                                        length++;
                                    }
                                    if (length > 0) {
                                        // step over the char that ended the backward scan
                                        source.read();
                                        newOffset = source.getPosition();
                                        long objNumber = readObjectNumber();
                                        int genNumber = readGenerationNumber();
                                        streamObjectKey = new COSObjectKey(objNumber, genNumber);
                                        bfSearchObjStreamsOffsets.put(newOffset, streamObjectKey);
                                    }
                                }
                            }
                            // the keyword was found; stop searching even if no valid key preceded it
                            LOG.debug("Dictionary start for object stream -> " + newOffset);
                            objFound = true;
                            break;
                        } else {
                            currentOffset++;
                            source.read();
                        }
                    }
                }
            }
            // continue the forward scan right behind the marker
            source.seek(currentPosition + OBJ_STREAM.length);
        }
        source.read();
    }
    // add all found compressed objects to the brute force search result
    for (Entry<Long, COSObjectKey> streamOffsetsEntry : bfSearchObjStreamsOffsets.entrySet()) {
        Long offset = streamOffsetsEntry.getKey();
        Long bfOffset = bfSearchCOSObjectKeyOffsets.get(streamOffsetsEntry.getValue());
        // incomplete object stream found?
        if (bfOffset == null) {
            LOG.warn("Skipped incomplete object stream:" + streamOffsetsEntry.getValue() + " at " + offset);
            continue;
        }
        // check if the object was overwritten
        if (offset.equals(bfOffset)) {
            source.seek(offset);
            long stmObjNumber = readObjectNumber();
            readGenerationNumber();
            readExpectedString(OBJ_MARKER, true);
            int nrOfObjects = 0;
            byte[] numbersBytes = null;
            COSStream stream = null;
            COSInputStream is = null;
            try {
                COSDictionary dict = parseCOSDictionary();
                int offsetFirstStream = dict.getInt(COSName.FIRST);
                nrOfObjects = dict.getInt(COSName.N);
                // skip the stream if required values are missing or invalid; a negative /First
                // would otherwise raise a NegativeArraySizeException below
                if (offsetFirstStream < 0 || nrOfObjects < 0) {
                    continue;
                }
                stream = parseCOSStream(dict);
                is = stream.createInputStream();
                numbersBytes = new byte[offsetFirstStream];
                long isResult = is.read(numbersBytes);
                if (isResult != numbersBytes.length) {
                    LOG.debug("Tried reading " + numbersBytes.length + " bytes but only " + isResult + " bytes read");
                }
            } catch (IOException exception) {
                LOG.debug("Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset, exception);
                continue;
            } finally {
                if (is != null) {
                    is.close();
                }
                if (stream != null) {
                    stream.close();
                }
            }
            // The header is a whitespace separated list of "object number / offset" pairs. trim() removes
            // leading/trailing whitespace (and unread zero bytes) without indexing past the end of an empty
            // or all-blank buffer; splitting on whitespace runs copes with CR, LF, tabs and repeated spaces.
            String numbersStr = new String(numbersBytes, "ISO-8859-1").trim();
            String[] numbers = numbersStr.split("\\s+");
            // widen to long so that a huge /N cannot overflow the comparison
            if (numbers.length < (long) nrOfObjects * 2) {
                LOG.debug("Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset);
                continue;
            }
            for (int i = 0; i < nrOfObjects; i++) {
                long objNumber;
                try {
                    // every even index holds an object number, the odd ones hold the offsets
                    objNumber = Long.parseLong(numbers[i * 2]);
                } catch (NumberFormatException exception) {
                    // a malformed entry must not abort the whole brute force search
                    LOG.debug("Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset, exception);
                    continue;
                }
                COSObjectKey objKey = new COSObjectKey(objNumber, 0);
                Long existingOffset = bfSearchCOSObjectKeyOffsets.get(objKey);
                if (existingOffset == null || offset > existingOffset) {
                    // a negative value marks the object as being stored within the given object stream
                    bfSearchCOSObjectKeyOffsets.put(objKey, -stmObjNumber);
                }
            }
        }
    }
    source.seek(originOffset);
}
Example use of org.apache.pdfbox.cos.COSInputStream in the Apache PDFBox project:
class ShowSignature, method printStreamsFromArray.
/**
 * Goes through the elements of a COSArray, each of which references a COSStream, and prints the content of
 * every stream as a hex string to standard output. Elements whose referenced object is not a COSStream are
 * skipped.
 *
 * @param elements COSArray of COSObject elements, each referencing a COSStream
 * @param description text to prepend to every printed line
 * @throws IOException if a stream cannot be opened, read or closed
 */
private void printStreamsFromArray(COSArray elements, String description) throws IOException {
    for (COSBase baseElem : elements) {
        COSObject streamObj = (COSObject) baseElem;
        // resolve the referenced object only once
        COSBase resolved = streamObj.getObject();
        if (resolved instanceof COSStream) {
            COSStream cosStream = (COSStream) resolved;
            byte[] streamBytes;
            // try-with-resources makes sure the input stream is closed even if reading fails
            try (COSInputStream input = cosStream.createInputStream()) {
                streamBytes = IOUtils.toByteArray(input);
            }
            System.out.println(description + " (" + elements.indexOf(streamObj) + "): " + Hex.getString(streamBytes));
        }
    }
}
Aggregations