use of org.apache.pdfbox.contentstream.operator.Operator in project pdfbox by apache.
the class PDFStreamEngine method processStreamOperators.
/**
 * Reads every token of a content stream and dispatches each operator together with
 * the operands that preceded it.
 *
 * @param contentStream the content stream to parse.
 * @throws IOException if there is an error reading or parsing the content stream.
 */
private void processStreamOperators(PDContentStream contentStream) throws IOException {
    PDFStreamParser streamParser = new PDFStreamParser(contentStream);
    List<COSBase> operands = new ArrayList<>();
    for (Object current = streamParser.parseNextToken();
            current != null;
            current = streamParser.parseNextToken()) {
        if (current instanceof Operator) {
            processOperator((Operator) current, operands);
            // The list was handed to processOperator; collect the next operands in a new one.
            operands = new ArrayList<>();
            continue;
        }
        // Anything that is not an operator is an operand; COSObject wrappers are unwrapped first.
        COSBase operand = current instanceof COSObject
                ? ((COSObject) current).getObject()
                : (COSBase) current;
        operands.add(operand);
    }
}
use of org.apache.pdfbox.contentstream.operator.Operator in project pdfbox by apache.
the class PDFStreamParser method parseNextToken.
/**
 * This will parse the next token in the stream.
 *
 * <p>The kind of token is chosen from the first non-space character: dictionaries,
 * strings, arrays, names, {@code null}, booleans, numbers and operators are recognised.
 * A {@code BI} operator is returned with its inline image parameters and image data
 * already attached, because the following {@code ID ... EI} section is consumed here.</p>
 *
 * @return The next token in the stream, or null if there are no more tokens in the stream
 * or if an empty operator was read (which is treated as a corrupt stream).
 *
 * @throws IOException If an io error occurs while parsing the stream.
 */
public Object parseNextToken() throws IOException {
    Object retval;
    skipSpaces();
    int nextByte = seqSource.peek();
    // Compare the int itself: narrowing to byte first would make a real 0xFF data
    // byte indistinguishable from the -1 end-of-stream marker and stop parsing early.
    if (nextByte == -1) {
        return null;
    }
    char c = (char) nextByte;
    switch (c) {
        case '<':
        {
            // pull off first left bracket
            int leftBracket = seqSource.read();
            // check for second left bracket
            c = (char) seqSource.peek();
            // put back first bracket
            seqSource.unread(leftBracket);
            if (c == '<') {
                retval = parseCOSDictionary();
            } else {
                retval = parseCOSString();
            }
            break;
        }
        case '[':
        {
            // array
            retval = parseCOSArray();
            break;
        }
        case '(':
            // string
            retval = parseCOSString();
            break;
        case '/':
            // name
            retval = parseCOSName();
            break;
        case 'n':
        {
            // null, or an operator that merely starts with 'n'
            String nullString = readString();
            if (nullString.equals("null")) {
                retval = COSNull.NULL;
            } else {
                retval = Operator.getOperator(nullString);
            }
            break;
        }
        case 't':
        case 'f':
        {
            // boolean literal, or an operator that merely starts with 't' / 'f'
            String next = readString();
            if (next.equals("true")) {
                retval = COSBoolean.TRUE;
            } else if (next.equals("false")) {
                retval = COSBoolean.FALSE;
            } else {
                retval = Operator.getOperator(next);
            }
            break;
        }
        case 'R':
        {
            // a lone "R" yields a COSObject built from null; anything longer is an operator
            String line = readString();
            if (line.equals("R")) {
                retval = new COSObject(null);
            } else {
                retval = Operator.getOperator(line);
            }
            break;
        }
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        case '-':
        case '+':
        case '.':
        {
            /* We will be filling buf with the rest of the number. Only
             * allow 1 "." and "-" and "+" at start of number. */
            StringBuilder buf = new StringBuilder();
            buf.append(c);
            seqSource.read();
            // Ignore double negative (this is consistent with Adobe Reader)
            if (c == '-' && seqSource.peek() == c) {
                seqSource.read();
            }
            boolean dotNotRead = c != '.';
            while (Character.isDigit(c = (char) seqSource.peek()) || dotNotRead && c == '.' || c == '-') {
                if (c != '-') {
                    // PDFBOX-4064: ignore "-" in the middle of a number
                    buf.append(c);
                }
                seqSource.read();
                if (dotNotRead && c == '.') {
                    dotNotRead = false;
                }
            }
            retval = COSNumber.get(buf.toString());
            break;
        }
        case 'B':
        {
            String next = readString();
            retval = Operator.getOperator(next);
            if (next.equals("BI")) {
                Operator beginImageOP = (Operator) retval;
                COSDictionary imageParams = new COSDictionary();
                beginImageOP.setImageParameters(imageParams);
                Object nextToken = null;
                // inline image parameters are name/value pairs up to the first non-name token
                while ((nextToken = parseNextToken()) instanceof COSName) {
                    Object value = parseNextToken();
                    if (!(value instanceof COSBase)) {
                        // A malformed stream can put an operator (or nothing) where a value
                        // belongs; stop here instead of failing with a ClassCastException.
                        LOG.warn("Unexpected token in inline image dictionary at offset " + seqSource.getPosition());
                        break;
                    }
                    imageParams.setItem((COSName) nextToken, (COSBase) value);
                }
                // final token will be the image data, maybe??
                if (nextToken instanceof Operator) {
                    Operator imageData = (Operator) nextToken;
                    if (imageData.getImageData() == null || imageData.getImageData().length == 0) {
                        LOG.warn("empty inline image at stream offset " + seqSource.getPosition());
                    }
                    beginImageOP.setImageData(imageData.getImageData());
                }
            }
            break;
        }
        case 'I':
        {
            // Special case for ID operator
            String id = "" + (char) seqSource.read() + (char) seqSource.read();
            if (!id.equals("ID")) {
                throw new IOException("Error: Expected operator 'ID' actual='" + id + "'");
            }
            ByteArrayOutputStream imageData = new ByteArrayOutputStream();
            if (isWhitespace()) {
                // pull off the whitespace character
                seqSource.read();
            }
            int lastByte = seqSource.read();
            int currentByte = seqSource.read();
            // Be aware not all kind of whitespaces are allowed here. see PDFBOX-1561
            while (!(lastByte == 'E' && currentByte == 'I' && hasNextSpaceOrReturn() && hasNoFollowingBinData(seqSource)) && !seqSource.isEOF()) {
                imageData.write(lastByte);
                lastByte = currentByte;
                currentByte = seqSource.read();
            }
            // the EI operator isn't unread, as it won't be processed anyway
            retval = Operator.getOperator("ID");
            // save the image data to the operator, so that it can be accessed later
            ((Operator) retval).setImageData(imageData.toByteArray());
            break;
        }
        case ']':
        {
            // some ']' around without its previous '['
            // this means a PDF is somewhat corrupt but we will continue to parse.
            seqSource.read();
            // must be a better solution than null...
            retval = COSNull.NULL;
            break;
        }
        default:
        {
            // we must be an operator
            String operator = readOperator();
            if (operator.trim().length() == 0) {
                // we have a corrupt stream, stop reading here
                retval = null;
            } else {
                retval = Operator.getOperator(operator);
            }
        }
    }
    return retval;
}
use of org.apache.pdfbox.contentstream.operator.Operator in project pdfbox by apache.
the class PDType3CharProc method getGlyphBBox.
/**
 * Derives the glyph bounding box from the operands of a leading d1 operator.
 *
 * <p>Tokens are collected as operands until the first operator is reached. If that
 * operator is d1 with exactly six numeric operands, operands 2 and 3 give the lower
 * left corner and operands 4 and 5 the upper right corner of the box.</p>
 *
 * @return the bounding box of this glyph, or null if the first operator is not d1,
 * if it does not have six numeric operands, or if the stream contains no operator.
 * @throws IOException If an io error occurs while parsing the stream.
 */
public PDRectangle getGlyphBBox() throws IOException {
    PDFStreamParser streamParser = new PDFStreamParser(this);
    List<COSBase> operands = new ArrayList<>();
    for (Object current = streamParser.parseNextToken();
            current != null;
            current = streamParser.parseNextToken()) {
        if (!(current instanceof Operator)) {
            // operand: unwrap COSObject wrappers, keep everything else as is
            operands.add(current instanceof COSObject
                    ? ((COSObject) current).getObject()
                    : (COSBase) current);
            continue;
        }

        // The first operator decides the outcome; nothing after it is inspected.
        Operator firstOperator = (Operator) current;
        if (!firstOperator.getName().equals("d1") || operands.size() != 6) {
            return null;
        }
        for (COSBase operand : operands) {
            if (!(operand instanceof COSNumber)) {
                return null;
            }
        }
        float lowerLeftX = ((COSNumber) operands.get(2)).floatValue();
        float lowerLeftY = ((COSNumber) operands.get(3)).floatValue();
        float upperRightX = ((COSNumber) operands.get(4)).floatValue();
        float upperRightY = ((COSNumber) operands.get(5)).floatValue();
        return new PDRectangle(lowerLeftX, lowerLeftY, upperRightX - lowerLeftX, upperRightY - lowerLeftY);
    }
    return null;
}
use of org.apache.pdfbox.contentstream.operator.Operator in project pdfbox by apache.
the class RemoveAllText method createTokensWithoutText.
/**
 * Parses a content stream and returns its tokens with every text-showing operator
 * (Tj, TJ, ' and ") removed together with that operator's operands.
 *
 * @param contentStream the content stream to parse.
 * @return the remaining tokens, in their original order.
 * @throws IOException if there is an error reading or parsing the content stream.
 */
private static List<Object> createTokensWithoutText(PDContentStream contentStream) throws IOException {
    PDFStreamParser parser = new PDFStreamParser(contentStream);
    List<Object> newTokens = new ArrayList<>();
    Object token = parser.parseNextToken();
    while (token != null) {
        int operandCount = token instanceof Operator
                ? textOperandCount(((Operator) token).getName())
                : 0;
        if (operandCount > 0) {
            // drop the operator itself (by not adding it) and the operands already collected
            removeTrailingOperands(newTokens, operandCount);
        } else {
            newTokens.add(token);
        }
        token = parser.parseNextToken();
    }
    return newTokens;
}

/**
 * Returns how many operands the given text-showing operator takes, or 0 if the
 * operator does not show text. Tj, TJ and ' take one operand; " takes three
 * (word spacing, character spacing and the string).
 */
private static int textOperandCount(String operatorName) {
    if ("TJ".equals(operatorName) || "Tj".equals(operatorName) || "'".equals(operatorName)) {
        return 1;
    }
    if ("\"".equals(operatorName)) {
        return 3;
    }
    return 0;
}

/**
 * Removes up to {@code count} tokens from the end of the list. Stops early when the
 * list is empty or the last token is an operator, so that a malformed stream with
 * missing operands neither throws nor loses a preceding operator.
 */
private static void removeTrailingOperands(List<Object> tokens, int count) {
    for (int removed = 0; removed < count && !tokens.isEmpty(); removed++) {
        int last = tokens.size() - 1;
        if (tokens.get(last) instanceof Operator) {
            break;
        }
        tokens.remove(last);
    }
}
Aggregations