Search in sources :

Example 6 with PDFStreamParser

use of org.apache.pdfbox.pdfparser.PDFStreamParser in project pdfbox by apache.

the class PDSquareAnnotationTest method validateAppearance.

/**
 * Loads the sample PDF and verifies that the first annotation on the first page carries a
 * normal appearance stream whose bounding box, matrix and content tokens match the expected
 * rectangle and a border of width one.
 */
@Test
public void validateAppearance() throws IOException {
    // width of the annotation's border
    final int borderWidth = 1;
    File pdfFile = new File(IN_DIR, NAME_OF_PDF);
    try (PDDocument document = PDDocument.load(pdfFile)) {
        PDPage firstPage = document.getPage(0);
        List<PDAnnotation> pageAnnotations = firstPage.getAnnotations();
        PDAnnotationSquareCircle annotation = (PDAnnotationSquareCircle) pageAnnotations.get(0);

        // the appearance dictionary, its normal entry and the stream itself must all be present
        assertNotNull("Appearance dictionary shall not be null", annotation.getAppearance());
        assertNotNull("Normal appearance shall not be null", annotation.getAppearance().getNormalAppearance());
        PDAppearanceStream appearanceStream = annotation.getAppearance().getNormalAppearance().getAppearanceStream();
        assertNotNull("Appearance stream shall not be null", appearanceStream);

        // the bounding box must coincide with the expected rectangle
        assertEquals(rectangle.getLowerLeftX(), appearanceStream.getBBox().getLowerLeftX(), DELTA);
        assertEquals(rectangle.getLowerLeftY(), appearanceStream.getBBox().getLowerLeftY(), DELTA);
        assertEquals(rectangle.getWidth(), appearanceStream.getBBox().getWidth(), DELTA);
        assertEquals(rectangle.getHeight(), appearanceStream.getBBox().getHeight(), DELTA);

        // the matrix is expected to translate the rectangle to a 0 origin
        Matrix matrix = appearanceStream.getMatrix();
        assertNotNull("Matrix shall not be null", matrix);
        assertEquals(-rectangle.getLowerLeftX(), matrix.getTranslateX(), DELTA);
        assertEquals(-rectangle.getLowerLeftY(), matrix.getTranslateY(), DELTA);

        // from here on the content of the appearance stream is inspected
        assertNotNull("Content stream shall not be null", appearanceStream.getContentStream());
        PDFStreamParser parser = new PDFStreamParser(appearanceStream);
        parser.parse();
        List<Object> tokens = parser.getTokens();
        // the sample's content stream should contain 10 tokens
        assertEquals(10, tokens.size());

        // tokens 0..3: setting of the stroking color
        int[] expectedColor = { 1, 0, 0 };
        for (int i = 0; i < expectedColor.length; i++) {
            assertEquals(expectedColor[i], ((COSInteger) tokens.get(i)).intValue());
        }
        assertEquals("RG", ((Operator) tokens.get(3)).getName());

        // tokens 4..8: the border rectangle, which shall be inset by the border width
        float[] expectedBorderRect = {
            rectangle.getLowerLeftX() + borderWidth,
            rectangle.getLowerLeftY() + borderWidth,
            rectangle.getWidth() - 2 * borderWidth,
            rectangle.getHeight() - 2 * borderWidth
        };
        for (int i = 0; i < expectedBorderRect.length; i++) {
            assertEquals(expectedBorderRect[i], ((COSFloat) tokens.get(4 + i)).floatValue(), DELTA);
        }
        assertEquals("re", ((Operator) tokens.get(8)).getName());

        // token 9: the path is stroked
        assertEquals("S", ((Operator) tokens.get(9)).getName());
    }
}
Also used : PDPage(org.apache.pdfbox.pdmodel.PDPage) PDStream(org.apache.pdfbox.pdmodel.common.PDStream) Matrix(org.apache.pdfbox.util.Matrix) PDFStreamParser(org.apache.pdfbox.pdfparser.PDFStreamParser) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) File(java.io.File) Test(org.junit.Test)

Example 7 with PDFStreamParser

use of org.apache.pdfbox.pdfparser.PDFStreamParser in project pdfbox by apache.

the class ControlCharacterTest method getStringsFromStream.

/**
 * Collects the trimmed values of all {@code COSString} tokens found in the normal appearance
 * stream of the field's first widget.
 *
 * @param field the field whose first widget is inspected.
 * @return the trimmed string values in the order they occur in the stream.
 * @throws IOException if parsing the appearance stream fails.
 */
private List<String> getStringsFromStream(PDField field) throws IOException {
    PDAnnotationWidget firstWidget = field.getWidgets().get(0);
    PDFStreamParser parser = new PDFStreamParser(firstWidget.getNormalAppearanceStream());
    List<String> strings = new ArrayList<>();
    for (Object token = parser.parseNextToken(); token != null; token = parser.parseNextToken()) {
        if (!(token instanceof COSString)) {
            continue;
        }
        // TODO: improve the string output to better match;
        // for now the value is trimmed because Acrobat adds spaces
        // to strings where we don't
        strings.add(((COSString) token).getString().trim());
    }
    return strings;
}
Also used : PDFStreamParser(org.apache.pdfbox.pdfparser.PDFStreamParser) ArrayList(java.util.ArrayList) PDAnnotationWidget(org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget) COSString(org.apache.pdfbox.cos.COSString) COSString(org.apache.pdfbox.cos.COSString)

Example 8 with PDFStreamParser

use of org.apache.pdfbox.pdfparser.PDFStreamParser in project pdfbox by apache.

the class PDFStreamEngine method processStreamOperators.

/**
 * Reads the given content stream token by token. Non-operator tokens are collected as operands
 * (indirect objects are replaced by the object they refer to); each operator is dispatched
 * together with the operands collected since the previous operator.
 *
 * @param contentStream the content stream to parse.
 * @throws IOException if there is an error reading or parsing the content stream.
 */
private void processStreamOperators(PDContentStream contentStream) throws IOException {
    PDFStreamParser parser = new PDFStreamParser(contentStream);
    List<COSBase> operands = new ArrayList<>();
    for (Object token = parser.parseNextToken(); token != null; token = parser.parseNextToken()) {
        if (token instanceof COSObject) {
            operands.add(((COSObject) token).getObject());
            continue;
        }
        if (token instanceof Operator) {
            processOperator((Operator) token, operands);
            // start over with a fresh list instead of clearing the one just handed out
            operands = new ArrayList<>();
            continue;
        }
        operands.add((COSBase) token);
    }
}
Also used : Operator(org.apache.pdfbox.contentstream.operator.Operator) PDFStreamParser(org.apache.pdfbox.pdfparser.PDFStreamParser) COSObject(org.apache.pdfbox.cos.COSObject) ArrayList(java.util.ArrayList) COSBase(org.apache.pdfbox.cos.COSBase) COSObject(org.apache.pdfbox.cos.COSObject) PDFormXObject(org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject)

Example 9 with PDFStreamParser

use of org.apache.pdfbox.pdfparser.PDFStreamParser in project pdfbox by apache.

the class PDType3CharProc method getGlyphBBox.

/**
 * Calculate the bounding box of this glyph from the operands of a leading {@code d1} operator.
 * Only the first operator in the stream is considered.
 *
 * @return the bounding box of this glyph, or null if the stream contains no operator, if the
 * first operator is not {@code d1}, or if it is not preceded by exactly six numeric operands.
 * @throws IOException If an io error occurs while parsing the stream.
 */
public PDRectangle getGlyphBBox() throws IOException {
    PDFStreamParser parser = new PDFStreamParser(this);
    List<COSBase> operands = new ArrayList<>();
    for (Object token = parser.parseNextToken(); token != null; token = parser.parseNextToken()) {
        if (token instanceof COSObject) {
            operands.add(((COSObject) token).getObject());
            continue;
        }
        if (!(token instanceof Operator)) {
            operands.add((COSBase) token);
            continue;
        }

        // the first operator encountered decides the outcome
        if (!((Operator) token).getName().equals("d1") || operands.size() != 6) {
            return null;
        }
        for (COSBase operand : operands) {
            if (!(operand instanceof COSNumber)) {
                return null;
            }
        }

        // operands 2..5 hold the two corners; the rectangle takes origin plus width and height
        float llx = ((COSNumber) operands.get(2)).floatValue();
        float lly = ((COSNumber) operands.get(3)).floatValue();
        float urx = ((COSNumber) operands.get(4)).floatValue();
        float ury = ((COSNumber) operands.get(5)).floatValue();
        return new PDRectangle(llx, lly, urx - llx, ury - lly);
    }
    return null;
}
Also used : Operator(org.apache.pdfbox.contentstream.operator.Operator) PDFStreamParser(org.apache.pdfbox.pdfparser.PDFStreamParser) COSObject(org.apache.pdfbox.cos.COSObject) ArrayList(java.util.ArrayList) COSNumber(org.apache.pdfbox.cos.COSNumber) COSBase(org.apache.pdfbox.cos.COSBase) COSObject(org.apache.pdfbox.cos.COSObject) PDRectangle(org.apache.pdfbox.pdmodel.common.PDRectangle)

Example 10 with PDFStreamParser

use of org.apache.pdfbox.pdfparser.PDFStreamParser in project pdfbox by apache.

the class RemoveAllText method createTokensWithoutText.

/**
 * Copies the tokens of a content stream while leaving out every text-showing operator
 * ({@code Tj}, {@code TJ}, {@code '} and {@code "}) together with its operands.
 *
 * <p>{@code Tj}, {@code TJ} and {@code '} take a single operand, whereas {@code "} takes three
 * (word spacing, character spacing and the string); all of them are removed so that no
 * orphaned operands remain in the resulting token list.
 *
 * @param contentStream the content stream to filter.
 * @return the tokens of the stream without text-showing operators and their operands.
 * @throws IOException if the content stream cannot be parsed.
 */
private static List<Object> createTokensWithoutText(PDContentStream contentStream) throws IOException {
    PDFStreamParser parser = new PDFStreamParser(contentStream);
    List<Object> newTokens = new ArrayList<>();
    for (Object token = parser.parseNextToken(); token != null; token = parser.parseNextToken()) {
        if (token instanceof Operator) {
            int operandCount = textShowingOperandCount(((Operator) token).getName());
            if (operandCount > 0) {
                // drop the operator (by not adding it) and the operands collected for it
                removeTrailingOperands(newTokens, operandCount);
                continue;
            }
        }
        newTokens.add(token);
    }
    return newTokens;
}

/**
 * Returns the number of operands of the given text-showing operator, or 0 if the operator
 * does not show text.
 */
private static int textShowingOperandCount(String operatorName) {
    if ("TJ".equals(operatorName) || "Tj".equals(operatorName) || "'".equals(operatorName)) {
        return 1;
    }
    if ("\"".equals(operatorName)) {
        return 3;
    }
    return 0;
}

/**
 * Removes up to {@code count} tokens from the end of the list. Removal stops early when the
 * list runs empty or an operator is reached, so a malformed stream with missing operands
 * neither throws nor loses a preceding instruction.
 */
private static void removeTrailingOperands(List<Object> tokens, int count) {
    for (int removed = 0; removed < count && !tokens.isEmpty(); removed++) {
        int lastIndex = tokens.size() - 1;
        if (tokens.get(lastIndex) instanceof Operator) {
            break;
        }
        tokens.remove(lastIndex);
    }
}
Also used : Operator(org.apache.pdfbox.contentstream.operator.Operator) PDFStreamParser(org.apache.pdfbox.pdfparser.PDFStreamParser) ArrayList(java.util.ArrayList) PDXObject(org.apache.pdfbox.pdmodel.graphics.PDXObject) PDFormXObject(org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject)

Aggregations

PDFStreamParser (org.apache.pdfbox.pdfparser.PDFStreamParser)11 ArrayList (java.util.ArrayList)6 Operator (org.apache.pdfbox.contentstream.operator.Operator)6 COSBase (org.apache.pdfbox.cos.COSBase)4 COSObject (org.apache.pdfbox.cos.COSObject)4 File (java.io.File)2 PDPage (org.apache.pdfbox.pdmodel.PDPage)2 PDFormXObject (org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject)2 Test (org.junit.Test)2 IOException (java.io.IOException)1 COSFloat (org.apache.pdfbox.cos.COSFloat)1 COSNumber (org.apache.pdfbox.cos.COSNumber)1 COSString (org.apache.pdfbox.cos.COSString)1 ScratchFile (org.apache.pdfbox.io.ScratchFile)1 PDDocument (org.apache.pdfbox.pdmodel.PDDocument)1 PDRectangle (org.apache.pdfbox.pdmodel.common.PDRectangle)1 PDStream (org.apache.pdfbox.pdmodel.common.PDStream)1 PDXObject (org.apache.pdfbox.pdmodel.graphics.PDXObject)1 PDAnnotationWidget (org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget)1 Matrix (org.apache.pdfbox.util.Matrix)1