Search in sources :

Example 51 with ValidationError

use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.

the class AbstractTestAction method valid.

/**
 * Runs every action manager created for the given action dictionary and checks the validation outcome.
 *
 * The action is wrapped in a fresh dictionary under the /A key and validated with a PDF/A-1b
 * configuration.
 *
 * @param action the action dictionary to validate.
 * @param valid true if the action is expected to validate without any error.
 * @param expectedCode the error code that must be present when {@code valid} is false; when null or
 * empty, any error is accepted.
 * @throws Exception if the context cannot be created, the validation fails unexpectedly or the
 * document cannot be closed.
 */
protected void valid(COSDictionary action, boolean valid, String expectedCode) throws Exception {
    ActionManagerFactory fact = new ActionManagerFactory();
    PreflightContext ctx = createContext();
    try {
        ctx.setConfig(PreflightConfiguration.createPdfA1BConfiguration());
        COSDictionary dict = new COSDictionary();
        dict.setItem(COSName.A, action);
        // process the action validation
        List<AbstractActionManager> actions = fact.getActionManagers(ctx, dict);
        for (AbstractActionManager abstractActionManager : actions) {
            abstractActionManager.valid();
        }
        // check the result
        if (!valid) {
            List<ValidationError> errors = ctx.getDocument().getResult().getErrorsList();
            assertFalse(errors.isEmpty());
            // only look for a specific code when the caller supplied one; the previous "||" made
            // this guard always true, so a null/empty code could never match and the test failed
            if (expectedCode != null && !expectedCode.isEmpty()) {
                boolean found = false;
                for (ValidationError err : errors) {
                    if (expectedCode.equals(err.getErrorCode())) {
                        found = true;
                        break;
                    }
                }
                assertTrue(found);
            }
        } else {
            if (ctx.getDocument().getResult() != null) {
                List<ValidationError> errors = ctx.getDocument().getResult().getErrorsList();
                assertTrue(errors.isEmpty());
            }
        }
    } finally {
        // release the document even when an assertion above fails
        ctx.getDocument().close();
    }
}
Also used : COSDictionary(org.apache.pdfbox.cos.COSDictionary) ActionManagerFactory(org.apache.pdfbox.preflight.action.ActionManagerFactory) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError) PreflightContext(org.apache.pdfbox.preflight.PreflightContext) AbstractActionManager(org.apache.pdfbox.preflight.action.AbstractActionManager)

Example 52 with ValidationError

use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.

the class CreatePDFATest method testCreatePDFA.

/**
 * Creates a PDF with {@code CreatePDFA.main}, validates it with preflight and then checks that the
 * Dublin Core title stored in the XMP metadata equals the output file name.
 *
 * @throws Exception if the file cannot be created, parsed or validated.
 */
public void testCreatePDFA() throws Exception {
    System.out.println("testCreatePDFA");
    String pdfaFilename = outDir + "/PDFA.pdf";
    String message = "The quick brown fox jumps over the lazy dog äöüÄÖÜß @°^²³ {[]}";
    String dir = "../pdfbox/src/main/resources/org/apache/pdfbox/resources/ttf/";
    String fontfile = dir + "LiberationSans-Regular.ttf";
    CreatePDFA.main(new String[] { pdfaFilename, message, fontfile });
    PreflightParser preflightParser = new PreflightParser(new File(pdfaFilename));
    preflightParser.parse();
    try (PreflightDocument preflightDocument = preflightParser.getPreflightDocument()) {
        preflightDocument.validate();
        ValidationResult result = preflightDocument.getResult();
        // print every error so that a failure below can be diagnosed from the test output
        for (ValidationError ve : result.getErrorsList()) {
            System.err.println(ve.getErrorCode() + ": " + ve.getDetails());
        }
        assertTrue("PDF file created with CreatePDFA is not valid PDF/A-1b", result.isValid());
    }
    // check the XMP metadata; try-with-resources closes the document even if an assertion
    // or the XMP parsing fails
    try (PDDocument document = PDDocument.load(new File(pdfaFilename))) {
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata meta = catalog.getMetadata();
        DomXmpParser xmpParser = new DomXmpParser();
        XMPMetadata metadata = xmpParser.parse(meta.createInputStream());
        DublinCoreSchema dc = metadata.getDublinCoreSchema();
        assertEquals(pdfaFilename, dc.getTitle());
    }
}
Also used : XMPMetadata(org.apache.xmpbox.XMPMetadata) DomXmpParser(org.apache.xmpbox.xml.DomXmpParser) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError) PDMetadata(org.apache.pdfbox.pdmodel.common.PDMetadata) ValidationResult(org.apache.pdfbox.preflight.ValidationResult) DublinCoreSchema(org.apache.xmpbox.schema.DublinCoreSchema) PreflightParser(org.apache.pdfbox.preflight.parser.PreflightParser) File(java.io.File) PreflightDocument(org.apache.pdfbox.preflight.PreflightDocument) PDDocumentCatalog(org.apache.pdfbox.pdmodel.PDDocumentCatalog)

Example 53 with ValidationError

use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.

the class PreflightParser method lastIndexOf.

/**
 * Delegates the search to the parent implementation and, when the searched pattern is the EOF
 * marker, records a validation error if anything other than a single end-of-line follows it.
 *
 * @param pattern the pattern to search for.
 * @param buf the buffer to search in.
 * @param endOff the end offset passed through to the parent implementation.
 * @return the offset returned by the parent implementation.
 */
@Override
protected int lastIndexOf(final char[] pattern, final byte[] buf, final int endOff) {
    final int markerOffset = super.lastIndexOf(pattern, buf, endOff);
    if (markerOffset <= 0 || !Arrays.equals(pattern, EOF_MARKER)) {
        // marker not found (or at the very start), or we are not looking for %%EOF
        return markerOffset;
    }
    // index of the first byte after the last %%EOF sequence; nothing but an EOL may follow
    final int afterMarker = markerOffset + pattern.length;
    final int trailing = buf.length - afterMarker;
    final boolean unexpectedData;
    if (trailing > 2) {
        unexpectedData = true;
    } else if (trailing == 2) {
        // two bytes are only accepted as CR LF
        unexpectedData = !(buf[afterMarker] == 13 && buf[afterMarker + 1] == 10);
    } else if (trailing == 1) {
        // a single byte must be CR or LF
        unexpectedData = buf[afterMarker] != 13 && buf[afterMarker] != 10;
    } else {
        unexpectedData = false;
    }
    if (unexpectedData) {
        long position;
        try {
            position = source.getPosition();
        } catch (IOException exception) {
            // the position is only used in the message, fall back to a sentinel value
            position = Long.MIN_VALUE;
        }
        addValidationError(new ValidationError(ERROR_SYNTAX_TRAILER_EOF, "File contains data after the last %%EOF sequence at offset " + position));
    }
    return markerOffset;
}
Also used : IOException(java.io.IOException) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError)

Example 54 with ValidationError

use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.

the class PreflightParser method parseObjectDynamically.

/**
 * Returns the object stored in the pool for the given object number and generation, parsing it
 * from the source first when the pooled container is still empty. Syntax deviations found while
 * parsing are recorded through {@code addValidationError}.
 *
 * @param objNr the object number.
 * @param objGenNr the generation number.
 * @param requireExistingNotCompressedObj if true, a missing xref entry for the object is reported
 * as a validation error and a {@code SyntaxValidationException} is thrown (only the missing entry
 * is checked here, not whether the object is compressed).
 * @return the object held by the pooled container; it can still be null, e.g. when the xref
 * offset is 0 and only a validation error was recorded.
 * @throws IOException if the xref entry points to a different object, a stream is not preceded by
 * a dictionary, the object does not end with 'endobj', or reading the source fails.
 */
@Override
protected COSBase parseObjectDynamically(long objNr, int objGenNr, boolean requireExistingNotCompressedObj) throws IOException {
    // ---- create object key and get object (container) from pool
    final COSObjectKey objKey = new COSObjectKey(objNr, objGenNr);
    final COSObject pdfObject = document.getObjectFromPool(objKey);
    if (pdfObject.getObject() == null) {
        // not previously parsed
        // ---- read offset or object stream object number from xref table
        Long offsetOrObjstmObNr = document.getXrefTable().get(objKey);
        // sanity test to circumvent loops with broken documents
        if (requireExistingNotCompressedObj && ((offsetOrObjstmObNr == null))) {
            addValidationError(new ValidationError(ERROR_SYNTAX_MISSING_OFFSET, "Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration()));
            throw new SyntaxValidationException("Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration(), validationResult);
        }
        if (offsetOrObjstmObNr == null) {
            // not defined object -> NULL object (Spec. 1.7, chap. 3.2.9)
            pdfObject.setObject(COSNull.NULL);
        } else if (offsetOrObjstmObNr == 0) {
            // only the error is recorded; the pooled container is left without an object
            addValidationError(new ValidationError(ERROR_SYNTAX_INVALID_OFFSET, "Object {" + objKey.getNumber() + ":" + objKey.getGeneration() + "} has an offset of 0"));
        } else if (offsetOrObjstmObNr > 0) {
            // offset of indirect object in file
            // ---- go to object start
            source.seek(offsetOrObjstmObNr);
            // ---- we must have an indirect object
            long readObjNr;
            int readObjGen;
            long offset = source.getPosition();
            String line = readLine();
            // strict form: the whole line must be "<number> <generation> obj" with exactly one
            // whitespace character between the tokens
            Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
            Matcher matcher = pattern.matcher(line);
            if (matcher.matches()) {
                readObjNr = Long.parseLong(matcher.group(1));
                readObjGen = Integer.parseInt(matcher.group(2));
            } else {
                // the strict form did not match: record the error, then re-read the header
                // token by token from the same offset
                addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected [offset=" + offset + "; key=" + offsetOrObjstmObNr.toString() + "; line=" + line + "; object=" + pdfObject.toString() + "]"));
                // reset source cursor to read object information
                source.seek(offset);
                readObjNr = readObjectNumber();
                readObjGen = readGenerationNumber();
                // skip spaces between Object Generation number and the 'obj' keyword
                skipSpaces();
                // the keyword itself is mandatory: any mismatching character aborts the parsing
                for (char c : OBJ_MARKER) {
                    if (source.read() != c) {
                        addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '" + new String(OBJ_MARKER) + " but missed at character '" + c + "'"));
                        throw new SyntaxValidationException("Expected pattern '" + new String(OBJ_MARKER) + " but missed at character '" + c + "'", validationResult);
                    }
                }
            }
            // ---- consistency check
            if ((readObjNr != objKey.getNumber()) || (readObjGen != objKey.getGeneration())) {
                throw new IOException("XREF for " + objKey.getNumber() + ":" + objKey.getGeneration() + " points to wrong object: " + readObjNr + ":" + readObjGen);
            }
            skipSpaces();
            COSBase pb = parseDirObject();
            skipSpaces();
            // remember where the keyword following the object starts, it is needed to rewind
            // for stream parsing and for the EOL check before 'endobj'
            long endObjectOffset = source.getPosition();
            String endObjectKey = readString();
            if (endObjectKey.equals("stream")) {
                // rewind so that the stream parser sees the 'stream' keyword itself
                source.seek(endObjectOffset);
                if (pb instanceof COSDictionary) {
                    COSStream stream = parseCOSStream((COSDictionary) pb);
                    if (securityHandler != null) {
                        securityHandler.decryptStream(stream, objNr, objGenNr);
                    }
                    pb = stream;
                } else {
                    // the combination of a dict and the stream/endstream forms a complete stream object
                    throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ").");
                }
                skipSpaces();
                endObjectOffset = source.getPosition();
                endObjectKey = readString();
                // we have case with a second 'endstream' before endobj
                if (!endObjectKey.startsWith("endobj")) {
                    if (endObjectKey.startsWith("endstream")) {
                        endObjectKey = endObjectKey.substring(9).trim();
                        if (endObjectKey.length() == 0) {
                            // no other characters in extra endstream line
                            // read next line
                            // NOTE(review): endObjectOffset is not updated here, so the EOL check
                            // below looks at the position of the extra 'endstream' - confirm intended
                            endObjectKey = readString();
                        }
                    }
                }
            } else if (securityHandler != null) {
                securityHandler.decrypt(pb, objNr, objGenNr);
            }
            pdfObject.setObject(pb);
            if (!endObjectKey.startsWith("endobj")) {
                throw new IOException("Object (" + readObjNr + ":" + readObjGen + ") at offset " + offsetOrObjstmObNr + " does not end with 'endobj'.");
            } else {
                // step back to the character preceding the closing keyword to test for an EOL,
                // then restore the current read position
                offset = source.getPosition();
                source.seek(endObjectOffset - 1);
                if (!nextIsEOL()) {
                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected before the 'endobj' keyword at offset " + source.getPosition()));
                }
                source.seek(offset);
            }
            if (!nextIsEOL()) {
                addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected after the 'endobj' keyword at offset " + source.getPosition()));
            }
        } else {
            // xref value is object nr of object stream containing object to be parsed;
            // since our object was not found it means object stream was not parsed so far
            final int objstmObjNr = (int) (-offsetOrObjstmObNr);
            // the object stream itself must exist in the xref table (third argument is true)
            final COSBase objstmBaseObj = parseObjectDynamically(objstmObjNr, 0, true);
            if (objstmBaseObj instanceof COSStream) {
                // parse object stream
                PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document);
                parser.parse();
                // register all objects which are referenced to be contained in object stream
                for (COSObject next : parser.getObjects()) {
                    COSObjectKey stmObjKey = new COSObjectKey(next);
                    Long offset = document.getXrefTable().get(stmObjKey);
                    // only take objects whose xref entry points to this very object stream
                    if (offset != null && offset == -objstmObjNr) {
                        COSObject stmObj = document.getObjectFromPool(stmObjKey);
                        stmObj.setObject(next.getObject());
                    }
                }
            }
        }
    }
    return pdfObject.getObject();
}
Also used : Pattern(java.util.regex.Pattern) COSStream(org.apache.pdfbox.cos.COSStream) COSDictionary(org.apache.pdfbox.cos.COSDictionary) Matcher(java.util.regex.Matcher) SyntaxValidationException(org.apache.pdfbox.preflight.exception.SyntaxValidationException) COSString(org.apache.pdfbox.cos.COSString) IOException(java.io.IOException) PDFObjectStreamParser(org.apache.pdfbox.pdfparser.PDFObjectStreamParser) COSObjectKey(org.apache.pdfbox.cos.COSObjectKey) COSObject(org.apache.pdfbox.cos.COSObject) COSBase(org.apache.pdfbox.cos.COSBase) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError)

Example 55 with ValidationError

use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.

the class BookmarkValidationProcess method exploreOutlineLevel.

/**
 * This method explores the Outline Item Level and calls a validation method on each Outline Item. If an invalid
 * outline item is found, the result list is updated.
 *
 * Besides the per-item validation, the /Prev and /Next links of the level are checked: the first item
 * must not have a previous sibling, every /Prev must point to the item visited just before, the last
 * visited object must be the expected /Last, and a /Next that was already seen is reported as a loop.
 *
 * @param ctx the preflight context.
 * @param inputItem The first outline item of the level.
 * @param firstObj The first PDF object of the level.
 * @param lastObj The last PDF object of the level.
 * @return true if all items are valid in this level.
 * @throws ValidationException if the validation of an outline item fails unexpectedly.
 */
protected boolean exploreOutlineLevel(PreflightContext ctx, PDOutlineItem inputItem, COSObject firstObj, COSObject lastObj) throws ValidationException {
    PDOutlineItem currentItem = inputItem;
    COSObject currentObj = firstObj;
    // objects already visited on this level, used to detect /Next loops
    Set<COSObject> levelObjects = new HashSet<>();
    levelObjects.add(firstObj);
    boolean result = true;
    if (currentItem != null && inputItem.getPreviousSibling() != null) {
        addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "The value of /Prev of first object " + firstObj + " on a level is " + inputItem.getCOSObject().getItem(COSName.PREV) + ", but shouldn't exist"));
        result = false;
    }
    while (currentItem != null) {
        // the object of the item being validated; it becomes the "previous" of the next one
        COSObject realPrevObject = currentObj;
        if (!validateItem(ctx, currentItem)) {
            result = false;
        }
        currentObj = toCOSObject(currentItem.getCOSObject().getItem(COSName.NEXT));
        if (levelObjects.contains(currentObj)) {
            addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Loop detected: /Next " + currentObj + " is already in the list"));
            return false;
        }
        if (realPrevObject == null) {
            // unclear if this can ever happen
            addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Outline object before " + currentObj + " is null"));
            return false;
        }
        levelObjects.add(currentObj);
        currentItem = currentItem.getNextSibling();
        if (currentItem == null) {
            if (!realPrevObject.equals(lastObj)) {
                // report the object that was actually visited last (realPrevObject); currentObj is
                // the /Next of that item, which is not the object compared with /Last
                addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Last object on a level isn't the expected /Last: " + lastObj + ", but is " + realPrevObject));
                result = false;
            }
        } else {
            COSObject prevObject = toCOSObject(currentItem.getCOSObject().getItem(COSName.PREV));
            if (!realPrevObject.equals(prevObject)) {
                addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "The value of /Prev at " + currentObj + " doesn't point to previous object " + realPrevObject + ", but to " + prevObject));
                result = false;
            }
        }
    }
    return result;
}
Also used : COSObject(org.apache.pdfbox.cos.COSObject) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError) PDOutlineItem(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem) HashSet(java.util.HashSet)

Aggregations

ValidationError (org.apache.pdfbox.preflight.ValidationResult.ValidationError)108 COSBase (org.apache.pdfbox.cos.COSBase)36 COSDictionary (org.apache.pdfbox.cos.COSDictionary)28 IOException (java.io.IOException)27 COSDocument (org.apache.pdfbox.cos.COSDocument)13 COSObject (org.apache.pdfbox.cos.COSObject)13 COSArray (org.apache.pdfbox.cos.COSArray)10 COSStream (org.apache.pdfbox.cos.COSStream)10 COSString (org.apache.pdfbox.cos.COSString)8 PDColorSpace (org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace)8 ValidationException (org.apache.pdfbox.preflight.exception.ValidationException)8 COSName (org.apache.pdfbox.cos.COSName)7 COSObjectKey (org.apache.pdfbox.cos.COSObjectKey)7 PreflightPath (org.apache.pdfbox.preflight.PreflightPath)6 InputStream (java.io.InputStream)5 ArrayList (java.util.ArrayList)5 PDStream (org.apache.pdfbox.pdmodel.common.PDStream)5 PreflightParser (org.apache.pdfbox.preflight.parser.PreflightParser)5 DublinCoreSchema (org.apache.xmpbox.schema.DublinCoreSchema)5 PDDocument (org.apache.pdfbox.pdmodel.PDDocument)4