Search in sources :

Example 1 with SyntaxValidationException

use of org.apache.pdfbox.preflight.exception.SyntaxValidationException in project pdfbox by apache.

the class PreflightParser method parse.

/**
 * Parses the underlying source and prepares a preflight document that can be checked for conformance with the
 * requested format.
 *
 * @param format
 *            format the document is expected to conform to; {@link Format#PDF_A1B} is used when this is null
 * @param config
 *            configuration bean that will be used by the PreflightDocument. If null the format is used to
 *            determine the default configuration.
 * @throws IOException
 *             if the base parser fails; the failure is first recorded as a validation error and then rethrown
 *             as a SyntaxValidationException carrying the validation result
 */
public void parse(Format format, PreflightConfiguration config) throws IOException {
    checkPdfHeader();
    try {
        super.parse();
    } catch (IOException parseFailure) {
        // keep the parser message as a syntax error so callers can report it from the validation result
        ValidationError syntaxError = new ValidationError(PreflightConstants.ERROR_SYNTAX_COMMON, parseFailure.getMessage());
        addValidationError(syntaxError);
        throw new SyntaxValidationException(parseFailure, this.validationResult);
    } finally {
        // TODO move file handling outside of the parser
        IOUtils.closeQuietly(source);
    }
    // fall back to PDF/A-1b when the caller did not request a specific format
    final Format formatToUse;
    if (format == null) {
        formatToUse = Format.PDF_A1B;
    } else {
        formatToUse = format;
    }
    createPdfADocument(formatToUse, config);
    createContext();
}
Also used : Format(org.apache.pdfbox.preflight.Format) SyntaxValidationException(org.apache.pdfbox.preflight.exception.SyntaxValidationException) IOException(java.io.IOException) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError)

Example 2 with SyntaxValidationException

use of org.apache.pdfbox.preflight.exception.SyntaxValidationException in project pdfbox by apache.

the class XmlResultParser method validate.

/**
 * Runs the preflight validation on the given data source and builds the matching response element.
 *
 * <p>The parsed document is always closed, even when validation throws. A SyntaxValidationException raised
 * while parsing or validating is not treated as a failure: its validation result is reported like any other
 * result. Any other exception produces the failure response.</p>
 *
 * @param rdocument
 *            DOM document used to create the response elements
 * @param source
 *            data source to validate; its name is passed on to the response builders
 * @return the response element: either the skeleton completed with the validation outcome, or the failure
 *         response when an unexpected exception occurred
 * @throws IOException
 *             declared by the signature; exceptions raised while parsing, validating or closing the
 *             document are caught here and turned into the failure response
 */
public Element validate(Document rdocument, DataSource source) throws IOException {
    String pdfType = null;
    ValidationResult result;
    long before = System.currentTimeMillis();
    try {
        PreflightParser parser = new PreflightParser(source);
        try {
            parser.parse();
            PreflightDocument document = parser.getPreflightDocument();
            try {
                document.validate();
                pdfType = document.getSpecification().getFname();
                result = document.getResult();
            } finally {
                // release the document even if validation fails part-way through
                document.close();
            }
        } catch (SyntaxValidationException e) {
            // syntax problems are a validation outcome, not a processing failure
            result = e.getResult();
        }
    } catch (Exception e) {
        long after = System.currentTimeMillis();
        return generateFailureResponse(rdocument, source.getName(), after - before, pdfType, e);
    }
    long after = System.currentTimeMillis();
    boolean isValid = result.isValid();
    Element preflight = generateResponseSkeleton(rdocument, source.getName(), after - before);
    if (isValid) {
        // valid: add the isValid element carrying the detected PDF type
        Element valid = rdocument.createElement("isValid");
        valid.setAttribute("type", pdfType);
        valid.setTextContent("true");
        preflight.appendChild(valid);
    } else {
        // invalid: let the helper add the error details to the skeleton
        createResponseWithError(rdocument, pdfType, result, preflight);
    }
    return preflight;
}
Also used : SyntaxValidationException(org.apache.pdfbox.preflight.exception.SyntaxValidationException) Element(org.w3c.dom.Element) ValidationResult(org.apache.pdfbox.preflight.ValidationResult) PreflightDocument(org.apache.pdfbox.preflight.PreflightDocument) IOException(java.io.IOException) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException)

Example 3 with SyntaxValidationException

use of org.apache.pdfbox.preflight.exception.SyntaxValidationException in project pdfbox by apache.

the class PreflightParser method parseObjectDynamically.

/**
 * Parses the indirect object identified by the given object and generation number, recording syntax problems
 * found on the way as validation errors.
 *
 * <p>The object container is taken from the document's object pool. If it already holds a parsed object,
 * that object is returned unchanged. Otherwise the cross reference table entry for the key decides what
 * happens:</p>
 * <ul>
 * <li>no entry: the object is set to {@code COSNull.NULL}, unless {@code requireExistingNotCompressedObj} is
 * set, in which case a validation error is added and a SyntaxValidationException is thrown;</li>
 * <li>entry equal to 0: only a validation error is added, the container is not filled;</li>
 * <li>positive entry: treated as a file offset; the object header, the object body, an optional stream and
 * the {@code endobj} keyword are read from that position;</li>
 * <li>negative entry: its absolute value is treated as the number of an object stream, which is parsed so
 * that the objects it contains can be registered in the pool.</li>
 * </ul>
 *
 * @param objNr
 *            object number of the object to parse
 * @param objGenNr
 *            generation number of the object to parse
 * @param requireExistingNotCompressedObj
 *            if true, a missing cross reference entry is reported as an error and aborts parsing
 * @return the result of {@code getObject()} on the pooled container after processing
 * @throws IOException
 *             if the object found at the offset does not match the key, a stream is not preceded by a
 *             dictionary, or the object does not end with {@code endobj}; a SyntaxValidationException is
 *             thrown for a required but missing object and for a malformed {@code obj} marker
 */
@Override
protected COSBase parseObjectDynamically(long objNr, int objGenNr, boolean requireExistingNotCompressedObj) throws IOException {
    // ---- create object key and get object (container) from pool
    final COSObjectKey objKey = new COSObjectKey(objNr, objGenNr);
    final COSObject pdfObject = document.getObjectFromPool(objKey);
    if (pdfObject.getObject() == null) {
        // not previously parsed
        // ---- read offset or object stream object number from xref table
        Long offsetOrObjstmObNr = document.getXrefTable().get(objKey);
        // sanity test to circumvent loops with broken documents
        // NOTE(review): only the missing-entry case is tested here, although the message also mentions
        // compressed objects - confirm whether non-positive entries should be rejected as well
        if (requireExistingNotCompressedObj && ((offsetOrObjstmObNr == null))) {
            addValidationError(new ValidationError(ERROR_SYNTAX_MISSING_OFFSET, "Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration()));
            throw new SyntaxValidationException("Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration(), validationResult);
        }
        if (offsetOrObjstmObNr == null) {
            // not defined object -> NULL object (Spec. 1.7, chap. 3.2.9)
            pdfObject.setObject(COSNull.NULL);
        } else if (offsetOrObjstmObNr == 0) {
            // an offset of 0 is only reported; the container is left without an object
            addValidationError(new ValidationError(ERROR_SYNTAX_INVALID_OFFSET, "Object {" + objKey.getNumber() + ":" + objKey.getGeneration() + "} has an offset of 0"));
        } else if (offsetOrObjstmObNr > 0) {
            // offset of indirect object in file
            // ---- go to object start
            source.seek(offsetOrObjstmObNr);
            // ---- we must have an indirect object
            long readObjNr;
            int readObjGen;
            // remember where the header starts so it can be re-read if the strict match fails
            long offset = source.getPosition();
            String line = readLine();
            // strict header form: number, one whitespace character, generation, one whitespace character, 'obj'
            // NOTE(review): the pattern is compiled on every call; it could be a constant
            Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
            Matcher matcher = pattern.matcher(line);
            if (matcher.matches()) {
                readObjNr = Long.parseLong(matcher.group(1));
                readObjGen = Integer.parseInt(matcher.group(2));
            } else {
                // the strict form did not match: report it, then read the header piece by piece instead
                addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected [offset=" + offset + "; key=" + offsetOrObjstmObNr.toString() + "; line=" + line + "; object=" + pdfObject.toString() + "]"));
                // reset source cursor to read object information
                source.seek(offset);
                readObjNr = readObjectNumber();
                readObjGen = readGenerationNumber();
                // skip spaces between Object Generation number and the 'obj' keyword
                skipSpaces();
                // the next characters must spell out OBJ_MARKER exactly
                for (char c : OBJ_MARKER) {
                    if (source.read() != c) {
                        addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '" + new String(OBJ_MARKER) + " but missed at character '" + c + "'"));
                        throw new SyntaxValidationException("Expected pattern '" + new String(OBJ_MARKER) + " but missed at character '" + c + "'", validationResult);
                    }
                }
            }
            // ---- consistency check
            // the object found at the offset must be the one the xref entry was looked up for
            if ((readObjNr != objKey.getNumber()) || (readObjGen != objKey.getGeneration())) {
                throw new IOException("XREF for " + objKey.getNumber() + ":" + objKey.getGeneration() + " points to wrong object: " + readObjNr + ":" + readObjGen);
            }
            skipSpaces();
            COSBase pb = parseDirObject();
            skipSpaces();
            // position of the keyword following the object body ('stream' or the end marker)
            long endObjectOffset = source.getPosition();
            String endObjectKey = readString();
            if (endObjectKey.equals("stream")) {
                // go back so that the stream parser sees the 'stream' keyword itself
                source.seek(endObjectOffset);
                if (pb instanceof COSDictionary) {
                    COSStream stream = parseCOSStream((COSDictionary) pb);
                    if (securityHandler != null) {
                        securityHandler.decryptStream(stream, objNr, objGenNr);
                    }
                    pb = stream;
                } else {
                    // the combination of a dict and the stream/endstream forms a complete stream object
                    throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ").");
                }
                skipSpaces();
                endObjectOffset = source.getPosition();
                endObjectKey = readString();
                // we have case with a second 'endstream' before endobj
                if (!endObjectKey.startsWith("endobj")) {
                    if (endObjectKey.startsWith("endstream")) {
                        // drop the 9 characters of 'endstream' and keep whatever follows on that line
                        endObjectKey = endObjectKey.substring(9).trim();
                        if (endObjectKey.length() == 0) {
                            // no other characters in extra endstream line
                            // read next line
                            // NOTE(review): endObjectOffset is not updated for this second read, so the
                            // "EOL before endobj" check below looks at the position before the extra
                            // 'endstream' - confirm this is intended
                            endObjectKey = readString();
                        }
                    }
                }
            } else if (securityHandler != null) {
                // non-stream object: decrypt the parsed object directly
                securityHandler.decrypt(pb, objNr, objGenNr);
            }
            pdfObject.setObject(pb);
            if (!endObjectKey.startsWith("endobj")) {
                throw new IOException("Object (" + readObjNr + ":" + readObjGen + ") at offset " + offsetOrObjstmObNr + " does not end with 'endobj'.");
            } else {
                // look at the character just before the recorded keyword position, then restore the cursor
                offset = source.getPosition();
                source.seek(endObjectOffset - 1);
                if (!nextIsEOL()) {
                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected before the 'endobj' keyword at offset " + source.getPosition()));
                }
                source.seek(offset);
            }
            // the cursor is back behind the keyword that was read: an EOL must follow it as well
            if (!nextIsEOL()) {
                addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected after the 'endobj' keyword at offset " + source.getPosition()));
            }
        } else {
            // xref value is object nr of object stream containing object to be parsed;
            // since our object was not found it means object stream was not parsed so far
            final int objstmObjNr = (int) (-offsetOrObjstmObNr);
            // the object stream itself is looked up with generation 0 and must have an xref entry
            final COSBase objstmBaseObj = parseObjectDynamically(objstmObjNr, 0, true);
            if (objstmBaseObj instanceof COSStream) {
                // parse object stream
                PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document);
                parser.parse();
                // register all objects which are referenced to be contained in object stream
                for (COSObject next : parser.getObjects()) {
                    COSObjectKey stmObjKey = new COSObjectKey(next);
                    Long offset = document.getXrefTable().get(stmObjKey);
                    // only accept objects whose xref entry points back to this very object stream
                    if (offset != null && offset == -objstmObjNr) {
                        COSObject stmObj = document.getObjectFromPool(stmObjKey);
                        stmObj.setObject(next.getObject());
                    }
                }
            }
        }
    }
    return pdfObject.getObject();
}
Also used : Pattern(java.util.regex.Pattern) COSStream(org.apache.pdfbox.cos.COSStream) COSDictionary(org.apache.pdfbox.cos.COSDictionary) Matcher(java.util.regex.Matcher) SyntaxValidationException(org.apache.pdfbox.preflight.exception.SyntaxValidationException) COSString(org.apache.pdfbox.cos.COSString) IOException(java.io.IOException) PDFObjectStreamParser(org.apache.pdfbox.pdfparser.PDFObjectStreamParser) COSObjectKey(org.apache.pdfbox.cos.COSObjectKey) COSObject(org.apache.pdfbox.cos.COSObject) COSBase(org.apache.pdfbox.cos.COSBase) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError)

Example 4 with SyntaxValidationException

use of org.apache.pdfbox.preflight.exception.SyntaxValidationException in project pdfbox by apache.

the class Validator_A1b method runSimple.

/**
 * Validates a single file and prints the outcome to standard output.
 *
 * <p>The parsed document is always closed, even when validation throws. A SyntaxValidationException raised
 * while parsing or validating is reported through the validation result it carries.</p>
 *
 * @param file
 *            file to validate
 * @return 0 if the validation result is valid, -1 otherwise
 * @throws IOException
 *             if parsing, validating or closing the document fails with an I/O error that is not a
 *             SyntaxValidationException
 */
private static int runSimple(File file) throws IOException {
    ValidationResult result;
    PreflightParser parser = new PreflightParser(file);
    try {
        parser.parse();
        PreflightDocument document = parser.getPreflightDocument();
        try {
            document.validate();
            result = document.getResult();
        } finally {
            // release the document even if validation fails part-way through
            document.close();
        }
    } catch (SyntaxValidationException e) {
        // syntax problems are a validation outcome: report them like any other error
        result = e.getResult();
    }
    if (result.isValid()) {
        System.out.println("The file " + file.getName() + " is a valid PDF/A-1b file");
        System.out.println();
        return 0;
    } else {
        System.out.println("The file " + file.getName() + " is not a valid PDF/A-1b file, error(s) :");
        for (ValidationError error : result.getErrorsList()) {
            System.out.print(error.getErrorCode() + " : " + error.getDetails());
            if (error.getPageNumber() != null) {
                // the stored page number is printed increased by one
                System.out.println(" on page " + (error.getPageNumber() + 1));
            } else {
                System.out.println();
            }
        }
        System.out.println();
        return -1;
    }
}
Also used : SyntaxValidationException(org.apache.pdfbox.preflight.exception.SyntaxValidationException) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError) PreflightParser(org.apache.pdfbox.preflight.parser.PreflightParser)

Example 5 with SyntaxValidationException

use of org.apache.pdfbox.preflight.exception.SyntaxValidationException in project pdfbox by apache.

the class TestMetadataFiles method checkPDF.

/**
 * Runs the preflight validation on the given PDF and reports whether the validation result is valid.
 *
 * <p>A missing file or an I/O error while parsing is reported through {@code fail}. A
 * SyntaxValidationException is not a failure: the validation result it carries is used instead.</p>
 *
 * @param pdf
 *            file to check
 * @return true if a validation result was obtained and it is valid, false otherwise
 */
private boolean checkPDF(File pdf) {
    if (!pdf.exists()) {
        fail("Can't find the given file " + pdf);
        return false;
    }
    PreflightDocument preflightDocument = null;
    ValidationResult outcome = null;
    try {
        PreflightParser parser = new PreflightParser(pdf);
        parser.parse();
        preflightDocument = (PreflightDocument) parser.getPDDocument();
        preflightDocument.validate();
        outcome = preflightDocument.getResult();
    } catch (SyntaxValidationException e) {
        outcome = e.getResult();
    } catch (IOException e) {
        fail("An exception occured while parsing the PDF " + pdf + ": " + e);
    }
    // evaluate the result before the document is closed
    final boolean valid = outcome != null && outcome.isValid();
    if (preflightDocument != null) {
        try {
            preflightDocument.close();
        } catch (IOException ignored) {
            // shouldn't happen;
        }
    }
    return valid;
}
Also used : SyntaxValidationException(org.apache.pdfbox.preflight.exception.SyntaxValidationException) IOException(java.io.IOException) ValidationResult(org.apache.pdfbox.preflight.ValidationResult) PreflightParser(org.apache.pdfbox.preflight.parser.PreflightParser) PreflightDocument(org.apache.pdfbox.preflight.PreflightDocument)

Aggregations

SyntaxValidationException (org.apache.pdfbox.preflight.exception.SyntaxValidationException)8 PreflightParser (org.apache.pdfbox.preflight.parser.PreflightParser)5 IOException (java.io.IOException)4 ValidationError (org.apache.pdfbox.preflight.ValidationResult.ValidationError)4 Test (org.junit.Test)3 PreflightDocument (org.apache.pdfbox.preflight.PreflightDocument)2 ValidationResult (org.apache.pdfbox.preflight.ValidationResult)2 HashSet (java.util.HashSet)1 Matcher (java.util.regex.Matcher)1 Pattern (java.util.regex.Pattern)1 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)1 COSBase (org.apache.pdfbox.cos.COSBase)1 COSDictionary (org.apache.pdfbox.cos.COSDictionary)1 COSObject (org.apache.pdfbox.cos.COSObject)1 COSObjectKey (org.apache.pdfbox.cos.COSObjectKey)1 COSStream (org.apache.pdfbox.cos.COSStream)1 COSString (org.apache.pdfbox.cos.COSString)1 PDFObjectStreamParser (org.apache.pdfbox.pdfparser.PDFObjectStreamParser)1 Format (org.apache.pdfbox.preflight.Format)1 Element (org.w3c.dom.Element)1