Search in sources :

Example 6 with ValidationError

use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.

the class AnnotationValidatorFactory method getAnnotationValidator.

/**
 * Looks up and instantiates the validator registered for the subtype of an annotation.
 *
 * <p>The subtype name is read from {@code annotDic} and used as the key into
 * {@code validatorClasses}. If no class is registered for it, an
 * {@code ERROR_ANNOT_FORBIDDEN_SUBTYPE} validation error is added to {@code ctx} and
 * {@code null} is returned. Otherwise the class is instantiated reflectively through its
 * {@code (PreflightContext, COSDictionary)} constructor and this factory is set on the new
 * instance.
 *
 * @param ctx the preflight context.
 * @param annotDic the annotation dictionary.
 * @return the AnnotationValidator instance, or {@code null} when the subtype has no registered
 *         validator class.
 * @throws ValidationException if looking up the constructor, instantiating the validator or
 *         setting the factory fails; the original exception is kept as the cause.
 */
public final AnnotationValidator getAnnotationValidator(PreflightContext ctx, COSDictionary annotDic) throws ValidationException {
    final String subtypeName = annotDic.getNameAsString(COSName.SUBTYPE);
    final Class<? extends AnnotationValidator> validatorClass = this.validatorClasses.get(subtypeName);
    if (validatorClass == null) {
        // Unknown subtype: report it on the context and hand back nothing.
        ctx.addValidationError(new ValidationError(ERROR_ANNOT_FORBIDDEN_SUBTYPE, "The subtype isn't authorized : " + subtypeName));
        return null;
    }
    try {
        AnnotationValidator validator = validatorClass
                .getDeclaredConstructor(PreflightContext.class, COSDictionary.class)
                .newInstance(ctx, annotDic);
        validator.setFactory(this);
        return validator;
    } catch (Exception cause) {
        throw new ValidationException(cause.getMessage(), cause);
    }
}
Also used : ValidationException(org.apache.pdfbox.preflight.exception.ValidationException) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError)

Example 7 with ValidationError

use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.

the class SynchronizedMetaDataValidation method validateMetadataSynchronization.

/**
 * Checks that the document information entries and the XMP metadata are synchronized.
 *
 * <p>Each property (title, author, subject, keywords, producer, creator tool, creation date,
 * modify date) is handed to its dedicated {@code analyze...Property} method together with the
 * matching XMP schema; those methods receive the error list that is returned here.
 *
 * @param document the PDF Document
 * @param metadata the XMP MetaData
 * @return List of validation errors
 * @throws ValidationException if {@code document} or {@code metadata} is {@code null}, or if one
 *         of the property analyses throws it
 */
public List<ValidationError> validateMetadataSynchronization(PDDocument document, XMPMetadata metadata) throws ValidationException {
    if (document == null) {
        throw new ValidationException("Document provided is null");
    }
    // The information dictionary is fetched before the metadata null check on purpose:
    // the order of these two steps is kept as it was.
    final PDDocumentInformation info = document.getDocumentInformation();
    if (metadata == null) {
        throw new ValidationException("Metadata provided are null");
    }

    final List<ValidationError> errors = new ArrayList<>();

    // Dublin Core schema: title, author, subject
    final DublinCoreSchema dublinCore = metadata.getDublinCoreSchema();
    analyzeTitleProperty(info, dublinCore, errors);
    analyzeAuthorProperty(info, dublinCore, errors);
    analyzeSubjectProperty(info, dublinCore, errors);

    // Adobe PDF schema: keywords, producer
    final AdobePDFSchema adobePdf = metadata.getAdobePDFSchema();
    analyzeKeywordsProperty(info, adobePdf, errors);
    analyzeProducerProperty(info, adobePdf, errors);

    // XMP Basic schema: creator tool, creation date, modify date
    final XMPBasicSchema xmpBasic = metadata.getXMPBasicSchema();
    analyzeCreatorToolProperty(info, xmpBasic, errors);
    analyzeCreationDateProperty(info, xmpBasic, errors);
    analyzeModifyDateProperty(info, xmpBasic, errors);

    return errors;
}
Also used : ValidationException(org.apache.pdfbox.preflight.exception.ValidationException) ArrayList(java.util.ArrayList) XMPBasicSchema(org.apache.xmpbox.schema.XMPBasicSchema) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError) DublinCoreSchema(org.apache.xmpbox.schema.DublinCoreSchema) AdobePDFSchema(org.apache.xmpbox.schema.AdobePDFSchema) PDDocumentInformation(org.apache.pdfbox.pdmodel.PDDocumentInformation)

Example 8 with ValidationError

use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.

the class PreflightParser method parseXrefTable.

/**
 * Same method as {@linkplain PDFParser#parseXrefTable(long)} with additional controls:
 * <ul>
 * <li>EOL is mandatory after the 'xref' keyword,</li>
 * <li>the cross reference subsection header uses a single white space as separator,</li>
 * <li>and so on.</li>
 * </ul>
 * Violations are reported through {@code addValidationError}; parsing goes on where possible.
 *
 * @param startByteOffset the offset to start at
 * @return false if the next character is not 'x' or the keyword read is not "xref",
 *         true otherwise (even when validation errors were recorded)
 * @throws IOException If an IO error occurs.
 */
@Override
protected boolean parseXrefTable(long startByteOffset) throws IOException {
    if (source.peek() != 'x') {
        return false;
    }
    String xref = readString();
    if (!xref.equals("xref")) {
        addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be followed by a EOL character"));
        return false;
    }
    if (!nextIsEOL()) {
        addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be followed by EOL"));
    }
    // signal start of new XRef
    xrefTrailerResolver.nextXrefObj(startByteOffset, XRefType.TABLE);
    // Subsection header: "<first object id> <count>" separated by exactly one whitespace
    // character. Compiled once here instead of once per subsection.
    final Pattern subsectionHeader = Pattern.compile("(\\d+)\\s(\\d+)(\\s*)");
    // Xref tables can have multiple sections. Each starts with a starting object id and a count.
    while (true) {
        // just after the xref<EOL> there is an integer
        // first obj id
        long currObjID;
        // the number of objects in the xref table
        int count;
        // remembered so that the header can be re-read leniently if the strict match fails
        long offset = source.getPosition();
        String line = readLine();
        Matcher matcher = subsectionHeader.matcher(line);
        if (matcher.matches()) {
            currObjID = Long.parseLong(matcher.group(1));
            count = Integer.parseInt(matcher.group(2));
        } else {
            addValidationError(new ValidationError(ERROR_SYNTAX_CROSS_REF, "Cross reference subsection header is invalid: '" + line + "' at position " + source.getPosition()));
            // reset source cursor to read xref information
            source.seek(offset);
            // first obj id
            currObjID = readObjectNumber();
            // the number of objects in the xref table
            count = readInt();
        }
        skipSpaces();
        for (int i = 0; i < count; i++) {
            if (source.isEOF() || isEndOfName((char) source.peek())) {
                break;
            }
            if (source.peek() == 't') {
                addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Expected xref line but 't' found"));
                break;
            }
            // Read one entry line and split it into its space-separated fields
            String currentLine = readLine();
            String[] splitString = currentLine.split(" ");
            if (splitString.length < 3) {
                addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "invalid xref line: " + currentLine));
                break;
            }
            // This supports the corrupt table as reported in PDFBOX-474 (XXXX XXX XX n)
            if (splitString[splitString.length - 1].equals("n")) {
                try {
                    // entries whose last field is "n" are registered with the resolver
                    long currOffset = Long.parseLong(splitString[0]);
                    int currGenID = Integer.parseInt(splitString[1]);
                    COSObjectKey objKey = new COSObjectKey(currObjID, currGenID);
                    xrefTrailerResolver.setXRef(objKey, currOffset);
                } catch (NumberFormatException e) {
                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "offset or genid can't be read as number " + e.getMessage(), e));
                }
            } else if (!splitString[2].equals("f")) {
                // neither an "n" entry nor an "f" entry in the third field
                addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Corrupt XRefTable Entry - ObjID:" + currObjID));
            }
            currObjID++;
            skipSpaces();
        }
        skipSpaces();
        // another subsection follows only if the next token starts with a digit
        if (!isDigit()) {
            break;
        }
    }
    return true;
}
Also used : COSObjectKey(org.apache.pdfbox.cos.COSObjectKey) Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) COSString(org.apache.pdfbox.cos.COSString) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError)

Example 9 with ValidationError

use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.

the class PreflightParser method parse.

/**
 * Parse the given file and check if it is a conforming file according to the given format.
 *
 * <p>The PDF header is checked first, then the parent parser is run. The {@code source} is
 * closed once parsing has finished, whether it succeeded or not. On success the PDF/A document
 * and the preflight context are created.
 *
 * @param format
 *            format that the document should follow (default {@link Format#PDF_A1B})
 * @param config
 *            Configuration bean that will be used by the PreflightDocument. If null the format is used to determine
 *            the default configuration.
 * @throws IOException
 *             if an IO error occurs. An {@link IOException} raised by the parent parser is
 *             recorded as a validation error and rethrown as a {@code SyntaxValidationException}
 *             carrying the current validation result.
 */
public void parse(Format format, PreflightConfiguration config) throws IOException {
    checkPdfHeader();
    try {
        super.parse();
    } catch (IOException e) {
        // Keep the cause on the validation error too, so that it is not reduced to its message.
        addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_COMMON, e.getMessage(), e));
        throw new SyntaxValidationException(e, this.validationResult);
    } finally {
        // TODO move file handling outside of the parser
        IOUtils.closeQuietly(source);
    }
    // fall back to PDF/A-1b when the caller did not choose a format
    Format formatToUse = (format == null ? Format.PDF_A1B : format);
    createPdfADocument(formatToUse, config);
    createContext();
}
Also used : Format(org.apache.pdfbox.preflight.Format) SyntaxValidationException(org.apache.pdfbox.preflight.exception.SyntaxValidationException) IOException(java.io.IOException) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError)

Example 10 with ValidationError

use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.

the class PreflightParser method checkStreamKeyWord.

/**
 * Verifies that the next token is the 'stream' keyword and that it is followed by
 * &lt;CR&gt;&lt;LF&gt; or only &lt;LF&gt;. Each violation is recorded as an
 * {@code ERROR_SYNTAX_STREAM_DELIMITER} validation error; afterwards the source is moved back
 * by seven positions.
 *
 * @throws IOException if reading from or seeking in the source fails
 */
protected void checkStreamKeyWord() throws IOException {
    final String keyword = readString();
    final boolean isStreamKeyword = keyword.equals("stream");
    if (!isStreamKeyword) {
        addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'stream' keyword but found '" + keyword + "' at offset " + source.getPosition()));
    }

    final int next = source.read();
    // peek() is only consulted when a CR was read, to look for the LF that must follow it
    final boolean crLf = next == '\r' && source.peek() == '\n';
    final boolean lfOnly = next == '\n';
    if (!crLf && !lfOnly) {
        addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' after the stream keyword at offset " + source.getPosition()));
    }

    // set the offset before stream
    // NOTE(review): 7 looks like the six characters of "stream" plus the one character read
    // above; this presumably assumes the keyword really was "stream" - confirm.
    final long beforeKeyword = source.getPosition() - 7;
    source.seek(beforeKeyword);
}
Also used : COSString(org.apache.pdfbox.cos.COSString) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError)

Aggregations

ValidationError (org.apache.pdfbox.preflight.ValidationResult.ValidationError)108 COSBase (org.apache.pdfbox.cos.COSBase)36 COSDictionary (org.apache.pdfbox.cos.COSDictionary)28 IOException (java.io.IOException)27 COSDocument (org.apache.pdfbox.cos.COSDocument)13 COSObject (org.apache.pdfbox.cos.COSObject)13 COSArray (org.apache.pdfbox.cos.COSArray)10 COSStream (org.apache.pdfbox.cos.COSStream)10 COSString (org.apache.pdfbox.cos.COSString)8 PDColorSpace (org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace)8 ValidationException (org.apache.pdfbox.preflight.exception.ValidationException)8 COSName (org.apache.pdfbox.cos.COSName)7 COSObjectKey (org.apache.pdfbox.cos.COSObjectKey)7 PreflightPath (org.apache.pdfbox.preflight.PreflightPath)6 InputStream (java.io.InputStream)5 ArrayList (java.util.ArrayList)5 PDStream (org.apache.pdfbox.pdmodel.common.PDStream)5 PreflightParser (org.apache.pdfbox.preflight.parser.PreflightParser)5 DublinCoreSchema (org.apache.xmpbox.schema.DublinCoreSchema)5 PDDocument (org.apache.pdfbox.pdmodel.PDDocument)4