use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.
the class AnnotationValidatorFactory method getAnnotationValidator.
/**
 * Creates the AnnotationValidator registered for the subtype of the given annotation.
 *
 * <p>The subtype name is read from the dictionary's {@code Subtype} entry and looked up in
 * {@code validatorClasses}. If no class is registered for it, an
 * {@code ERROR_ANNOT_FORBIDDEN_SUBTYPE} error is added to the context and {@code null} is returned.
 *
 * @param ctx the preflight context; passed to the validator constructor and used to record errors.
 * @param annotDic the annotation dictionary; passed to the validator constructor.
 * @return the new AnnotationValidator with this factory set on it, or {@code null} when the
 *         subtype has no registered validator class.
 * @throws ValidationException if any exception is raised while looking up the constructor,
 *         instantiating the validator or setting its factory; the caught exception and its
 *         message are handed to the ValidationException.
 */
public final AnnotationValidator getAnnotationValidator(PreflightContext ctx, COSDictionary annotDic) throws ValidationException {
    final String subtypeName = annotDic.getNameAsString(COSName.SUBTYPE);
    final Class<? extends AnnotationValidator> validatorClass = this.validatorClasses.get(subtypeName);
    if (validatorClass == null) {
        // Unknown subtype: report it on the context instead of failing the whole validation.
        ctx.addValidationError(new ValidationError(ERROR_ANNOT_FORBIDDEN_SUBTYPE, "The subtype isn't authorized : " + subtypeName));
        return null;
    }
    try {
        // Every validator class is expected to expose a (PreflightContext, COSDictionary) constructor.
        AnnotationValidator validator = validatorClass
                .getDeclaredConstructor(PreflightContext.class, COSDictionary.class)
                .newInstance(ctx, annotDic);
        validator.setFactory(this);
        return validator;
    } catch (Exception e) {
        throw new ValidationException(e.getMessage(), e);
    }
}
use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.
the class SynchronizedMetaDataValidation method validateMetadataSynchronization.
/**
 * Checks that the document information entries and the XMP metadata are synchronized.
 *
 * <p>The title, author and subject are compared against the Dublin Core schema, the keywords and
 * producer against the Adobe PDF schema, and the creator tool, creation date and modification
 * date against the XMP Basic schema. Each comparison appends its findings to the returned list.
 *
 * @param document the PDF Document
 * @param metadata the XMP MetaData
 * @return the list of validation errors collected by the individual property checks
 * @throws ValidationException if {@code document} or {@code metadata} is {@code null}
 */
public List<ValidationError> validateMetadataSynchronization(PDDocument document, XMPMetadata metadata) throws ValidationException {
    List<ValidationError> errors = new ArrayList<>();
    if (document == null) {
        throw new ValidationException("Document provided is null");
    }
    // The document information is fetched before the metadata null check.
    PDDocumentInformation info = document.getDocumentInformation();
    if (metadata == null) {
        throw new ValidationException("Metadata provided are null");
    }

    // Dublin Core: title, author, subject
    DublinCoreSchema dublinCore = metadata.getDublinCoreSchema();
    analyzeTitleProperty(info, dublinCore, errors);
    analyzeAuthorProperty(info, dublinCore, errors);
    analyzeSubjectProperty(info, dublinCore, errors);

    // Adobe PDF: keywords, producer
    AdobePDFSchema adobePdf = metadata.getAdobePDFSchema();
    analyzeKeywordsProperty(info, adobePdf, errors);
    analyzeProducerProperty(info, adobePdf, errors);

    // XMP Basic: creator tool, creation date, modify date
    XMPBasicSchema xmpBasic = metadata.getXMPBasicSchema();
    analyzeCreatorToolProperty(info, xmpBasic, errors);
    analyzeCreationDateProperty(info, xmpBasic, errors);
    analyzeModifyDateProperty(info, xmpBasic, errors);

    return errors;
}
use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.
the class PreflightParser method parseXrefTable.
/**
 * Same as {@linkplain PDFParser#parseXrefTable(long)} but with additional controls:
 * <ul>
 * <li>an EOL is mandatory after the 'xref' keyword,</li>
 * <li>the cross reference subsection header must be two integers separated by a single
 * white space character,</li>
 * <li>each entry line must have at least three space-separated fields and end with 'n' or carry
 * 'f' as its third field.</li>
 * </ul>
 * Violations are recorded as {@code ERROR_SYNTAX_CROSS_REF} validation errors; parsing continues
 * where possible.
 *
 * @param startByteOffset the offset to start at
 * @return {@code false} if the next character is not 'x' or the keyword read is not "xref";
 *         {@code true} otherwise, even when validation errors were recorded
 * @throws IOException If an IO error occurs.
 */
@Override
protected boolean parseXrefTable(long startByteOffset) throws IOException {
    if (source.peek() != 'x') {
        return false;
    }
    String xref = readString();
    if (!xref.equals("xref")) {
        addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be followed by a EOL character"));
        return false;
    }
    if (!nextIsEOL()) {
        addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be followed by EOL"));
    }
    // signal start of new XRef
    xrefTrailerResolver.nextXrefObj(startByteOffset, XRefType.TABLE);
    // Subsection header: "<first object id> <count>", exactly one whitespace character between the
    // two numbers. Compiled once here because the expression does not change between subsections.
    Pattern subsectionHeader = Pattern.compile("(\\d+)\\s(\\d+)(\\s*)");
    // Xref tables can have multiple sections. Each starts with a starting object id and a count.
    while (true) {
        // just after the xref<EOL> there are an integer
        // first obj id
        long currObjID;
        // the number of objects in the xref table
        int count;
        long offset = source.getPosition();
        String line = readLine();
        Matcher matcher = subsectionHeader.matcher(line);
        if (matcher.matches()) {
            currObjID = Long.parseLong(matcher.group(1));
            count = Integer.parseInt(matcher.group(2));
        } else {
            addValidationError(new ValidationError(ERROR_SYNTAX_CROSS_REF, "Cross reference subsection header is invalid: '" + line + "' at position " + source.getPosition()));
            // reset source cursor to read xref information
            source.seek(offset);
            // first obj id
            currObjID = readObjectNumber();
            // the number of objects in the xref table
            count = readInt();
        }
        skipSpaces();
        for (int i = 0; i < count; i++) {
            if (source.isEOF() || isEndOfName((char) source.peek())) {
                break;
            }
            if (source.peek() == 't') {
                // A 't' where an entry is expected: fewer entries than announced by the header.
                addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Expected xref line but 't' found"));
                break;
            }
            // Ignore table contents
            String currentLine = readLine();
            String[] splitString = currentLine.split(" ");
            if (splitString.length < 3) {
                addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "invalid xref line: " + currentLine));
                break;
            }
            // This supports the corrupt table as reported in PDFBOX-474 (XXXX XXX XX n)
            if (splitString[splitString.length - 1].equals("n")) {
                try {
                    long currOffset = Long.parseLong(splitString[0]);
                    int currGenID = Integer.parseInt(splitString[1]);
                    COSObjectKey objKey = new COSObjectKey(currObjID, currGenID);
                    xrefTrailerResolver.setXRef(objKey, currOffset);
                } catch (NumberFormatException e) {
                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "offset or genid can't be read as number " + e.getMessage(), e));
                }
            } else if (!splitString[2].equals("f")) {
                addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Corrupt XRefTable Entry - ObjID:" + currObjID));
            }
            currObjID++;
            skipSpaces();
        }
        skipSpaces();
        // Another subsection follows only if the next character is a digit.
        if (!isDigit()) {
            break;
        }
    }
    return true;
}
use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.
the class PreflightParser method parse.
/**
 * Parses the given file and checks whether it is a conforming file according to the given format.
 *
 * <p>The PDF header is checked first, then the parent parser is run. The source is closed once
 * the parent parser has finished, whether it succeeded or failed. On success the PDF/A document
 * and the preflight context are created.
 *
 * @param format
 *            format that the document should follow (default {@link Format#PDF_A1B} when null)
 * @param config
 *            Configuration bean that will be used by the PreflightDocument. If null the format is used to determine
 *            the default configuration.
 * @throws IOException
 *             if parsing fails; an IOException raised by the parent parser is recorded as an
 *             {@code ERROR_SYNTAX_COMMON} validation error and rethrown wrapped in a
 *             SyntaxValidationException carrying the current validation result.
 */
public void parse(Format format, PreflightConfiguration config) throws IOException {
    checkPdfHeader();
    try {
        super.parse();
    } catch (IOException e) {
        addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_COMMON, e.getMessage()));
        throw new SyntaxValidationException(e, this.validationResult);
    } finally {
        // TODO move file handling outside of the parser
        IOUtils.closeQuietly(source);
    }

    // Fall back to PDF/A-1b when the caller did not request a specific format.
    Format formatToUse = format;
    if (formatToUse == null) {
        formatToUse = Format.PDF_A1B;
    }
    createPdfADocument(formatToUse, config);
    createContext();
}
use of org.apache.pdfbox.preflight.ValidationResult.ValidationError in project pdfbox by apache.
the class PreflightParser method checkStreamKeyWord.
/**
 * Checks that the next token is the 'stream' keyword and that it is followed by
 * {@code <CR><LF>} or by a single {@code <LF>}.
 *
 * <p>Each violation is recorded as an {@code ERROR_SYNTAX_STREAM_DELIMITER} validation error.
 * Afterwards the source is moved back 7 bytes from its current position.
 *
 * @throws IOException if reading from or seeking in the source fails
 */
protected void checkStreamKeyWord() throws IOException {
    String keyword = readString();
    if (!keyword.equals("stream")) {
        addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'stream' keyword but found '" + keyword + "' at offset " + source.getPosition()));
    }

    int following = source.read();
    // The LF after a CR is only peeked at, not consumed.
    boolean crLf = following == '\r' && source.peek() == '\n';
    boolean lfOnly = following == '\n';
    if (!(crLf || lfOnly)) {
        addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' after the stream keyword at offset " + source.getPosition()));
    }

    // set the offset before stream
    // NOTE(review): 7 is presumably the 6 characters of "stream" plus the one character read above — confirm.
    source.seek(source.getPosition() - 7);
}
Aggregations