use of org.apache.pdfbox.preflight.exception.SyntaxValidationException in project pdfbox by apache.
the class PreflightParser method parse.
/**
 * Parses the underlying source and prepares the preflight document and context for validation.
 *
 * @param format
 *            format the document is expected to conform to; {@link Format#PDF_A1B} is used when null
 * @param config
 *            configuration bean handed over when the PreflightDocument is created. If null the format is
 *            used to determine the default configuration.
 * @throws IOException
 *             if parsing fails; the failure is recorded as a validation error and rethrown as a
 *             {@link SyntaxValidationException} carrying the validation result collected so far
 */
public void parse(Format format, PreflightConfiguration config) throws IOException {
    checkPdfHeader();
    try {
        super.parse();
    } catch (IOException parseFailure) {
        // keep the failure in the validation result before handing it to the caller
        ValidationError syntaxError = new ValidationError(PreflightConstants.ERROR_SYNTAX_COMMON,
                parseFailure.getMessage());
        addValidationError(syntaxError);
        throw new SyntaxValidationException(parseFailure, this.validationResult);
    } finally {
        // TODO move file handling outside of the parser
        IOUtils.closeQuietly(source);
    }

    // fall back to PDF/A-1b when the caller did not pick a format
    Format effectiveFormat = format;
    if (effectiveFormat == null) {
        effectiveFormat = Format.PDF_A1B;
    }
    createPdfADocument(effectiveFormat, config);
    createContext();
}
use of org.apache.pdfbox.preflight.exception.SyntaxValidationException in project pdfbox by apache.
the class XmlResultParser method validate.
/**
 * Validates the PDF provided by the given data source and renders the outcome as an XML element.
 *
 * A {@link SyntaxValidationException} raised while parsing or validating is not treated as a failure:
 * the validation result it carries is reported instead. Any other exception produces the failure
 * response built by {@code generateFailureResponse}.
 *
 * @param rdocument
 *            DOM document used to create the response elements
 * @param source
 *            data source of the PDF to validate; its name is reported in the response
 * @return the element describing the validation outcome
 * @throws IOException
 *             declared for callers; exceptions raised during parsing/validation are converted into a
 *             failure response inside this method
 */
public Element validate(Document rdocument, DataSource source) throws IOException {
    String pdfType = null;
    ValidationResult result;
    long before = System.currentTimeMillis();
    try {
        PreflightParser parser = new PreflightParser(source);
        PreflightDocument document = null;
        try {
            parser.parse();
            document = parser.getPreflightDocument();
            document.validate();
            pdfType = document.getSpecification().getFname();
            result = document.getResult();
        } catch (SyntaxValidationException e) {
            result = e.getResult();
        } finally {
            // close the document on every path, not only after a successful validation,
            // so that a failing validate() no longer leaks the opened document
            if (document != null) {
                document.close();
            }
        }
    } catch (Exception e) {
        long after = System.currentTimeMillis();
        return generateFailureResponse(rdocument, source.getName(), after - before, pdfType, e);
    }
    long after = System.currentTimeMillis();
    // both outcomes share the same response skeleton
    Element preflight = generateResponseSkeleton(rdocument, source.getName(), after - before);
    if (result.isValid()) {
        // valid ?
        Element valid = rdocument.createElement("isValid");
        valid.setAttribute("type", pdfType);
        valid.setTextContent("true");
        preflight.appendChild(valid);
    } else {
        // valid ?
        createResponseWithError(rdocument, pdfType, result, preflight);
    }
    return preflight;
}
use of org.apache.pdfbox.preflight.exception.SyntaxValidationException in project pdfbox by apache.
the class PreflightParser method parseObjectDynamically.
/**
 * Returns the object stored for the given object/generation number, parsing it first if the pooled
 * container does not hold an object yet. Syntax problems found on the way are recorded through
 * addValidationError.
 *
 * The xref table entry of the key decides how the object is obtained:
 * <ul>
 * <li>no entry: the object is set to COSNull.NULL, unless requireExistingNotCompressedObj is set, in
 * which case a SyntaxValidationException is thrown</li>
 * <li>entry equal to 0: only a validation error is recorded, the container is left untouched</li>
 * <li>positive entry: used as position in the source at which the indirect object is parsed</li>
 * <li>negative entry: its negated value is used as object number of an object stream, which is parsed
 * through a recursive call</li>
 * </ul>
 *
 * @param objNr object number of the requested object
 * @param objGenNr generation number of the requested object
 * @param requireExistingNotCompressedObj if true, a missing xref entry is reported as a validation error
 *            and aborts with a SyntaxValidationException
 * @return the object held by the pooled container after processing
 * @throws IOException if the xref entry points to a different object, a stream is not preceded by a
 *             dictionary or the object does not end with 'endobj'; SyntaxValidationException is thrown
 *             for a missing required object or a missing 'obj' marker
 */
@Override
protected COSBase parseObjectDynamically(long objNr, int objGenNr, boolean requireExistingNotCompressedObj) throws IOException {
// ---- create object key and get object (container) from pool
final COSObjectKey objKey = new COSObjectKey(objNr, objGenNr);
final COSObject pdfObject = document.getObjectFromPool(objKey);
if (pdfObject.getObject() == null) {
// not previously parsed
// ---- read offset or object stream object number from xref table
Long offsetOrObjstmObNr = document.getXrefTable().get(objKey);
// sanity test to circumvent loops with broken documents
// NOTE(review): only a missing xref entry is rejected here; a negative (object stream) entry is
// not, although the message mentions compressed objects - confirm this is intended
if (requireExistingNotCompressedObj && ((offsetOrObjstmObNr == null))) {
addValidationError(new ValidationError(ERROR_SYNTAX_MISSING_OFFSET, "Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration()));
throw new SyntaxValidationException("Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration(), validationResult);
}
if (offsetOrObjstmObNr == null) {
// not defined object -> NULL object (Spec. 1.7, chap. 3.2.9)
pdfObject.setObject(COSNull.NULL);
} else if (offsetOrObjstmObNr == 0) {
// only the error is recorded; the container is not modified in this branch
addValidationError(new ValidationError(ERROR_SYNTAX_INVALID_OFFSET, "Object {" + objKey.getNumber() + ":" + objKey.getGeneration() + "} has an offset of 0"));
} else if (offsetOrObjstmObNr > 0) {
// offset of indirect object in file
// ---- go to object start
source.seek(offsetOrObjstmObNr);
// ---- we must have an indirect object
long readObjNr;
int readObjGen;
// remember the start position so the header can be re-read if the strict pattern fails
long offset = source.getPosition();
String line = readLine();
// strict form of the object header: number, one whitespace, generation, one whitespace, 'obj'
// NOTE(review): the pattern is compiled on every call; a shared constant would avoid that
Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
Matcher matcher = pattern.matcher(line);
if (matcher.matches()) {
readObjNr = Long.parseLong(matcher.group(1));
readObjGen = Integer.parseInt(matcher.group(2));
} else {
// header does not match the strict form: record the error, then read the header token by token
addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected [offset=" + offset + "; key=" + offsetOrObjstmObNr.toString() + "; line=" + line + "; object=" + pdfObject.toString() + "]"));
// reset source cursor to read object information
source.seek(offset);
readObjNr = readObjectNumber();
readObjGen = readGenerationNumber();
// skip spaces between Object Generation number and the 'obj' keyword
skipSpaces();
// the next characters must spell out OBJ_MARKER, otherwise parsing is aborted
for (char c : OBJ_MARKER) {
if (source.read() != c) {
addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '" + new String(OBJ_MARKER) + " but missed at character '" + c + "'"));
throw new SyntaxValidationException("Expected pattern '" + new String(OBJ_MARKER) + " but missed at character '" + c + "'", validationResult);
}
}
}
// ---- consistency check
if ((readObjNr != objKey.getNumber()) || (readObjGen != objKey.getGeneration())) {
throw new IOException("XREF for " + objKey.getNumber() + ":" + objKey.getGeneration() + " points to wrong object: " + readObjNr + ":" + readObjGen);
}
skipSpaces();
COSBase pb = parseDirObject();
skipSpaces();
// position of the keyword following the direct object ('stream' or the end-of-object keyword)
long endObjectOffset = source.getPosition();
String endObjectKey = readString();
if (endObjectKey.equals("stream")) {
// rewind to the 'stream' keyword so parseCOSStream starts reading from it
source.seek(endObjectOffset);
if (pb instanceof COSDictionary) {
COSStream stream = parseCOSStream((COSDictionary) pb);
if (securityHandler != null) {
securityHandler.decryptStream(stream, objNr, objGenNr);
}
pb = stream;
} else {
// the combination of a dict and the stream/endstream forms a complete stream object
throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ").");
}
skipSpaces();
endObjectOffset = source.getPosition();
endObjectKey = readString();
// we have case with a second 'endstream' before endobj
if (!endObjectKey.startsWith("endobj")) {
if (endObjectKey.startsWith("endstream")) {
// drop the 9 characters of 'endstream' and look at what follows on the same line
endObjectKey = endObjectKey.substring(9).trim();
if (endObjectKey.length() == 0) {
// no other characters in extra endstream line
// read next line
// NOTE(review): endObjectOffset is not updated for this extra read, so the EOL check
// before 'endobj' below uses the position of the extra 'endstream' - confirm
endObjectKey = readString();
}
}
}
} else if (securityHandler != null) {
securityHandler.decrypt(pb, objNr, objGenNr);
}
// the parsed object is stored before the 'endobj' check, so it is kept even if that check throws
pdfObject.setObject(pb);
if (!endObjectKey.startsWith("endobj")) {
throw new IOException("Object (" + readObjNr + ":" + readObjGen + ") at offset " + offsetOrObjstmObNr + " does not end with 'endobj'.");
} else {
// step back to the character in front of the keyword to check for an EOL, then restore the position
offset = source.getPosition();
source.seek(endObjectOffset - 1);
if (!nextIsEOL()) {
addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected before the 'endobj' keyword at offset " + source.getPosition()));
}
source.seek(offset);
}
if (!nextIsEOL()) {
addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected after the 'endobj' keyword at offset " + source.getPosition()));
}
} else {
// xref value is object nr of object stream containing object to be parsed;
// since our object was not found it means object stream was not parsed so far
// NOTE(review): the negated long is narrowed to int here - confirm values always fit
final int objstmObjNr = (int) (-offsetOrObjstmObNr);
// the object stream itself must exist in the xref table (third argument is true)
final COSBase objstmBaseObj = parseObjectDynamically(objstmObjNr, 0, true);
if (objstmBaseObj instanceof COSStream) {
// parse object stream
PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document);
parser.parse();
// register all objects which are referenced to be contained in object stream
for (COSObject next : parser.getObjects()) {
COSObjectKey stmObjKey = new COSObjectKey(next);
Long offset = document.getXrefTable().get(stmObjKey);
// only take over objects whose xref entry refers to this very object stream
if (offset != null && offset == -objstmObjNr) {
COSObject stmObj = document.getObjectFromPool(stmObjKey);
stmObj.setObject(next.getObject());
}
}
}
}
}
// returns whatever the pooled container holds now
return pdfObject.getObject();
}
use of org.apache.pdfbox.preflight.exception.SyntaxValidationException in project pdfbox by apache.
the class Validator_A1b method runSimple.
/**
 * Validates a single file against PDF/A-1b and prints the outcome to standard output.
 *
 * A {@link SyntaxValidationException} raised while parsing or validating is reported through the
 * validation result it carries; other I/O failures are propagated to the caller.
 *
 * @param file the PDF file to validate
 * @return 0 if the file is valid, -1 otherwise
 * @throws IOException if parsing, validating or closing the document fails for a reason other than a
 *             syntax validation error
 */
private static int runSimple(File file) throws IOException {
    ValidationResult result;
    PreflightParser parser = new PreflightParser(file);
    PreflightDocument document = null;
    try {
        parser.parse();
        document = parser.getPreflightDocument();
        document.validate();
        result = document.getResult();
    } catch (SyntaxValidationException e) {
        result = e.getResult();
    } finally {
        // close the document on every path so a failing validate() does not leak it
        if (document != null) {
            document.close();
        }
    }
    if (result.isValid()) {
        System.out.println("The file " + file.getName() + " is a valid PDF/A-1b file");
        System.out.println();
        return 0;
    } else {
        System.out.println("The file " + file.getName() + " is not a valid PDF/A-1b file, error(s) :");
        for (ValidationError error : result.getErrorsList()) {
            System.out.print(error.getErrorCode() + " : " + error.getDetails());
            if (error.getPageNumber() != null) {
                // the stored page number is printed incremented by one
                System.out.println(" on page " + (error.getPageNumber() + 1));
            } else {
                System.out.println();
            }
        }
        System.out.println();
        return -1;
    }
}
use of org.apache.pdfbox.preflight.exception.SyntaxValidationException in project pdfbox by apache.
the class TestMetadataFiles method checkPDF.
/**
 * Parses and validates the given PDF and reports whether the validation result is valid.
 *
 * A missing file or an I/O error while parsing is reported through fail(). A
 * SyntaxValidationException is not a test failure: the result it carries is evaluated instead.
 *
 * @param pdf the PDF file to check
 * @return true if a validation result was obtained and it is valid, false otherwise
 */
private boolean checkPDF(File pdf) {
    if (!pdf.exists()) {
        fail("Can't find the given file " + pdf);
        return false;
    }

    PreflightDocument preflightDocument = null;
    ValidationResult validation = null;
    try {
        PreflightParser parser = new PreflightParser(pdf);
        parser.parse();
        preflightDocument = (PreflightDocument) parser.getPDDocument();
        preflightDocument.validate();
        validation = preflightDocument.getResult();
    } catch (SyntaxValidationException syntaxProblem) {
        validation = syntaxProblem.getResult();
    } catch (IOException ioProblem) {
        fail("An exception occured while parsing the PDF " + pdf + ": " + ioProblem);
    }

    // evaluate the result before the document is closed
    final boolean valid = validation != null && validation.isValid();

    if (preflightDocument != null) {
        try {
            preflightDocument.close();
        } catch (IOException ignored) {
            // shouldn't happen;
        }
    }
    return valid;
}
Aggregations