Search in sources :

Example 26 with COSObjectKey

use of org.apache.pdfbox.cos.COSObjectKey in project pdfbox by apache.

the class PreflightParser method initialParse.

// --------------------------------------------------------
// - Below: all methods that add controls on the PDF syntax
// --------------------------------------------------------
/**
 * Runs the parent's initial parse, then makes sure that every object listed in the document's
 * cross-reference table has its content loaded into the object pool. Objects whose pooled
 * container is still empty are parsed on the spot (useful for linearized PDFs).
 *
 * @throws IOException if the parent parse or the parsing of a not-yet-loaded object fails
 */
@Override
protected void initialParse() throws IOException {
    super.initialParse();
    // Only the keys of the xref table are needed here; the stored offsets are
    // looked up again by the dynamic parse itself.
    for (COSObjectKey objectKey : document.getXrefTable().keySet()) {
        COSObject pooled = document.getObjectFromPool(objectKey);
        if (pooled.getObject() != null) {
            // content already present - nothing to do for this key
            continue;
        }
        // object isn't loaded - parse the object to load its content
        parseObjectDynamically(pooled, true);
    }
}
Also used : COSObjectKey(org.apache.pdfbox.cos.COSObjectKey) COSObject(org.apache.pdfbox.cos.COSObject)

Example 27 with COSObjectKey

use of org.apache.pdfbox.cos.COSObjectKey in project pdfbox by apache.

the class PreflightParser method parseObjectDynamically.

/**
 * Returns the content of the indirect object identified by {@code objNr}/{@code objGenNr}, parsing it
 * from the source first when the pooled container is still empty, and recording PDF syntax validation
 * errors found along the way.
 *
 * <p>The cross-reference table entry for the key decides what happens:
 * <ul>
 * <li>no entry: if {@code requireExistingNotCompressedObj} is set, a validation error is recorded and a
 * {@link SyntaxValidationException} is thrown; otherwise the object is set to {@code COSNull.NULL};</li>
 * <li>entry equal to 0: a validation error is recorded and the pooled object is left unset;</li>
 * <li>positive entry: treated as a file offset; the {@code "<nr> <gen> obj"} header is read and checked
 * against the key, the object (and a following stream, if any) is parsed, decrypted when a security
 * handler is present, and the {@code endobj} keyword and the EOLs around it are checked;</li>
 * <li>negative entry: the negated value is treated as the number of an object stream, which is parsed
 * and whose contained objects are registered in the pool.</li>
 * </ul>
 *
 * @param objNr object number of the object to load
 * @param objGenNr generation number of the object to load
 * @param requireExistingNotCompressedObj if {@code true}, a missing cross-reference entry is reported
 *        as a validation error and aborts parsing with an exception
 * @return the content of the pooled object after processing; this is whatever
 *         {@code pdfObject.getObject()} yields, so it stays {@code null} in branches that never set it
 * @throws IOException if the cross-reference entry points to a different object, a stream is not
 *         preceded by a dictionary, the object does not end with {@code endobj}, or reading fails;
 *         a {@link SyntaxValidationException} is thrown for a required-but-missing entry and for a
 *         malformed {@code obj} marker
 */
@Override
protected COSBase parseObjectDynamically(long objNr, int objGenNr, boolean requireExistingNotCompressedObj) throws IOException {
    // ---- create object key and get object (container) from pool
    final COSObjectKey objKey = new COSObjectKey(objNr, objGenNr);
    final COSObject pdfObject = document.getObjectFromPool(objKey);
    if (pdfObject.getObject() == null) {
        // not previously parsed
        // ---- read offset or object stream object number from xref table
        Long offsetOrObjstmObNr = document.getXrefTable().get(objKey);
        // sanity test to circumvent loops with broken documents
        // NOTE(review): only the "no xref entry" case is tested here, although the message also
        // mentions compressed objects - confirm whether negative entries should be rejected too.
        if (requireExistingNotCompressedObj && ((offsetOrObjstmObNr == null))) {
            addValidationError(new ValidationError(ERROR_SYNTAX_MISSING_OFFSET, "Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration()));
            throw new SyntaxValidationException("Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration(), validationResult);
        }
        if (offsetOrObjstmObNr == null) {
            // not defined object -> NULL object (Spec. 1.7, chap. 3.2.9)
            pdfObject.setObject(COSNull.NULL);
        } else if (offsetOrObjstmObNr == 0) {
            // an offset of 0 is only reported; the pooled object is not set in this branch
            addValidationError(new ValidationError(ERROR_SYNTAX_INVALID_OFFSET, "Object {" + objKey.getNumber() + ":" + objKey.getGeneration() + "} has an offset of 0"));
        } else if (offsetOrObjstmObNr > 0) {
            // offset of indirect object in file
            // ---- go to object start
            source.seek(offsetOrObjstmObNr);
            // ---- we must have an indirect object
            long readObjNr;
            int readObjGen;
            // remember where the header starts so the lenient fallback below can re-read it
            long offset = source.getPosition();
            String line = readLine();
            // strict header form: number, one whitespace char, generation, one whitespace char, "obj"
            Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
            Matcher matcher = pattern.matcher(line);
            if (matcher.matches()) {
                readObjNr = Long.parseLong(matcher.group(1));
                readObjGen = Integer.parseInt(matcher.group(2));
            } else {
                // the strict form did not match: report it, then fall back to token-wise reading
                addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected [offset=" + offset + "; key=" + offsetOrObjstmObNr.toString() + "; line=" + line + "; object=" + pdfObject.toString() + "]"));
                // reset source cursor to read object information
                source.seek(offset);
                readObjNr = readObjectNumber();
                readObjGen = readGenerationNumber();
                // skip spaces between Object Generation number and the 'obj' keyword
                skipSpaces();
                // the next characters must spell out OBJ_MARKER exactly
                for (char c : OBJ_MARKER) {
                    if (source.read() != c) {
                        addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '" + new String(OBJ_MARKER) + " but missed at character '" + c + "'"));
                        throw new SyntaxValidationException("Expected pattern '" + new String(OBJ_MARKER) + " but missed at character '" + c + "'", validationResult);
                    }
                }
            }
            // ---- consistency check
            // the header found at the offset must name the same object the xref entry was for
            if ((readObjNr != objKey.getNumber()) || (readObjGen != objKey.getGeneration())) {
                throw new IOException("XREF for " + objKey.getNumber() + ":" + objKey.getGeneration() + " points to wrong object: " + readObjNr + ":" + readObjGen);
            }
            skipSpaces();
            COSBase pb = parseDirObject();
            skipSpaces();
            // position of the token following the object body ("stream" or, expectedly, "endobj")
            long endObjectOffset = source.getPosition();
            String endObjectKey = readString();
            if (endObjectKey.equals("stream")) {
                // rewind so that parseCOSStream sees the 'stream' keyword itself
                source.seek(endObjectOffset);
                if (pb instanceof COSDictionary) {
                    COSStream stream = parseCOSStream((COSDictionary) pb);
                    if (securityHandler != null) {
                        securityHandler.decryptStream(stream, objNr, objGenNr);
                    }
                    pb = stream;
                } else {
                    // the combination of a dict and the stream/endstream forms a complete stream object
                    throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ").");
                }
                skipSpaces();
                endObjectOffset = source.getPosition();
                endObjectKey = readString();
                // we have case with a second 'endstream' before endobj
                if (!endObjectKey.startsWith("endobj")) {
                    if (endObjectKey.startsWith("endstream")) {
                        // drop the 9 characters of "endstream" and keep whatever followed it
                        endObjectKey = endObjectKey.substring(9).trim();
                        if (endObjectKey.length() == 0) {
                            // no other characters in extra endstream line
                            // read next line
                            // NOTE(review): endObjectOffset is not refreshed here, so the later
                            // "EOL before endobj" check still refers to the extra 'endstream' token - confirm intent.
                            endObjectKey = readString();
                        }
                    }
                }
            } else if (securityHandler != null) {
                // non-stream object: decrypt the parsed object directly
                securityHandler.decrypt(pb, objNr, objGenNr);
            }
            // the parsed content is stored before the 'endobj' check, so it is kept even if that check throws
            pdfObject.setObject(pb);
            if (!endObjectKey.startsWith("endobj")) {
                throw new IOException("Object (" + readObjNr + ":" + readObjGen + ") at offset " + offsetOrObjstmObNr + " does not end with 'endobj'.");
            } else {
                // save the current position, look at the byte just before the closing keyword,
                // then restore the position (presumably nextIsEOL() inspects the upcoming bytes - verify)
                offset = source.getPosition();
                source.seek(endObjectOffset - 1);
                if (!nextIsEOL()) {
                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected before the 'endobj' keyword at offset " + source.getPosition()));
                }
                source.seek(offset);
            }
            // back at the position after the closing keyword: an EOL is expected here as well
            if (!nextIsEOL()) {
                addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected after the 'endobj' keyword at offset " + source.getPosition()));
            }
        } else {
            // xref value is object nr of object stream containing object to be parsed;
            // since our object was not found it means object stream was not parsed so far
            // (the stored value is negative; negating it yields the object stream's number)
            final int objstmObjNr = (int) (-offsetOrObjstmObNr);
            // the object stream itself is loaded with generation 0 and must have an xref entry
            final COSBase objstmBaseObj = parseObjectDynamically(objstmObjNr, 0, true);
            if (objstmBaseObj instanceof COSStream) {
                // parse object stream
                PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document);
                parser.parse();
                // register all objects which are referenced to be contained in object stream
                for (COSObject next : parser.getObjects()) {
                    COSObjectKey stmObjKey = new COSObjectKey(next);
                    Long offset = document.getXrefTable().get(stmObjKey);
                    // only accept objects whose xref entry points to this very object stream
                    if (offset != null && offset == -objstmObjNr) {
                        COSObject stmObj = document.getObjectFromPool(stmObjKey);
                        stmObj.setObject(next.getObject());
                    }
                }
            }
        }
    }
    return pdfObject.getObject();
}
Also used : Pattern(java.util.regex.Pattern) COSStream(org.apache.pdfbox.cos.COSStream) COSDictionary(org.apache.pdfbox.cos.COSDictionary) Matcher(java.util.regex.Matcher) SyntaxValidationException(org.apache.pdfbox.preflight.exception.SyntaxValidationException) COSString(org.apache.pdfbox.cos.COSString) IOException(java.io.IOException) PDFObjectStreamParser(org.apache.pdfbox.pdfparser.PDFObjectStreamParser) COSObjectKey(org.apache.pdfbox.cos.COSObjectKey) COSObject(org.apache.pdfbox.cos.COSObject) COSBase(org.apache.pdfbox.cos.COSBase) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError)

Example 28 with COSObjectKey

use of org.apache.pdfbox.cos.COSObjectKey in project pdfbox by apache.

the class CatalogValidationProcess method validateOutputIntent.

/**
 * Checks every entry of the catalog's OutputIntents array and records the problems found on the
 * given context; nothing is returned.
 *
 * <p>For each entry, in this order:
 * <ul>
 * <li>it must resolve to a dictionary;</li>
 * <li>its S entry must be equal to GTS_PDFA1;</li>
 * <li>its OutputConditionIdentifier must be present (an empty string is accepted);</li>
 * <li>its DestOutputProfile is handed to {@code validateICCProfile}, together with a map shared by
 * all entries (presumably so that profiles can be compared across OutputIntents - verify there);</li>
 * <li>when lazy validation is enabled and the identifier is not a standard ICC characterization, a
 * missing or empty Info entry is reported as a warning.</li>
 * </ul>
 * The first failed check of an entry ends the processing of that entry.
 *
 * @param ctx the preflight context supplying the document and configuration, and receiving the errors
 * @throws ValidationException propagated unchanged from the calls this method makes
 */
public void validateOutputIntent(PreflightContext ctx) throws ValidationException {
    COSDocument cosDocument = ctx.getDocument().getDocument();
    COSBase intentsBase = catalog.getCOSObject().getItem(COSName.getPDFName(DOCUMENT_DICTIONARY_KEY_OUTPUT_INTENTS));
    COSArray outputIntents = COSUtils.getAsArray(intentsBase, cosDocument);
    Map<COSObjectKey, Boolean> seenProfiles = new HashMap<>();
    if (outputIntents == null) {
        // no OutputIntents array at all - nothing to validate
        return;
    }
    for (int index = 0; index < outputIntents.size(); index++) {
        COSDictionary intent = COSUtils.getAsDictionary(outputIntents.get(index), cosDocument);
        if (intent == null) {
            addValidationError(ctx, new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, "OutputIntent object is null or isn't a dictionary"));
            continue;
        }

        // S entry is mandatory and must be equal to GTS_PDFA1
        String subtype = intent.getNameAsString(OUTPUT_INTENT_DICTIONARY_KEY_S);
        if (!OUTPUT_INTENT_DICTIONARY_VALUE_GTS_PDFA1.equals(subtype)) {
            addValidationError(ctx, new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_S_VALUE_INVALID, "The S entry of the OutputIntent isn't GTS_PDFA1"));
            continue;
        }

        // OutputConditionIdentifier is mandatory; an empty string is authorized
        // (it may be an application specific value)
        String conditionId = intent.getString(OUTPUT_INTENT_DICTIONARY_KEY_OUTPUT_CONDITION_IDENTIFIER);
        if (conditionId == null) {
            addValidationError(ctx, new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, "The OutputIntentCondition is missing"));
            continue;
        }

        /*
         * A PDF/A conforming file needs to specify the color characteristics, so the DestOutputProfile
         * is checked for every identifier, not only for "Custom" or non standard ICC characterizations.
         */
        COSBase profile = intent.getItem(OUTPUT_INTENT_DICTIONARY_KEY_DEST_OUTPUT_PROFILE);
        validateICCProfile(profile, seenProfiles, ctx);

        // The Info entry is only looked at under lazy validation with a non standard characterization.
        if (!ctx.getConfig().isLazyValidation() || isStandardICCCharacterization(conditionId)) {
            continue;
        }
        String info = intent.getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_INFO));
        if (info == null || info.isEmpty()) {
            ValidationError missingInfo = new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, "The Info entry of a OutputIntent dictionary is missing");
            missingInfo.setWarning(true);
            addValidationError(ctx, missingInfo);
        }
    }
}
Also used : COSObjectKey(org.apache.pdfbox.cos.COSObjectKey) PreflightConfiguration(org.apache.pdfbox.preflight.PreflightConfiguration) COSArray(org.apache.pdfbox.cos.COSArray) COSDictionary(org.apache.pdfbox.cos.COSDictionary) HashMap(java.util.HashMap) COSDocument(org.apache.pdfbox.cos.COSDocument) COSBase(org.apache.pdfbox.cos.COSBase) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError)

Example 29 with COSObjectKey

use of org.apache.pdfbox.cos.COSObjectKey in project pdfbox by apache.

the class ActionManagerFactory method getActionManagers.

/**
 * Collects the action managers for the actions referenced by the given dictionary. Three entries are
 * inspected, in this order:
 * <ul>
 * <li>A (Action) : available in Annotations, Outline items</li>
 * <li>OpenAction (OpenAction) : available in the Catalog dictionary; skipped when its value is an
 * array, because an array contains a Destination and not an Action</li>
 * <li>AA (Additional Action) : available in the Catalog dictionary, Annotations, Pages; each entry of
 * that dictionary is processed together with its key name</li>
 * </ul>
 *
 * If there is no action, an empty list is returned.
 *
 * @param ctx the preflight context.
 * @param dictionary the dictionary to retrieve the actions from.
 * @return the list of actions from the given dictionary.
 * @throws ValidationException propagated unchanged from the calls this method makes
 */
public final List<AbstractActionManager> getActionManagers(PreflightContext ctx, COSDictionary dictionary) throws ValidationException {
    List<AbstractActionManager> managers = new ArrayList<>(0);
    // shared across all three lookups and handed to every callCreateAction call
    Map<COSObjectKey, Boolean> seen = new HashMap<>();

    // A entry
    COSBase action = dictionary.getDictionaryObject(COSName.A);
    if (action != null) {
        callCreateAction(action, ctx, managers, seen);
    }

    COSDocument cosDocument = ctx.getDocument().getDocument();

    // OpenAction entry - nothing to do for an array, which holds a Destination and not an Action
    COSBase openAction = dictionary.getDictionaryObject(DICTIONARY_KEY_OPEN_ACTION);
    boolean openActionUsable = openAction != null && !COSUtils.isArray(openAction, cosDocument);
    if (openActionUsable) {
        callCreateAction(openAction, ctx, managers, seen);
    }

    // AA entry
    COSBase additional = dictionary.getDictionaryObject(DICTIONARY_KEY_ADDITIONAL_ACTION);
    if (additional == null) {
        return managers;
    }
    COSDictionary additionalActions = COSUtils.getAsDictionary(additional, cosDocument);
    if (additionalActions == null) {
        return managers;
    }
    for (Object rawKey : additionalActions.keySet()) {
        COSName eventName = (COSName) rawKey;
        COSBase eventAction = additionalActions.getItem(eventName);
        callCreateAction(eventAction, ctx, managers, eventName.getName(), seen);
    }
    return managers;
}
Also used : COSObjectKey(org.apache.pdfbox.cos.COSObjectKey) COSDictionary(org.apache.pdfbox.cos.COSDictionary) COSName(org.apache.pdfbox.cos.COSName) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) COSBase(org.apache.pdfbox.cos.COSBase) COSDocument(org.apache.pdfbox.cos.COSDocument) COSObject(org.apache.pdfbox.cos.COSObject)

Example 30 with COSObjectKey

use of org.apache.pdfbox.cos.COSObjectKey in project pdfbox by apache.

the class COSWriter method writeReference.

/**
 * Writes a reference to the given object to the standard output: the object number, a space, the
 * generation number, a space and the reference marker. Both numbers are written as their decimal
 * string form encoded in ISO-8859-1.
 *
 * @param obj The object whose key is looked up and written as a reference.
 *
 * @throws IOException If there is an exception while writing to the output.
 */
public void writeReference(COSBase obj) throws IOException {
    final COSObjectKey objectKey = getObjectKey(obj);
    final byte[] numberBytes = String.valueOf(objectKey.getNumber()).getBytes(Charsets.ISO_8859_1);
    final byte[] generationBytes = String.valueOf(objectKey.getGeneration()).getBytes(Charsets.ISO_8859_1);

    getStandardOutput().write(numberBytes);
    getStandardOutput().write(SPACE);
    getStandardOutput().write(generationBytes);
    getStandardOutput().write(SPACE);
    getStandardOutput().write(REFERENCE);
}
Also used : COSObjectKey(org.apache.pdfbox.cos.COSObjectKey)

Aggregations

COSObjectKey (org.apache.pdfbox.cos.COSObjectKey)39 COSObject (org.apache.pdfbox.cos.COSObject)25 IOException (java.io.IOException)16 COSDocument (org.apache.pdfbox.cos.COSDocument)13 COSBase (org.apache.pdfbox.cos.COSBase)12 COSDictionary (org.apache.pdfbox.cos.COSDictionary)8 COSStream (org.apache.pdfbox.cos.COSStream)7 ValidationError (org.apache.pdfbox.preflight.ValidationResult.ValidationError)7 COSArray (org.apache.pdfbox.cos.COSArray)6 COSString (org.apache.pdfbox.cos.COSString)6 Test (org.junit.Test)6 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 COSNumber (org.apache.pdfbox.cos.COSNumber)4 COSInteger (org.apache.pdfbox.cos.COSInteger)3 COSName (org.apache.pdfbox.cos.COSName)3 InputStream (java.io.InputStream)2 Matcher (java.util.regex.Matcher)2 Pattern (java.util.regex.Pattern)2 PDFObjectStreamParser (org.apache.pdfbox.pdfparser.PDFObjectStreamParser)2