* Create AccessControlList if appropriate properties are configured.
* @param context ProcessContext
* @param flowFile FlowFile
* @return AccessControlList or null if no ACL properties were specified
protected final AccessControlList createACL(final ProcessContext context, final FlowFile flowFile) {
// lazy-initialize ACL, as it should not be used if no properties were specified
AccessControlList acl = null;
final String ownerId = context.getProperty(OWNER).evaluateAttributeExpressions(flowFile).getValue();
if (!StringUtils.isEmpty(ownerId)) {
final Owner owner = new Owner();
if (acl == null) {
acl = new AccessControlList();
for (final Grantee grantee : createGrantees(context.getProperty(FULL_CONTROL_USER_LIST).evaluateAttributeExpressions(flowFile).getValue())) {
if (acl == null) {
acl = new AccessControlList();
acl.grantPermission(grantee, Permission.FullControl);
for (final Grantee grantee : createGrantees(context.getProperty(READ_USER_LIST).evaluateAttributeExpressions(flowFile).getValue())) {
if (acl == null) {
acl = new AccessControlList();
acl.grantPermission(grantee, Permission.Read);
for (final Grantee grantee : createGrantees(context.getProperty(WRITE_USER_LIST).evaluateAttributeExpressions(flowFile).getValue())) {
if (acl == null) {
acl = new AccessControlList();
acl.grantPermission(grantee, Permission.Write);
for (final Grantee grantee : createGrantees(context.getProperty(READ_ACL_LIST).evaluateAttributeExpressions(flowFile).getValue())) {
if (acl == null) {
acl = new AccessControlList();
acl.grantPermission(grantee, Permission.ReadAcp);
for (final Grantee grantee : createGrantees(context.getProperty(WRITE_ACL_LIST).evaluateAttributeExpressions(flowFile).getValue())) {
if (acl == null) {
acl = new AccessControlList();
acl.grantPermission(grantee, Permission.WriteAcp);
return acl;
/**
 * Uploads the content of one FlowFile to Amazon S3 and routes it to REL_SUCCESS or REL_FAILURE.
 * A single putObject call is used when the FlowFile size is at or below the configured
 * multipart threshold; otherwise a multipart upload is used whose progress is persisted
 * locally under a cache key so that it can be resumed.
 *
 * NOTE(review): this listing appears to be missing lines (closing braces and some
 * statements); confirm against the complete source before relying on these notes.
 *
 * @param context supplies the configured property values (bucket, key, thresholds, ACLs, ...)
 * @param session used to obtain the FlowFile, update its attributes, transfer it and report provenance
 */
public void onTrigger(final ProcessContext context, final ProcessSession session) {
FlowFile flowFile = session.get();
// NOTE(review): the body of this null check is not visible in this listing.
if (flowFile == null) {
final long startNanos = System.nanoTime();
final String bucket = context.getProperty(BUCKET).evaluateAttributeExpressions(flowFile).getValue();
final String key = context.getProperty(KEY).evaluateAttributeExpressions(flowFile).getValue();
// Key under which multipart upload state is looked up and persisted below
// (getLocalStateIfInS3 / persistLocalState).
final String cacheKey = getIdentifier() + "/" + bucket + "/" + key;
final AmazonS3Client s3 = getClient();
// Final alias: flowFile is reassigned later, so the anonymous callback refers to ff instead.
final FlowFile ff = flowFile;
// Attributes collected during the upload; applied to the FlowFile once the upload has finished.
final Map<String, String> attributes = new HashMap<>();
final String ffFilename = ff.getAttributes().get(CoreAttributes.FILENAME.key());
attributes.put(S3_BUCKET_KEY, bucket);
attributes.put(S3_OBJECT_KEY, key);
final Long multipartThreshold = context.getProperty(MULTIPART_THRESHOLD).asDataSize(DataUnit.B).longValue();
final Long multipartPartSize = context.getProperty(MULTIPART_PART_SIZE).asDataSize(DataUnit.B).longValue();
final long now = System.currentTimeMillis();
* If necessary, run age off for existing uploads in AWS S3 and local state
ageoffS3Uploads(context, s3, now);
* Then
try {, new InputStreamCallback() {
public void process(final InputStream rawIn) throws IOException {
try (final InputStream in = new BufferedInputStream(rawIn)) {
final ObjectMetadata objectMetadata = new ObjectMetadata();
final String contentType = context.getProperty(CONTENT_TYPE).evaluateAttributeExpressions(ff).getValue();
if (contentType != null) {
attributes.put(S3_CONTENT_TYPE, contentType);
final String expirationRule = context.getProperty(EXPIRATION_RULE_ID).evaluateAttributeExpressions(ff).getValue();
if (expirationRule != null) {
// Every dynamic property is collected as a user metadata entry (name -> evaluated value).
final Map<String, String> userMetadata = new HashMap<>();
for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
if (entry.getKey().isDynamic()) {
final String value = context.getProperty(entry.getKey()).evaluateAttributeExpressions(ff).getValue();
userMetadata.put(entry.getKey().getName(), value);
final String serverSideEncryption = context.getProperty(SERVER_SIDE_ENCRYPTION).getValue();
if (!serverSideEncryption.equals(NO_SERVER_SIDE_ENCRYPTION)) {
attributes.put(S3_SSE_ALGORITHM, serverSideEncryption);
if (!userMetadata.isEmpty()) {
// Content at or below the multipart threshold is sent with one putObject request.
if (ff.getSize() <= multipartThreshold) {
// ----------------------------------------
// single part upload
// ----------------------------------------
final PutObjectRequest request = new PutObjectRequest(bucket, key, in, objectMetadata);
final AccessControlList acl = createACL(context, ff);
if (acl != null) {
final CannedAccessControlList cannedAcl = createCannedACL(context, ff);
if (cannedAcl != null) {
try {
final PutObjectResult result = s3.putObject(request);
// Copy whichever result fields are present into the FlowFile attributes.
if (result.getVersionId() != null) {
attributes.put(S3_VERSION_ATTR_KEY, result.getVersionId());
if (result.getETag() != null) {
attributes.put(S3_ETAG_ATTR_KEY, result.getETag());
if (result.getExpirationTime() != null) {
attributes.put(S3_EXPIRATION_ATTR_KEY, result.getExpirationTime().toString());
if (result.getMetadata().getRawMetadata().keySet().contains(S3_STORAGECLASS_META_KEY)) {
attributes.put(S3_STORAGECLASS_ATTR_KEY, result.getMetadata().getRawMetadataValue(S3_STORAGECLASS_META_KEY).toString());
if (userMetadata.size() > 0) {
StringBuilder userMetaBldr = new StringBuilder();
for (String userKey : userMetadata.keySet()) {
attributes.put(S3_USERMETA_ATTR_KEY, userMetaBldr.toString());
} catch (AmazonClientException e) {
// Log the failure, then rethrow it to the caller of the callback.
getLogger().info("Failure completing upload flowfile={} bucket={} key={} reason={}", new Object[] { ffFilename, bucket, key, e.getMessage() });
throw (e);
} else {
// ----------------------------------------
// multipart upload
// ----------------------------------------
// load or create persistent state
// ------------------------------------------------------------
MultipartState currentState;
try {
currentState = getLocalStateIfInS3(s3, bucket, cacheKey);
// Non-null state means an earlier upload for this cache key is being resumed.
if (currentState != null) {
if (currentState.getPartETags().size() > 0) {
final PartETag lastETag = currentState.getPartETags().get(currentState.getPartETags().size() - 1);
getLogger().info("Resuming upload for flowfile='{}' bucket='{}' key='{}' " + "uploadID='{}' filePosition='{}' partSize='{}' storageClass='{}' " + "contentLength='{}' partsLoaded={} lastPart={}/{}", new Object[] { ffFilename, bucket, key, currentState.getUploadId(), currentState.getFilePosition(), currentState.getPartSize(), currentState.getStorageClass().toString(), currentState.getContentLength(), currentState.getPartETags().size(), Integer.toString(lastETag.getPartNumber()), lastETag.getETag() });
} else {
getLogger().info("Resuming upload for flowfile='{}' bucket='{}' key='{}' " + "uploadID='{}' filePosition='{}' partSize='{}' storageClass='{}' " + "contentLength='{}' no partsLoaded", new Object[] { ffFilename, bucket, key, currentState.getUploadId(), currentState.getFilePosition(), currentState.getPartSize(), currentState.getStorageClass().toString(), currentState.getContentLength() });
} else {
// No usable state found: start from a fresh state object and persist it right away.
currentState = new MultipartState();
persistLocalState(cacheKey, currentState);
getLogger().info("Starting new upload for flowfile='{}' bucket='{}' key='{}'", new Object[] { ffFilename, bucket, key });
} catch (IOException e) {
getLogger().error("IOException initiating cache state while processing flow files: " + e.getMessage());
throw (e);
// ------------------------------------------------------------
// An empty upload id leads to initiating a new multipart upload with S3.
if (currentState.getUploadId().isEmpty()) {
final InitiateMultipartUploadRequest initiateRequest = new InitiateMultipartUploadRequest(bucket, key, objectMetadata);
final AccessControlList acl = createACL(context, ff);
if (acl != null) {
final CannedAccessControlList cannedAcl = createCannedACL(context, ff);
if (cannedAcl != null) {
try {
final InitiateMultipartUploadResult initiateResult = s3.initiateMultipartUpload(initiateRequest);
try {
persistLocalState(cacheKey, currentState);
} catch (Exception e) {
// Failing to persist state right after initiating the upload is wrapped and rethrown.
getLogger().info("Exception saving cache state while processing flow file: " + e.getMessage());
throw (new ProcessException("Exception saving cache state", e));
getLogger().info("Success initiating upload flowfile={} available={} position={} " + "length={} bucket={} key={} uploadId={}", new Object[] { ffFilename, in.available(), currentState.getFilePosition(), currentState.getContentLength(), bucket, key, currentState.getUploadId() });
if (initiateResult.getUploadId() != null) {
attributes.put(S3_UPLOAD_ID_ATTR_KEY, initiateResult.getUploadId());
} catch (AmazonClientException e) {
getLogger().info("Failure initiating upload flowfile={} bucket={} key={} reason={}", new Object[] { ffFilename, bucket, key, e.getMessage() });
throw (e);
} else {
// Existing upload id: skip the input stream forward to the recorded file position.
if (currentState.getFilePosition() > 0) {
try {
final long skipped = in.skip(currentState.getFilePosition());
if (skipped != currentState.getFilePosition()) {
getLogger().info("Failure skipping to resume upload flowfile={} " + "bucket={} key={} position={} skipped={}", new Object[] { ffFilename, bucket, key, currentState.getFilePosition(), skipped });
} catch (Exception e) {
getLogger().info("Failure skipping to resume upload flowfile={} bucket={} " + "key={} position={} reason={}", new Object[] { ffFilename, bucket, key, currentState.getFilePosition(), e.getMessage() });
throw (new ProcessException(e));
// upload parts
// ------------------------------------------------------------
long thisPartSize;
// Part numbering continues after the parts already recorded; the loop runs until the
// recorded file position reaches the content length.
for (int part = currentState.getPartETags().size() + 1; currentState.getFilePosition() < currentState.getContentLength(); part++) {
// Stop uploading when the processor is no longer scheduled.
if (!PutS3Object.this.isScheduled()) {
throw new IOException(S3_PROCESS_UNSCHEDULED_MESSAGE + " flowfile=" + ffFilename + " part=" + part + " uploadId=" + currentState.getUploadId());
// The last part may be smaller than the state's part size.
thisPartSize = Math.min(currentState.getPartSize(), (currentState.getContentLength() - currentState.getFilePosition()));
UploadPartRequest uploadRequest = new UploadPartRequest().withBucketName(bucket).withKey(key).withUploadId(currentState.getUploadId()).withInputStream(in).withPartNumber(part).withPartSize(thisPartSize);
try {
UploadPartResult uploadPartResult = s3.uploadPart(uploadRequest);
currentState.setFilePosition(currentState.getFilePosition() + thisPartSize);
try {
persistLocalState(cacheKey, currentState);
} catch (Exception e) {
// NOTE(review): as listed, a failure to persist state after a part upload is only logged.
getLogger().info("Exception saving cache state processing flow file: " + e.getMessage());
getLogger().info("Success uploading part flowfile={} part={} available={} " + "etag={} uploadId={}", new Object[] { ffFilename, part, in.available(), uploadPartResult.getETag(), currentState.getUploadId() });
} catch (AmazonClientException e) {
getLogger().info("Failure uploading part flowfile={} part={} bucket={} key={} " + "reason={}", new Object[] { ffFilename, part, bucket, key, e.getMessage() });
throw (e);
// complete multipart upload
// ------------------------------------------------------------
CompleteMultipartUploadRequest completeRequest = new CompleteMultipartUploadRequest(bucket, key, currentState.getUploadId(), currentState.getPartETags());
try {
CompleteMultipartUploadResult completeResult = s3.completeMultipartUpload(completeRequest);
getLogger().info("Success completing upload flowfile={} etag={} uploadId={}", new Object[] { ffFilename, completeResult.getETag(), currentState.getUploadId() });
if (completeResult.getVersionId() != null) {
attributes.put(S3_VERSION_ATTR_KEY, completeResult.getVersionId());
if (completeResult.getETag() != null) {
attributes.put(S3_ETAG_ATTR_KEY, completeResult.getETag());
if (completeResult.getExpirationTime() != null) {
attributes.put(S3_EXPIRATION_ATTR_KEY, completeResult.getExpirationTime().toString());
if (currentState.getStorageClass() != null) {
attributes.put(S3_STORAGECLASS_ATTR_KEY, currentState.getStorageClass().toString());
if (userMetadata.size() > 0) {
StringBuilder userMetaBldr = new StringBuilder();
for (String userKey : userMetadata.keySet()) {
attributes.put(S3_USERMETA_ATTR_KEY, userMetaBldr.toString());
} catch (AmazonClientException e) {
getLogger().info("Failure completing upload flowfile={} bucket={} key={} reason={}", new Object[] { ffFilename, bucket, key, e.getMessage() });
throw (e);
// Upload finished: apply the collected attributes, route to success and report provenance.
if (!attributes.isEmpty()) {
flowFile = session.putAllAttributes(flowFile, attributes);
session.transfer(flowFile, REL_SUCCESS);
final String url = s3.getResourceUrl(bucket, key);
final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
session.getProvenanceReporter().send(flowFile, url, millis);
getLogger().info("Successfully put {} to Amazon S3 in {} milliseconds", new Object[] { ff, millis });
try {
} catch (IOException e) {
getLogger().info("Error trying to delete key {} from cache: {}", new Object[] { cacheKey, e.getMessage() });
} catch (final ProcessException | AmazonClientException pe) {
// NOTE(review): the branch taken for the "unscheduled" message is not visible in this listing;
// in the else branch the failure is logged and the FlowFile is penalized and routed to REL_FAILURE.
if (pe.getMessage().contains(S3_PROCESS_UNSCHEDULED_MESSAGE)) {
} else {
getLogger().error("Failed to put {} to Amazon S3 due to {}", new Object[] { flowFile, pe });
flowFile = session.penalize(flowFile);
session.transfer(flowFile, REL_FAILURE);
public boolean publishResource(String objectPath) {
AccessControlList acl = s3.getObjectAcl(bucketName, objectPath);
acl.grantPermission(GroupGrantee.AllUsers, Permission.Read);
s3.setObjectAcl(bucketName, objectPath, acl);
return true;
public static void main(String[] args) throws IOException {
Regions clientRegion = Regions.DEFAULT_REGION;
String bucketName = "*** Bucket name ***";
String userEmailForReadPermission = "*** ***";
try {
AmazonS3 s3Client = AmazonS3ClientBuilder.standard().withRegion(clientRegion).build();
// Create a bucket with a canned ACL. This ACL will be replaced by the setBucketAcl()
// calls below. It is included here for demonstration purposes.
CreateBucketRequest createBucketRequest = new CreateBucketRequest(bucketName, clientRegion.getName()).withCannedAcl(CannedAccessControlList.LogDeliveryWrite);
// Create a collection of grants to add to the bucket.
ArrayList<Grant> grantCollection = new ArrayList<Grant>();
// Grant the account owner full control.
Grant grant1 = new Grant(new CanonicalGrantee(s3Client.getS3AccountOwner().getId()), Permission.FullControl);
// Grant the LogDelivery group permission to write to the bucket.
Grant grant2 = new Grant(GroupGrantee.LogDelivery, Permission.Write);
// Save grants by replacing all current ACL grants with the two we just created.
AccessControlList bucketAcl = new AccessControlList();
bucketAcl.grantAllPermissions(grantCollection.toArray(new Grant[0]));
s3Client.setBucketAcl(bucketName, bucketAcl);
// Retrieve the bucket's ACL, add another grant, and then save the new ACL.
AccessControlList newBucketAcl = s3Client.getBucketAcl(bucketName);
Grant grant3 = new Grant(new EmailAddressGrantee(userEmailForReadPermission), Permission.Read);
s3Client.setBucketAcl(bucketName, newBucketAcl);
} catch (AmazonServiceException e) {
// The call was transmitted successfully, but Amazon S3 couldn't process
// it and returned an error response.
} catch (SdkClientException e) {
// Amazon S3 couldn't be contacted for a response, or the client
// couldn't parse the response from Amazon S3.
public static void getBucketAcl(String bucket_name) {
System.out.println("Retrieving ACL for bucket: " + bucket_name);
final AmazonS3 s3 = AmazonS3ClientBuilder.standard().withRegion(Regions.DEFAULT_REGION).build();
try {
AccessControlList acl = s3.getBucketAcl(bucket_name);
List<Grant> grants = acl.getGrantsAsList();
for (Grant grant : grants) {
System.out.format(" %s: %s\n", grant.getGrantee().getIdentifier(), grant.getPermission().toString());
} catch (AmazonServiceException e) {