Use of com.google.cloud.storage.Storage in the Apache NiFi project.
Class FetchGCSObject, method onTrigger.
/**
 * Fetches a single GCS object and writes its content into the incoming flow file.
 *
 * <p>The bucket, key, generation and encryption key are read from the processor properties,
 * evaluated against the incoming flow file's attributes. The blob's metadata is looked up first;
 * a missing blob is reported as a 404 {@link StorageException}. The object content is then imported
 * into the flow file, the blob metadata is copied into flow file attributes, and the flow file is
 * routed to {@code REL_SUCCESS} with a provenance FETCH event.
 *
 * <p>On a {@link StorageException} the error is logged, the flow file is penalized and routed to
 * {@code REL_FAILURE}.
 *
 * @param context provides the processor's property values
 * @param session the session used to obtain, modify and transfer the flow file
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final long startNanos = System.nanoTime();

    String bucketName = context.getProperty(BUCKET).evaluateAttributeExpressions(flowFile).getValue();
    String key = context.getProperty(KEY).evaluateAttributeExpressions(flowFile).getValue();
    Long generation = context.getProperty(GENERATION).evaluateAttributeExpressions(flowFile).asLong();
    String encryptionKey = context.getProperty(ENCRYPTION_KEY).evaluateAttributeExpressions(flowFile).getValue();

    final Storage storage = getCloudService();
    final Map<String, String> attributes = new HashMap<>();
    final BlobId blobId = BlobId.of(bucketName, key, generation);

    try {
        // At most two source options are ever added: decryption key and generation match.
        final List<Storage.BlobSourceOption> blobSourceOptions = new ArrayList<>(2);
        if (encryptionKey != null) {
            blobSourceOptions.add(Storage.BlobSourceOption.decryptionKey(encryptionKey));
        }
        if (generation != null) {
            blobSourceOptions.add(Storage.BlobSourceOption.generationMatch());
        }

        final Blob blob = storage.get(blobId);
        if (blob == null) {
            throw new StorageException(404, "Blob " + blobId + " not found");
        }

        // Close the read channel once the content has been imported so that the underlying
        // resources are released even when the import fails.
        try (final ReadChannel reader = storage.reader(blobId,
                blobSourceOptions.toArray(new Storage.BlobSourceOption[blobSourceOptions.size()]))) {
            flowFile = session.importFrom(Channels.newInputStream(reader), flowFile);
        }

        // Copy the blob metadata into flow file attributes; optional values are only set when present.
        attributes.put(BUCKET_ATTR, blob.getBucket());
        attributes.put(KEY_ATTR, blob.getName());
        if (blob.getSize() != null) {
            attributes.put(SIZE_ATTR, String.valueOf(blob.getSize()));
        }
        if (blob.getCacheControl() != null) {
            attributes.put(CACHE_CONTROL_ATTR, blob.getCacheControl());
        }
        if (blob.getComponentCount() != null) {
            attributes.put(COMPONENT_COUNT_ATTR, String.valueOf(blob.getComponentCount()));
        }
        if (blob.getContentEncoding() != null) {
            attributes.put(CONTENT_ENCODING_ATTR, blob.getContentEncoding());
        }
        if (blob.getContentLanguage() != null) {
            attributes.put(CONTENT_LANGUAGE_ATTR, blob.getContentLanguage());
        }
        if (blob.getContentType() != null) {
            attributes.put(CoreAttributes.MIME_TYPE.key(), blob.getContentType());
        }
        if (blob.getCrc32c() != null) {
            attributes.put(CRC32C_ATTR, blob.getCrc32c());
        }
        if (blob.getCustomerEncryption() != null) {
            final BlobInfo.CustomerEncryption encryption = blob.getCustomerEncryption();
            attributes.put(ENCRYPTION_ALGORITHM_ATTR, encryption.getEncryptionAlgorithm());
            attributes.put(ENCRYPTION_SHA256_ATTR, encryption.getKeySha256());
        }
        if (blob.getEtag() != null) {
            attributes.put(ETAG_ATTR, blob.getEtag());
        }
        if (blob.getGeneratedId() != null) {
            attributes.put(GENERATED_ID_ATTR, blob.getGeneratedId());
        }
        if (blob.getGeneration() != null) {
            attributes.put(GENERATION_ATTR, String.valueOf(blob.getGeneration()));
        }
        if (blob.getMd5() != null) {
            attributes.put(MD5_ATTR, blob.getMd5());
        }
        if (blob.getMediaLink() != null) {
            attributes.put(MEDIA_LINK_ATTR, blob.getMediaLink());
        }
        if (blob.getMetageneration() != null) {
            attributes.put(METAGENERATION_ATTR, String.valueOf(blob.getMetageneration()));
        }
        if (blob.getOwner() != null) {
            // The owner attribute holds the identifier that matches the concrete ACL entity type.
            final Acl.Entity entity = blob.getOwner();
            if (entity instanceof Acl.User) {
                attributes.put(OWNER_ATTR, ((Acl.User) entity).getEmail());
                attributes.put(OWNER_TYPE_ATTR, "user");
            } else if (entity instanceof Acl.Group) {
                attributes.put(OWNER_ATTR, ((Acl.Group) entity).getEmail());
                attributes.put(OWNER_TYPE_ATTR, "group");
            } else if (entity instanceof Acl.Domain) {
                attributes.put(OWNER_ATTR, ((Acl.Domain) entity).getDomain());
                attributes.put(OWNER_TYPE_ATTR, "domain");
            } else if (entity instanceof Acl.Project) {
                attributes.put(OWNER_ATTR, ((Acl.Project) entity).getProjectId());
                attributes.put(OWNER_TYPE_ATTR, "project");
            }
        }
        if (blob.getSelfLink() != null) {
            attributes.put(URI_ATTR, blob.getSelfLink());
        }
        if (blob.getContentDisposition() != null) {
            attributes.put(CONTENT_DISPOSITION_ATTR, blob.getContentDisposition());
            // When the content disposition can be parsed, its file name replaces the flow file's filename.
            final Util.ParsedContentDisposition parsedContentDisposition = Util.parseContentDisposition(blob.getContentDisposition());
            if (parsedContentDisposition != null) {
                attributes.put(CoreAttributes.FILENAME.key(), parsedContentDisposition.getFileName());
            }
        }
        if (blob.getCreateTime() != null) {
            attributes.put(CREATE_TIME_ATTR, String.valueOf(blob.getCreateTime()));
        }
        if (blob.getUpdateTime() != null) {
            attributes.put(UPDATE_TIME_ATTR, String.valueOf(blob.getUpdateTime()));
        }
    } catch (StorageException e) {
        getLogger().error(e.getMessage(), e);
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    if (!attributes.isEmpty()) {
        flowFile = session.putAllAttributes(flowFile, attributes);
    }
    session.transfer(flowFile, REL_SUCCESS);

    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    getLogger().info("Successfully retrieved GCS Object for {} in {} millis; routing to success", new Object[] { flowFile, millis });
    session.getProvenanceReporter().fetch(flowFile, "https://" + bucketName + ".storage.googleapis.com/" + key, millis);
}
Use of com.google.cloud.storage.Storage in the Apache NiFi project.
Class ListGCSBucket, method onTrigger.
/**
 * Lists the blobs of the configured GCS bucket and emits one attribute-only flow file per blob
 * that has not been listed before.
 *
 * <p>Processor state is restored first; if that fails, the error is logged and the processor
 * yields. Blobs whose update time is older than {@code currentTimestamp}, or equal to it with a
 * name already contained in {@code currentKeys}, are skipped. Every remaining blob produces a
 * flow file carrying the blob metadata as attributes, routed to {@code REL_SUCCESS}.
 * {@code commit} is invoked after every page of results.
 *
 * <p>A blob without an update time is treated as having update time {@code 0} rather than
 * aborting the whole listing with a {@link NullPointerException}.
 *
 * @param context provides the processor's property values and is passed on to the state helpers
 * @param session the session used to create and transfer flow files
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    try {
        restoreState(context);
    } catch (IOException e) {
        getLogger().error("Failed to restore processor state; yielding", e);
        context.yield();
        return;
    }

    final long startNanos = System.nanoTime();

    final String bucket = context.getProperty(BUCKET).getValue();
    final String prefix = context.getProperty(PREFIX).getValue();
    final boolean useGenerations = context.getProperty(USE_GENERATIONS).asBoolean();

    List<Storage.BlobListOption> listOptions = new ArrayList<>();
    if (prefix != null) {
        listOptions.add(Storage.BlobListOption.prefix(prefix));
    }
    if (useGenerations) {
        listOptions.add(Storage.BlobListOption.versions(true));
    }

    final Storage storage = getCloudService();
    int listCount = 0;
    long maxTimestamp = 0L;

    Page<Blob> blobPages = storage.list(bucket, listOptions.toArray(new Storage.BlobListOption[listOptions.size()]));
    do {
        for (Blob blob : blobPages.getValues()) {
            // The update time is nullable (it is null-checked before being written as an
            // attribute below), so guard the unboxing instead of failing the whole listing.
            final Long updateTime = blob.getUpdateTime();
            final long lastModified = updateTime == null ? 0L : updateTime;

            // Skip blobs that were already listed in an earlier run.
            if (lastModified < currentTimestamp || lastModified == currentTimestamp && currentKeys.contains(blob.getName())) {
                continue;
            }

            // Create attributes
            final Map<String, String> attributes = new HashMap<>();
            attributes.put(BUCKET_ATTR, blob.getBucket());
            attributes.put(KEY_ATTR, blob.getName());
            if (blob.getSize() != null) {
                attributes.put(SIZE_ATTR, String.valueOf(blob.getSize()));
            }
            if (blob.getCacheControl() != null) {
                attributes.put(CACHE_CONTROL_ATTR, blob.getCacheControl());
            }
            if (blob.getComponentCount() != null) {
                attributes.put(COMPONENT_COUNT_ATTR, String.valueOf(blob.getComponentCount()));
            }
            if (blob.getContentDisposition() != null) {
                attributes.put(CONTENT_DISPOSITION_ATTR, blob.getContentDisposition());
            }
            if (blob.getContentEncoding() != null) {
                attributes.put(CONTENT_ENCODING_ATTR, blob.getContentEncoding());
            }
            if (blob.getContentLanguage() != null) {
                attributes.put(CONTENT_LANGUAGE_ATTR, blob.getContentLanguage());
            }
            if (blob.getContentType() != null) {
                attributes.put(CoreAttributes.MIME_TYPE.key(), blob.getContentType());
            }
            if (blob.getCrc32c() != null) {
                attributes.put(CRC32C_ATTR, blob.getCrc32c());
            }
            if (blob.getCustomerEncryption() != null) {
                final BlobInfo.CustomerEncryption encryption = blob.getCustomerEncryption();
                attributes.put(ENCRYPTION_ALGORITHM_ATTR, encryption.getEncryptionAlgorithm());
                attributes.put(ENCRYPTION_SHA256_ATTR, encryption.getKeySha256());
            }
            if (blob.getEtag() != null) {
                attributes.put(ETAG_ATTR, blob.getEtag());
            }
            if (blob.getGeneratedId() != null) {
                attributes.put(GENERATED_ID_ATTR, blob.getGeneratedId());
            }
            if (blob.getGeneration() != null) {
                attributes.put(GENERATION_ATTR, String.valueOf(blob.getGeneration()));
            }
            if (blob.getMd5() != null) {
                attributes.put(MD5_ATTR, blob.getMd5());
            }
            if (blob.getMediaLink() != null) {
                attributes.put(MEDIA_LINK_ATTR, blob.getMediaLink());
            }
            if (blob.getMetageneration() != null) {
                attributes.put(METAGENERATION_ATTR, String.valueOf(blob.getMetageneration()));
            }
            if (blob.getOwner() != null) {
                // The owner attribute holds the identifier that matches the concrete ACL entity type.
                final Acl.Entity entity = blob.getOwner();
                if (entity instanceof Acl.User) {
                    attributes.put(OWNER_ATTR, ((Acl.User) entity).getEmail());
                    attributes.put(OWNER_TYPE_ATTR, "user");
                } else if (entity instanceof Acl.Group) {
                    attributes.put(OWNER_ATTR, ((Acl.Group) entity).getEmail());
                    attributes.put(OWNER_TYPE_ATTR, "group");
                } else if (entity instanceof Acl.Domain) {
                    attributes.put(OWNER_ATTR, ((Acl.Domain) entity).getDomain());
                    attributes.put(OWNER_TYPE_ATTR, "domain");
                } else if (entity instanceof Acl.Project) {
                    attributes.put(OWNER_ATTR, ((Acl.Project) entity).getProjectId());
                    attributes.put(OWNER_TYPE_ATTR, "project");
                }
            }
            if (blob.getSelfLink() != null) {
                attributes.put(URI_ATTR, blob.getSelfLink());
            }
            attributes.put(CoreAttributes.FILENAME.key(), blob.getName());
            if (blob.getCreateTime() != null) {
                attributes.put(CREATE_TIME_ATTR, String.valueOf(blob.getCreateTime()));
            }
            if (blob.getUpdateTime() != null) {
                attributes.put(UPDATE_TIME_ATTR, String.valueOf(blob.getUpdateTime()));
            }

            // Create the flowfile
            FlowFile flowFile = session.create();
            flowFile = session.putAllAttributes(flowFile, attributes);
            session.transfer(flowFile, REL_SUCCESS);

            // Update state: track the newest update time seen and the names of the blobs carrying it.
            // NOTE(review): clearing currentKeys here also affects the skip check above for later
            // blobs in this same listing - confirm that this is intended.
            if (lastModified > maxTimestamp) {
                maxTimestamp = lastModified;
                currentKeys.clear();
            }
            if (lastModified == maxTimestamp) {
                currentKeys.add(blob.getName());
            }
            listCount++;
        }

        // Advance to the next page (null once the listing is exhausted) and commit this page's results.
        blobPages = blobPages.getNextPage();
        commit(context, session, listCount);
        listCount = 0;
    } while (blobPages != null);

    currentTimestamp = maxTimestamp;

    final long listMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    getLogger().info("Successfully listed GCS bucket {} in {} millis", new Object[] { bucket, listMillis });

    // NOTE(review): listCount is always 0 at this point because it is reset after every page.
    // Whether that makes the branch below unconditional depends on commit(), which is not
    // visible here - confirm.
    if (!commit(context, session, listCount)) {
        if (currentTimestamp > 0) {
            persistState(context);
        }
        getLogger().debug("No new objects in GCS bucket {} to list. Yielding.", new Object[] { bucket });
        context.yield();
    }
}
Use of com.google.cloud.storage.Storage in the Apache NiFi project.
Class FetchGCSObjectTest, method testBlobIdWithGeneration.
/**
 * Runs the processor with a GENERATION property set, with the BUCKET and KEY properties removed
 * and the bucket and file name supplied as flow file attributes instead. Checks that the
 * {@code BlobId} passed to {@code Storage.get} carries the expected bucket, key and generation,
 * and that the only source option passed to {@code Storage.reader} is the generation match.
 */
@Test
public void testBlobIdWithGeneration() throws Exception {
    reset(storage);

    // Configure the runner: no explicit bucket/key, but an explicit generation.
    final TestRunner testRunner = buildNewRunner(getProcessor());
    addRequiredPropertiesToRunner(testRunner);
    testRunner.removeProperty(FetchGCSObject.KEY);
    testRunner.removeProperty(FetchGCSObject.BUCKET);
    testRunner.setProperty(FetchGCSObject.GENERATION, String.valueOf(GENERATION));
    testRunner.assertValid();

    // Stub the storage service so that the lookup and the read both succeed.
    final Blob mockedBlob = mock(Blob.class);
    when(storage.get(any(BlobId.class))).thenReturn(mockedBlob);
    when(storage.reader(any(BlobId.class), any(Storage.BlobSourceOption.class)))
            .thenReturn(new MockReadChannel(CONTENT));

    testRunner.enqueue("", ImmutableMap.of(BUCKET_ATTR, BUCKET, CoreAttributes.FILENAME.key(), KEY));
    testRunner.run();

    // Capture what the processor handed to the storage service.
    final ArgumentCaptor<BlobId> idCaptor = ArgumentCaptor.forClass(BlobId.class);
    final ArgumentCaptor<Storage.BlobSourceOption> optionCaptor =
            ArgumentCaptor.forClass(Storage.BlobSourceOption.class);
    verify(storage).get(idCaptor.capture());
    verify(storage).reader(any(BlobId.class), optionCaptor.capture());

    final BlobId capturedId = idCaptor.getValue();
    assertEquals(BUCKET, capturedId.getBucket());
    assertEquals(KEY, capturedId.getName());
    assertEquals(GENERATION, capturedId.getGeneration());

    final Set<Storage.BlobSourceOption> capturedOptions = ImmutableSet.copyOf(optionCaptor.getAllValues());
    assertTrue(capturedOptions.contains(Storage.BlobSourceOption.generationMatch()));
    assertEquals(1, capturedOptions.size());
}
Use of com.google.cloud.storage.Storage in the Apache NiFi project.
Class ListGCSBucketTest, method testAclOwnerGroup.
/**
 * Lists a single mocked blob whose owner is an {@code Acl.Group} and checks that the resulting
 * flow file carries the group's e-mail as the owner attribute and {@code "group"} as the owner type.
 */
@Test
public void testAclOwnerGroup() throws Exception {
    reset(storage, mockBlobPages);
    final ListGCSBucket processor = getProcessor();
    final TestRunner runner = buildNewRunner(processor);
    addRequiredPropertiesToRunner(runner);
    runner.assertValid();

    // One blob, owned by a group.
    final Blob blob = buildMockBlob("test-bucket-1", "test-key-1", 2L);
    final Acl.Group mockGroup = mock(Acl.Group.class);
    when(mockGroup.getEmail()).thenReturn(OWNER_GROUP_EMAIL);
    when(blob.getOwner()).thenReturn(mockGroup);

    // A single page of results with no successor page.
    final Iterable<Blob> mockList = ImmutableList.of(blob);
    when(mockBlobPages.getValues()).thenReturn(mockList);
    when(mockBlobPages.getNextPage()).thenReturn(null);
    when(storage.list(anyString(), any(Storage.BlobListOption[].class))).thenReturn(mockBlobPages);

    runner.enqueue("test");
    runner.run();

    // Reference the relationship through the processor under test, as the sibling tests do.
    runner.assertAllFlowFilesTransferred(ListGCSBucket.REL_SUCCESS);
    runner.assertTransferCount(ListGCSBucket.REL_SUCCESS, 1);
    final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ListGCSBucket.REL_SUCCESS).get(0);
    assertEquals(OWNER_GROUP_EMAIL, flowFile.getAttribute(OWNER_ATTR));
    assertEquals("group", flowFile.getAttribute(OWNER_TYPE_ATTR));
}
Use of com.google.cloud.storage.Storage in the Apache NiFi project.
Class ListGCSBucketTest, method testYieldOnBadStateRestore.
/**
 * Makes the state manager fail on cluster-scope reads and checks that a run transfers nothing to
 * {@code REL_SUCCESS} and logs exactly one error message.
 */
@Test
public void testYieldOnBadStateRestore() throws Exception {
    reset(storage, mockBlobPages);

    final ListGCSBucket listProcessor = getProcessor();
    final TestRunner testRunner = buildNewRunner(listProcessor);
    addRequiredPropertiesToRunner(testRunner);
    testRunner.assertValid();

    // Stub an empty, single-page listing.
    final Iterable<Blob> noBlobs = ImmutableList.of();
    when(mockBlobPages.getValues()).thenReturn(noBlobs);
    when(mockBlobPages.getNextPage()).thenReturn(null);
    when(storage.list(anyString(), any(Storage.BlobListOption[].class))).thenReturn(mockBlobPages);

    // Force the state lookup to fail.
    testRunner.getStateManager().setFailOnStateGet(Scope.CLUSTER, true);

    testRunner.enqueue("test");
    testRunner.run();

    testRunner.assertTransferCount(ListGCSBucket.REL_SUCCESS, 0);
    final int loggedErrors = testRunner.getLogger().getErrorMessages().size();
    assertEquals(1, loggedErrors);
}
Aggregations