Search in sources :

Example 1 with Storage

use of com.google.cloud.storage.Storage in project nifi by apache.

the class FetchGCSObject method onTrigger.

/**
 * Fetches a single Google Cloud Storage object into the incoming FlowFile.
 *
 * <p>Bucket, key, generation and decryption key are read from the processor properties, evaluated
 * against the FlowFile's attributes. The blob's metadata is looked up first; a {@code null} lookup
 * result is reported as a 404 {@link StorageException}. The blob content is then imported into the
 * FlowFile, the available blob metadata is copied onto FlowFile attributes, the FlowFile is routed to
 * {@code REL_SUCCESS} and a provenance fetch event is recorded.
 *
 * <p>Any {@link StorageException} is logged, the FlowFile is penalized and routed to
 * {@code REL_FAILURE}.
 *
 * @param context provides the processor property values
 * @param session used to obtain, modify and transfer the FlowFile
 * @throws ProcessException declared by the processor contract
 */
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final long startNanos = System.nanoTime();
    String bucketName = context.getProperty(BUCKET).evaluateAttributeExpressions(flowFile).getValue();
    String key = context.getProperty(KEY).evaluateAttributeExpressions(flowFile).getValue();
    Long generation = context.getProperty(GENERATION).evaluateAttributeExpressions(flowFile).asLong();
    String encryptionKey = context.getProperty(ENCRYPTION_KEY).evaluateAttributeExpressions(flowFile).getValue();
    final Storage storage = getCloudService();
    final Map<String, String> attributes = new HashMap<>();
    final BlobId blobId = BlobId.of(bucketName, key, generation);
    try {
        // Only pass the read options that were actually configured.
        final List<Storage.BlobSourceOption> blobSourceOptions = new ArrayList<>(2);
        if (encryptionKey != null) {
            blobSourceOptions.add(Storage.BlobSourceOption.decryptionKey(encryptionKey));
        }
        if (generation != null) {
            blobSourceOptions.add(Storage.BlobSourceOption.generationMatch());
        }
        final Blob blob = storage.get(blobId);
        if (blob == null) {
            throw new StorageException(404, "Blob " + blobId + " not found");
        }
        // Close the channel once the content has been imported so it is not leaked,
        // including when the import itself fails.
        try (ReadChannel reader = storage.reader(blobId, blobSourceOptions.toArray(new Storage.BlobSourceOption[blobSourceOptions.size()]))) {
            flowFile = session.importFrom(Channels.newInputStream(reader), flowFile);
        }
        // Copy every piece of metadata the blob exposes; absent values are skipped.
        attributes.put(BUCKET_ATTR, blob.getBucket());
        attributes.put(KEY_ATTR, blob.getName());
        if (blob.getSize() != null) {
            attributes.put(SIZE_ATTR, String.valueOf(blob.getSize()));
        }
        if (blob.getCacheControl() != null) {
            attributes.put(CACHE_CONTROL_ATTR, blob.getCacheControl());
        }
        if (blob.getComponentCount() != null) {
            attributes.put(COMPONENT_COUNT_ATTR, String.valueOf(blob.getComponentCount()));
        }
        if (blob.getContentEncoding() != null) {
            attributes.put(CONTENT_ENCODING_ATTR, blob.getContentEncoding());
        }
        if (blob.getContentLanguage() != null) {
            attributes.put(CONTENT_LANGUAGE_ATTR, blob.getContentLanguage());
        }
        if (blob.getContentType() != null) {
            attributes.put(CoreAttributes.MIME_TYPE.key(), blob.getContentType());
        }
        if (blob.getCrc32c() != null) {
            attributes.put(CRC32C_ATTR, blob.getCrc32c());
        }
        if (blob.getCustomerEncryption() != null) {
            final BlobInfo.CustomerEncryption encryption = blob.getCustomerEncryption();
            attributes.put(ENCRYPTION_ALGORITHM_ATTR, encryption.getEncryptionAlgorithm());
            attributes.put(ENCRYPTION_SHA256_ATTR, encryption.getKeySha256());
        }
        if (blob.getEtag() != null) {
            attributes.put(ETAG_ATTR, blob.getEtag());
        }
        if (blob.getGeneratedId() != null) {
            attributes.put(GENERATED_ID_ATTR, blob.getGeneratedId());
        }
        if (blob.getGeneration() != null) {
            attributes.put(GENERATION_ATTR, String.valueOf(blob.getGeneration()));
        }
        if (blob.getMd5() != null) {
            attributes.put(MD5_ATTR, blob.getMd5());
        }
        if (blob.getMediaLink() != null) {
            attributes.put(MEDIA_LINK_ATTR, blob.getMediaLink());
        }
        if (blob.getMetageneration() != null) {
            attributes.put(METAGENERATION_ATTR, String.valueOf(blob.getMetageneration()));
        }
        if (blob.getOwner() != null) {
            // The owner attribute value depends on the concrete ACL entity type;
            // entity types not listed here produce no owner attributes.
            final Acl.Entity entity = blob.getOwner();
            if (entity instanceof Acl.User) {
                attributes.put(OWNER_ATTR, ((Acl.User) entity).getEmail());
                attributes.put(OWNER_TYPE_ATTR, "user");
            } else if (entity instanceof Acl.Group) {
                attributes.put(OWNER_ATTR, ((Acl.Group) entity).getEmail());
                attributes.put(OWNER_TYPE_ATTR, "group");
            } else if (entity instanceof Acl.Domain) {
                attributes.put(OWNER_ATTR, ((Acl.Domain) entity).getDomain());
                attributes.put(OWNER_TYPE_ATTR, "domain");
            } else if (entity instanceof Acl.Project) {
                attributes.put(OWNER_ATTR, ((Acl.Project) entity).getProjectId());
                attributes.put(OWNER_TYPE_ATTR, "project");
            }
        }
        if (blob.getSelfLink() != null) {
            attributes.put(URI_ATTR, blob.getSelfLink());
        }
        if (blob.getContentDisposition() != null) {
            attributes.put(CONTENT_DISPOSITION_ATTR, blob.getContentDisposition());
            // When the disposition can be parsed, its file name becomes the FlowFile's filename.
            final Util.ParsedContentDisposition parsedContentDisposition = Util.parseContentDisposition(blob.getContentDisposition());
            if (parsedContentDisposition != null) {
                attributes.put(CoreAttributes.FILENAME.key(), parsedContentDisposition.getFileName());
            }
        }
        if (blob.getCreateTime() != null) {
            attributes.put(CREATE_TIME_ATTR, String.valueOf(blob.getCreateTime()));
        }
        if (blob.getUpdateTime() != null) {
            attributes.put(UPDATE_TIME_ATTR, String.valueOf(blob.getUpdateTime()));
        }
    } catch (StorageException e) {
        getLogger().error(e.getMessage(), e);
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    if (!attributes.isEmpty()) {
        flowFile = session.putAllAttributes(flowFile, attributes);
    }
    session.transfer(flowFile, REL_SUCCESS);
    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    getLogger().info("Successfully retrieved GCS Object for {} in {} millis; routing to success", new Object[] { flowFile, millis });
    session.getProvenanceReporter().fetch(flowFile, "https://" + bucketName + ".storage.googleapis.com/" + key, millis);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ReadChannel(com.google.cloud.ReadChannel) FlowFile(org.apache.nifi.flowfile.FlowFile) Blob(com.google.cloud.storage.Blob) BlobInfo(com.google.cloud.storage.BlobInfo) Acl(com.google.cloud.storage.Acl) Storage(com.google.cloud.storage.Storage) BlobId(com.google.cloud.storage.BlobId) StorageException(com.google.cloud.storage.StorageException)

Example 2 with Storage

use of com.google.cloud.storage.Storage in project nifi by apache.

the class ListGCSBucket method onTrigger.

/**
 * Lists the configured GCS bucket and emits one attribute-only FlowFile per blob that passes the
 * timestamp/key filter kept in {@code currentTimestamp} and {@code currentKeys}.
 *
 * <p>Processor state is restored first; if that fails the error is logged and the processor yields.
 * Listing proceeds page by page, calling {@code commit} after each page. After the last page the
 * highest update time seen becomes the new {@code currentTimestamp}.
 *
 * @param context provides the processor property values and is used to yield
 * @param session used to create and transfer the listing FlowFiles
 * @throws ProcessException declared by the processor contract
 */
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    try {
        restoreState(context);
    } catch (IOException e) {
        getLogger().error("Failed to restore processor state; yielding", e);
        context.yield();
        return;
    }

    final long startNanos = System.nanoTime();
    final String bucket = context.getProperty(BUCKET).getValue();
    final String prefix = context.getProperty(PREFIX).getValue();
    final boolean useGenerations = context.getProperty(USE_GENERATIONS).asBoolean();

    // Translate the optional properties into listing options.
    final List<Storage.BlobListOption> options = new ArrayList<>();
    if (prefix != null) {
        options.add(Storage.BlobListOption.prefix(prefix));
    }
    if (useGenerations) {
        options.add(Storage.BlobListOption.versions(true));
    }

    final Storage storage = getCloudService();
    int listCount = 0;
    long maxTimestamp = 0L;
    Page<Blob> page = storage.list(bucket, options.toArray(new Storage.BlobListOption[options.size()]));
    do {
        for (final Blob entry : page.getValues()) {
            // NOTE(review): this unboxes getUpdateTime(), which is null-checked further down;
            // a null here would throw before that check is reached - confirm it cannot be null.
            final long updatedAt = entry.getUpdateTime();

            // Skip blobs older than the tracked timestamp, and blobs at exactly that
            // timestamp whose key has already been recorded.
            if (updatedAt < currentTimestamp) {
                continue;
            }
            if (updatedAt == currentTimestamp && currentKeys.contains(entry.getName())) {
                continue;
            }

            // Collect the blob metadata; absent values are skipped.
            final Map<String, String> attrs = new HashMap<>();
            attrs.put(BUCKET_ATTR, entry.getBucket());
            attrs.put(KEY_ATTR, entry.getName());

            final Long size = entry.getSize();
            if (size != null) {
                attrs.put(SIZE_ATTR, String.valueOf(size));
            }
            final String cacheControl = entry.getCacheControl();
            if (cacheControl != null) {
                attrs.put(CACHE_CONTROL_ATTR, cacheControl);
            }
            final Integer componentCount = entry.getComponentCount();
            if (componentCount != null) {
                attrs.put(COMPONENT_COUNT_ATTR, String.valueOf(componentCount));
            }
            final String contentDisposition = entry.getContentDisposition();
            if (contentDisposition != null) {
                attrs.put(CONTENT_DISPOSITION_ATTR, contentDisposition);
            }
            final String contentEncoding = entry.getContentEncoding();
            if (contentEncoding != null) {
                attrs.put(CONTENT_ENCODING_ATTR, contentEncoding);
            }
            final String contentLanguage = entry.getContentLanguage();
            if (contentLanguage != null) {
                attrs.put(CONTENT_LANGUAGE_ATTR, contentLanguage);
            }
            final String contentType = entry.getContentType();
            if (contentType != null) {
                attrs.put(CoreAttributes.MIME_TYPE.key(), contentType);
            }
            final String crc32c = entry.getCrc32c();
            if (crc32c != null) {
                attrs.put(CRC32C_ATTR, crc32c);
            }
            final BlobInfo.CustomerEncryption customerEncryption = entry.getCustomerEncryption();
            if (customerEncryption != null) {
                attrs.put(ENCRYPTION_ALGORITHM_ATTR, customerEncryption.getEncryptionAlgorithm());
                attrs.put(ENCRYPTION_SHA256_ATTR, customerEncryption.getKeySha256());
            }
            final String etag = entry.getEtag();
            if (etag != null) {
                attrs.put(ETAG_ATTR, etag);
            }
            final String generatedId = entry.getGeneratedId();
            if (generatedId != null) {
                attrs.put(GENERATED_ID_ATTR, generatedId);
            }
            final Long generation = entry.getGeneration();
            if (generation != null) {
                attrs.put(GENERATION_ATTR, String.valueOf(generation));
            }
            final String md5 = entry.getMd5();
            if (md5 != null) {
                attrs.put(MD5_ATTR, md5);
            }
            final String mediaLink = entry.getMediaLink();
            if (mediaLink != null) {
                attrs.put(MEDIA_LINK_ATTR, mediaLink);
            }
            final Long metageneration = entry.getMetageneration();
            if (metageneration != null) {
                attrs.put(METAGENERATION_ATTR, String.valueOf(metageneration));
            }

            // The owner attribute value depends on the concrete ACL entity type;
            // entity types not listed here produce no owner attributes.
            final Acl.Entity owner = entry.getOwner();
            if (owner != null) {
                if (owner instanceof Acl.User) {
                    attrs.put(OWNER_ATTR, ((Acl.User) owner).getEmail());
                    attrs.put(OWNER_TYPE_ATTR, "user");
                } else if (owner instanceof Acl.Group) {
                    attrs.put(OWNER_ATTR, ((Acl.Group) owner).getEmail());
                    attrs.put(OWNER_TYPE_ATTR, "group");
                } else if (owner instanceof Acl.Domain) {
                    attrs.put(OWNER_ATTR, ((Acl.Domain) owner).getDomain());
                    attrs.put(OWNER_TYPE_ATTR, "domain");
                } else if (owner instanceof Acl.Project) {
                    attrs.put(OWNER_ATTR, ((Acl.Project) owner).getProjectId());
                    attrs.put(OWNER_TYPE_ATTR, "project");
                }
            }

            final String selfLink = entry.getSelfLink();
            if (selfLink != null) {
                attrs.put(URI_ATTR, selfLink);
            }
            // The blob name doubles as the FlowFile's filename.
            attrs.put(CoreAttributes.FILENAME.key(), entry.getName());
            final Long createTime = entry.getCreateTime();
            if (createTime != null) {
                attrs.put(CREATE_TIME_ATTR, String.valueOf(createTime));
            }
            final Long updateTime = entry.getUpdateTime();
            if (updateTime != null) {
                attrs.put(UPDATE_TIME_ATTR, String.valueOf(updateTime));
            }

            // Emit the listing entry as a new FlowFile carrying only attributes.
            final FlowFile listed = session.putAllAttributes(session.create(), attrs);
            session.transfer(listed, REL_SUCCESS);

            // Track the newest timestamp seen and every key that shares it.
            if (updatedAt > maxTimestamp) {
                maxTimestamp = updatedAt;
                currentKeys.clear();
            }
            if (updatedAt == maxTimestamp) {
                currentKeys.add(entry.getName());
            }
            listCount++;
        }

        // Advance to the next page before committing what this page produced.
        page = page.getNextPage();
        commit(context, session, listCount);
        listCount = 0;
    } while (page != null);

    currentTimestamp = maxTimestamp;
    final long listMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    getLogger().info("Successfully listed GCS bucket {} in {} millis", new Object[] { bucket, listMillis });

    // NOTE(review): listCount is reset to 0 at the end of every page iteration above,
    // so this call always passes 0 - confirm that is the intended contract with commit().
    final boolean committed = commit(context, session, listCount);
    if (!committed) {
        if (currentTimestamp > 0) {
            persistState(context);
        }
        getLogger().debug("No new objects in GCS bucket {} to list. Yielding.", new Object[] { bucket });
        context.yield();
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) Blob(com.google.cloud.storage.Blob) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) IOException(java.io.IOException) BlobInfo(com.google.cloud.storage.BlobInfo) Acl(com.google.cloud.storage.Acl) Storage(com.google.cloud.storage.Storage)

Example 3 with Storage

use of com.google.cloud.storage.Storage in project nifi by apache.

the class FetchGCSObjectTest method testBlobIdWithGeneration.

/**
 * Checks that a configured generation is carried into the {@link BlobId} used for the lookup and that
 * {@code generationMatch} is the only source option handed to the storage reader.
 */
@Test
public void testBlobIdWithGeneration() throws Exception {
    reset(storage);

    // Configure the processor with a generation but without explicit key/bucket properties.
    // Presumably the processor then falls back to defaults that read the FlowFile attributes
    // enqueued below - verify against the property descriptors.
    final TestRunner testRunner = buildNewRunner(getProcessor());
    addRequiredPropertiesToRunner(testRunner);
    testRunner.removeProperty(FetchGCSObject.KEY);
    testRunner.removeProperty(FetchGCSObject.BUCKET);
    testRunner.setProperty(FetchGCSObject.GENERATION, String.valueOf(GENERATION));
    testRunner.assertValid();

    // Stub the storage service: metadata lookup returns a mock blob, reader returns canned content.
    final Blob mockedBlob = mock(Blob.class);
    when(storage.get(any(BlobId.class))).thenReturn(mockedBlob);
    when(storage.reader(any(BlobId.class), any(Storage.BlobSourceOption.class))).thenReturn(new MockReadChannel(CONTENT));

    testRunner.enqueue("", ImmutableMap.of(BUCKET_ATTR, BUCKET, CoreAttributes.FILENAME.key(), KEY));
    testRunner.run();

    // Capture what the processor actually passed to the storage service.
    final ArgumentCaptor<BlobId> idCaptor = ArgumentCaptor.forClass(BlobId.class);
    final ArgumentCaptor<Storage.BlobSourceOption> optionCaptor = ArgumentCaptor.forClass(Storage.BlobSourceOption.class);
    verify(storage).get(idCaptor.capture());
    verify(storage).reader(any(BlobId.class), optionCaptor.capture());

    final BlobId capturedId = idCaptor.getValue();
    assertEquals(BUCKET, capturedId.getBucket());
    assertEquals(KEY, capturedId.getName());
    assertEquals(GENERATION, capturedId.getGeneration());

    final Set<Storage.BlobSourceOption> capturedOptions = ImmutableSet.copyOf(optionCaptor.getAllValues());
    assertTrue(capturedOptions.contains(Storage.BlobSourceOption.generationMatch()));
    assertEquals(1, capturedOptions.size());
}
Also used : Blob(com.google.cloud.storage.Blob) Storage(com.google.cloud.storage.Storage) TestRunner(org.apache.nifi.util.TestRunner) BlobId(com.google.cloud.storage.BlobId) Test(org.junit.Test)

Example 4 with Storage

use of com.google.cloud.storage.Storage in project nifi by apache.

the class ListGCSBucketTest method testAclOwnerGroup.

/**
 * Checks that a blob owned by an {@link Acl.Group} yields a FlowFile whose owner attribute is the
 * group's e-mail address and whose owner type attribute is {@code "group"}.
 */
@Test
public void testAclOwnerGroup() throws Exception {
    reset(storage, mockBlobPages);
    final ListGCSBucket processor = getProcessor();
    final TestRunner runner = buildNewRunner(processor);
    addRequiredPropertiesToRunner(runner);
    runner.assertValid();

    // A single listed blob whose owner is a mocked ACL group.
    final Blob blob = buildMockBlob("test-bucket-1", "test-key-1", 2L);
    final Acl.Group mockGroup = mock(Acl.Group.class);
    when(mockGroup.getEmail()).thenReturn(OWNER_GROUP_EMAIL);
    when(blob.getOwner()).thenReturn(mockGroup);

    // The listing consists of exactly one page containing that blob.
    final Iterable<Blob> mockList = ImmutableList.of(blob);
    when(mockBlobPages.getValues()).thenReturn(mockList);
    when(mockBlobPages.getNextPage()).thenReturn(null);
    when(storage.list(anyString(), any(Storage.BlobListOption[].class))).thenReturn(mockBlobPages);

    runner.enqueue("test");
    runner.run();

    // Reference the relationship through the processor under test (ListGCSBucket),
    // matching the other ListGCSBucket tests, rather than through FetchGCSObject.
    runner.assertAllFlowFilesTransferred(ListGCSBucket.REL_SUCCESS);
    runner.assertTransferCount(ListGCSBucket.REL_SUCCESS, 1);
    final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ListGCSBucket.REL_SUCCESS).get(0);
    assertEquals(OWNER_GROUP_EMAIL, flowFile.getAttribute(OWNER_ATTR));
    assertEquals("group", flowFile.getAttribute(OWNER_TYPE_ATTR));
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) Blob(com.google.cloud.storage.Blob) Storage(com.google.cloud.storage.Storage) TestRunner(org.apache.nifi.util.TestRunner) Acl(com.google.cloud.storage.Acl) Test(org.junit.Test)

Example 5 with Storage

use of com.google.cloud.storage.Storage in project nifi by apache.

the class ListGCSBucketTest method testYieldOnBadStateRestore.

/**
 * Checks that when reading cluster-scoped state fails, the processor transfers nothing to success
 * and logs exactly one error message.
 */
@Test
public void testYieldOnBadStateRestore() throws Exception {
    reset(storage, mockBlobPages);

    final TestRunner testRunner = buildNewRunner(getProcessor());
    addRequiredPropertiesToRunner(testRunner);
    testRunner.assertValid();

    // Stub an empty, single-page listing.
    final Iterable<Blob> emptyListing = ImmutableList.of();
    when(mockBlobPages.getValues()).thenReturn(emptyListing);
    when(mockBlobPages.getNextPage()).thenReturn(null);
    when(storage.list(anyString(), any(Storage.BlobListOption[].class))).thenReturn(mockBlobPages);

    // Make every cluster-scoped state read fail.
    testRunner.getStateManager().setFailOnStateGet(Scope.CLUSTER, true);

    testRunner.enqueue("test");
    testRunner.run();

    testRunner.assertTransferCount(ListGCSBucket.REL_SUCCESS, 0);
    final int loggedErrors = testRunner.getLogger().getErrorMessages().size();
    assertEquals(1, loggedErrors);
}
Also used : Blob(com.google.cloud.storage.Blob) Storage(com.google.cloud.storage.Storage) TestRunner(org.apache.nifi.util.TestRunner) Test(org.junit.Test)

Aggregations

Storage (com.google.cloud.storage.Storage): 140, Bucket (com.google.cloud.storage.Bucket): 45, Blob (com.google.cloud.storage.Blob): 44, Test (org.junit.Test): 30, BlobId (com.google.cloud.storage.BlobId): 24, BlobInfo (com.google.cloud.storage.BlobInfo): 14, TestRunner (org.apache.nifi.util.TestRunner): 12, SpringBootTest (org.springframework.boot.test.context.SpringBootTest): 11, Policy (com.google.cloud.Policy): 9, ArrayList (java.util.ArrayList): 8, Acl (com.google.cloud.storage.Acl): 7, Date (java.util.Date): 7, HashMap (java.util.HashMap): 7, BucketInfo (com.google.cloud.storage.BucketInfo): 6, HmacKeyMetadata (com.google.cloud.storage.HmacKey.HmacKeyMetadata): 6, MockFlowFile (org.apache.nifi.util.MockFlowFile): 6, GoogleCredentials (com.google.auth.oauth2.GoogleCredentials): 5, Binding (com.google.cloud.Binding): 5, WriteChannel (com.google.cloud.WriteChannel): 4, IOException (java.io.IOException): 4