Search in sources :

Example 11 with GcsPath

use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.

In the class GcsUtilTest, the method testRecursiveGlobExpansion.

/**
 * Checks that expanding the recursive glob {@code gs://testbucket/test/**}{@code /*.txt} against a
 * mocked bucket listing yields exactly the {@code .txt} objects below {@code test/}, in listing
 * order.
 *
 * <p>The mocked listing also contains a name ending in {@code /} and two objects without the
 * {@code .txt} suffix; none of these are part of the expected result.
 *
 * @throws IOException declared because the stubbed storage calls and {@code expand} declare it
 */
@Test
public void testRecursiveGlobExpansion() throws IOException {
    GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
    GcsUtil gcsUtil = pipelineOptions.getGcsUtil();

    // Replace the real storage client with a mock so no network call is made.
    Storage mockStorage = Mockito.mock(Storage.class);
    gcsUtil.setStorageClient(mockStorage);
    Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class);
    Storage.Objects.Get mockStorageGet = Mockito.mock(Storage.Objects.Get.class);
    Storage.Objects.List mockStorageList = Mockito.mock(Storage.Objects.List.class);

    // The fake bucket listing returned by the mocked list request.
    Objects modelObjects = new Objects();
    List<StorageObject> items = new ArrayList<>();
    // A directory
    items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/"));
    // Files within the directory
    items.add(new StorageObject().setBucket("testbucket").setName("test/directory/file1.txt"));
    items.add(new StorageObject().setBucket("testbucket").setName("test/directory/file2.txt"));
    items.add(new StorageObject().setBucket("testbucket").setName("test/directory/file3.txt"));
    items.add(new StorageObject().setBucket("testbucket").setName("test/directory/otherfile"));
    items.add(new StorageObject().setBucket("testbucket").setName("test/directory/anotherfile"));
    items.add(new StorageObject().setBucket("testbucket").setName("test/file4.txt"));
    modelObjects.setItems(items);

    when(mockStorage.objects()).thenReturn(mockStorageObjects);
    when(mockStorageObjects.get("testbucket", "test/directory/otherfile")).thenReturn(mockStorageGet);
    when(mockStorageObjects.list("testbucket")).thenReturn(mockStorageList);
    when(mockStorageGet.execute()).thenReturn(new StorageObject().setBucket("testbucket").setName("test/directory/otherfile"));
    when(mockStorageList.execute()).thenReturn(modelObjects);

    GcsPath pattern = GcsPath.fromUri("gs://testbucket/test/**/*.txt");
    List<GcsPath> expectedFiles = ImmutableList.of(
        GcsPath.fromUri("gs://testbucket/test/directory/file1.txt"),
        GcsPath.fromUri("gs://testbucket/test/directory/file2.txt"),
        GcsPath.fromUri("gs://testbucket/test/directory/file3.txt"),
        GcsPath.fromUri("gs://testbucket/test/file4.txt"));

    // The value under test goes first and the matcher is built from the expectation, so a
    // failure message reports "expected" and "was" the right way round.
    assertThat(gcsUtil.expand(pattern), contains(expectedFiles.toArray()));
}
Also used : Storage(com.google.api.services.storage.Storage) GoogleCloudStorage(com.google.cloud.hadoop.gcsio.GoogleCloudStorage) StorageObject(com.google.api.services.storage.model.StorageObject) Objects(com.google.api.services.storage.model.Objects) ArrayList(java.util.ArrayList) GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) GcsOptions(org.apache.beam.sdk.extensions.gcp.options.GcsOptions) Test(org.junit.Test)

Example 12 with GcsPath

use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.

In the class GcsUtilTest, the method testGCSWriteMetricsIsSet.

/**
 * Expects {@code GcsUtil.create} to fail with an {@link IOException} for
 * {@code testbucket/testobject}, and then checks that a {@code GcsInsert} metric with status
 * {@code permission_denied} was recorded once for project {@code my_project}.
 *
 * @throws IOException declared because the stubbed {@code create} call declares it
 */
@Test
public void testGCSWriteMetricsIsSet() throws IOException {
    GcsOptions options = gcsOptionsWithTestCredential();
    GcsUtil util = options.getGcsUtil();
    GoogleCloudStorage storageMock = Mockito.mock(GoogleCloudStorage.class);

    // Configure the cloud storage implementation from options.
    // NOTE(review): storageMock is not passed to util anywhere in this method; confirm whether
    // the stubbing below is actually exercised by util.create(...).
    GoogleCloudStorageOptions storageOptions =
        GoogleCloudStorageOptions.builder()
            .setAppName("Beam")
            .setGrpcEnabled(true)
            .setProjectId("my_project")
            .build();
    util.setCloudStorageImpl(storageOptions);

    StorageResourceId resourceId = new StorageResourceId("testbucket", "testobject");
    CreateObjectOptions createOptions =
        CreateObjectOptions.builder()
            .setOverwriteExisting(true)
            .setContentType("type")
            .build();
    when(storageMock.create(resourceId, createOptions)).thenThrow(IOException.class);

    GcsPath target = GcsPath.fromComponents("testbucket", "testobject");
    assertThrows(IOException.class, () -> util.create(target, ""));

    verifyMetricWasSet("my_project", "testbucket", "GcsInsert", "permission_denied", 1);
}
Also used : GoogleCloudStorage(com.google.cloud.hadoop.gcsio.GoogleCloudStorage) GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath) GcsOptions(org.apache.beam.sdk.extensions.gcp.options.GcsOptions) StorageResourceId(com.google.cloud.hadoop.gcsio.StorageResourceId) Test(org.junit.Test)

Example 13 with GcsPath

use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.

In the class MinimalWordCountTest, the method buildMockGcsUtil.

/**
 * Creates a Mockito-backed {@code GcsUtil} with two stubbed operations.
 *
 * <ul>
 *   <li>{@code open(GcsPath)} answers with a channel over a freshly created temporary file that
 *       is deleted when the channel is closed.
 *   <li>{@code expand(GcsPath)} answers with a single-element list holding the path it was given.
 * </ul>
 *
 * @return the stubbed mock
 * @throws IOException declared because the stubbed methods declare it
 */
private GcsUtil buildMockGcsUtil() throws IOException {
    GcsUtil stub = Mockito.mock(GcsUtil.class);

    // Any request to open gets a new bogus channel
    Mockito.when(stub.open(Mockito.any(GcsPath.class)))
        .then(invocation -> {
            return FileChannel.open(
                Files.createTempFile("channel-", ".tmp"),
                StandardOpenOption.CREATE,
                StandardOpenOption.DELETE_ON_CLOSE);
        });

    // Any request for expansion returns a list containing the original GcsPath
    // This is required to pass validation that occurs in TextIO during apply()
    Mockito.when(stub.expand(Mockito.any(GcsPath.class)))
        .then(invocation -> {
            Object requested = invocation.getArguments()[0];
            return ImmutableList.of((GcsPath) requested);
        });

    return stub;
}
Also used : GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath) GcsUtil(org.apache.beam.sdk.extensions.gcp.util.GcsUtil)

Example 14 with GcsPath

use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.

In the class ExampleEchoPipelineTest, the method buildMockGcsUtil.

/**
 * Creates a Mockito-backed {@code GcsUtil} whose {@code open} and {@code expand} calls are
 * answered locally.
 *
 * <p>{@code open(GcsPath)} yields a channel over a new temporary file opened with
 * {@code DELETE_ON_CLOSE}; {@code expand(GcsPath)} yields a one-element list containing the
 * path that was passed in.
 *
 * @return the stubbed mock
 * @throws IOException declared because the stubbed methods declare it
 */
private GcsUtil buildMockGcsUtil() throws IOException {
    GcsUtil stub = Mockito.mock(GcsUtil.class);

    // Any request to open gets a new bogus channel
    Answer<SeekableByteChannel> bogusChannel =
        (InvocationOnMock invocation) ->
            FileChannel.open(
                Files.createTempFile("channel-", ".tmp"),
                StandardOpenOption.CREATE,
                StandardOpenOption.DELETE_ON_CLOSE);
    Mockito.when(stub.open(Mockito.any(GcsPath.class))).then(bogusChannel);

    // Any request for expansion returns a list containing the original GcsPath
    // This is required to pass validation that occurs in TextIO during apply()
    Answer<List<GcsPath>> echoPath =
        (InvocationOnMock invocation) ->
            ImmutableList.of((GcsPath) invocation.getArguments()[0]);
    Mockito.when(stub.expand(Mockito.any(GcsPath.class))).then(echoPath);

    return stub;
}
Also used : SeekableByteChannel(java.nio.channels.SeekableByteChannel) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) GcsUtil(org.apache.beam.sdk.extensions.gcp.util.GcsUtil)

Example 15 with GcsPath

use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.

In the class DataflowPipelineTranslatorTest, the method buildPipelineOptions.

/**
 * Builds {@code DataflowPipelineOptions} for tests, backed by a mocked {@code GcsUtil} and a
 * mocked Dataflow client, and registers them as the default options for {@code FileSystems}.
 *
 * <p>The mocked {@code GcsUtil} reports every bucket as accessible and expands any path to a
 * single-element list containing that same path.
 *
 * @return the configured options
 * @throws IOException declared because the stubbed {@code GcsUtil} methods declare it
 */
private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
    // Stub GCS access so no real bucket is consulted.
    GcsUtil gcsStub = mock(GcsUtil.class);
    when(gcsStub.expand(any(GcsPath.class)))
        .then(invocation -> {
            Object requested = invocation.getArguments()[0];
            return ImmutableList.of((GcsPath) requested);
        });
    when(gcsStub.bucketAccessible(any(GcsPath.class))).thenReturn(true);

    DataflowPipelineOptions result = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    result.setRunner(DataflowRunner.class);
    result.setGcpCredential(new TestCredential());
    result.setJobName("some-job-name");
    result.setProject("some-project");
    result.setRegion("some-region");

    String tempLocation = GcsPath.fromComponents("somebucket", "some/path").toString();
    result.setTempLocation(tempLocation);

    result.setFilesToStage(new ArrayList<>());
    result.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));
    result.setGcsUtil(gcsStub);

    // Enable the FileSystems API to know about gs:// URIs in this test.
    FileSystems.setDefaultPipelineOptions(result);
    return result;
}
Also used : TestCredential(org.apache.beam.sdk.extensions.gcp.auth.TestCredential) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath) GcsUtil(org.apache.beam.sdk.extensions.gcp.util.GcsUtil)

Aggregations

GcsPath (org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath)20 ArrayList (java.util.ArrayList)8 Objects (com.google.api.services.storage.model.Objects)7 StorageObject (com.google.api.services.storage.model.StorageObject)7 Test (org.junit.Test)7 GcsUtil (org.apache.beam.sdk.extensions.gcp.util.GcsUtil)6 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)6 GoogleCloudStorage (com.google.cloud.hadoop.gcsio.GoogleCloudStorage)5 List (java.util.List)5 GcsOptions (org.apache.beam.sdk.extensions.gcp.options.GcsOptions)5 Storage (com.google.api.services.storage.Storage)4 GoogleJsonResponseException (com.google.api.client.googleapis.json.GoogleJsonResponseException)2 FileNotFoundException (java.io.FileNotFoundException)2 LinkedList (java.util.LinkedList)2 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)2 TestCredential (org.apache.beam.sdk.extensions.gcp.auth.TestCredential)2 MatchResult (org.apache.beam.sdk.io.fs.MatchResult)2 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)2 StorageResourceId (com.google.cloud.hadoop.gcsio.StorageResourceId)1 File (java.io.File)1