Search in sources :

Example 1 with GcsPath

use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.

the class GcsUtilTest method testAccessDeniedObjectThrowsIOException.

// GcsUtil.expand() must not swallow errors other than "not found": a forbidden (access denied)
// response from the object GET is expected to surface as an IOException.
@Test
public void testAccessDeniedObjectThrowsIOException() throws IOException {
    GcsUtil util = gcsOptionsWithTestCredential().getGcsUtil();

    // Mocked client chain: storage.objects().get(bucket, object).execute().
    Storage storageMock = Mockito.mock(Storage.class);
    util.setStorageClient(storageMock);
    Storage.Objects objectsMock = Mockito.mock(Storage.Objects.class);
    Storage.Objects.Get getRequestMock = Mockito.mock(Storage.Objects.Get.class);

    GcsPath deniedPath = GcsPath.fromUri("gs://testbucket/testdirectory/accessdeniedfile");
    GoogleJsonResponseException forbidden =
        googleJsonResponseException(
            HttpStatusCodes.STATUS_CODE_FORBIDDEN,
            "Waves hand mysteriously",
            "These aren't the buckets you're looking for");

    when(storageMock.objects()).thenReturn(objectsMock);
    String bucket = deniedPath.getBucket();
    String object = deniedPath.getObject();
    when(objectsMock.get(bucket, object)).thenReturn(getRequestMock);
    // Executing the GET request fails with the forbidden response built above.
    when(getRequestMock.execute()).thenThrow(forbidden);

    // Expectations must be registered before the call that is supposed to throw.
    thrown.expect(IOException.class);
    thrown.expectMessage("Unable to get the file object for path");
    util.expand(deniedPath);
}
Also used : GoogleJsonResponseException(com.google.api.client.googleapis.json.GoogleJsonResponseException) Storage(com.google.api.services.storage.Storage) GoogleCloudStorage(com.google.cloud.hadoop.gcsio.GoogleCloudStorage) Objects(com.google.api.services.storage.model.Objects) GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath) GcsOptions(org.apache.beam.sdk.extensions.gcp.options.GcsOptions) Test(org.junit.Test)

Example 2 with GcsPath

use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.

the class GcsUtilTest method testNonExistentObjectReturnsEmptyResult.

// GcsUtil.expand() on a single (non-glob) object that does not exist: the object GET answers
// with "not found", and expand() is expected to return an empty result rather than throw.
// Returning the empty result is acceptable since GCS get object is strongly consistent.
@Test
public void testNonExistentObjectReturnsEmptyResult() throws IOException {
    GcsUtil util = gcsOptionsWithTestCredential().getGcsUtil();

    // Mocked client chain: storage.objects().get(bucket, object).execute().
    Storage storageMock = Mockito.mock(Storage.class);
    util.setStorageClient(storageMock);
    Storage.Objects objectsMock = Mockito.mock(Storage.Objects.class);
    Storage.Objects.Get getRequestMock = Mockito.mock(Storage.Objects.Get.class);

    GcsPath missingPath = GcsPath.fromUri("gs://testbucket/testdirectory/nonexistentfile");
    GoogleJsonResponseException notFound =
        googleJsonResponseException(
            HttpStatusCodes.STATUS_CODE_NOT_FOUND,
            "It don't exist",
            "Nothing here to see");

    when(storageMock.objects()).thenReturn(objectsMock);
    String bucket = missingPath.getBucket();
    String object = missingPath.getObject();
    when(objectsMock.get(bucket, object)).thenReturn(getRequestMock);
    // Executing the GET request fails with the not-found response built above.
    when(getRequestMock.execute()).thenThrow(notFound);

    assertEquals(Collections.emptyList(), util.expand(missingPath));
}
Also used : GoogleJsonResponseException(com.google.api.client.googleapis.json.GoogleJsonResponseException) Storage(com.google.api.services.storage.Storage) GoogleCloudStorage(com.google.cloud.hadoop.gcsio.GoogleCloudStorage) Objects(com.google.api.services.storage.model.Objects) GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath) GcsOptions(org.apache.beam.sdk.extensions.gcp.options.GcsOptions) Test(org.junit.Test)

Example 3 with GcsPath

use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.

the class GcsFileSystemTest method testGlobExpansion.

// Verifies that gcsFileSystem.expand() resolves glob patterns against a mocked object listing.
// Each assertion passes the ACTUAL expansion result as the first argument and builds the matcher
// from the EXPECTED file names, so that a failure reports "Expected"/"but: was" the right way
// round (Hamcrest's contract is assertThat(actual, matcher)).
@Test
public void testGlobExpansion() throws IOException {
    Objects modelObjects = new Objects();
    List<StorageObject> items = new ArrayList<>();
    // A directory
    items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/"));
    // Files within the directory
    items.add(createStorageObject("gs://testbucket/testdirectory/file1name", 1L));
    items.add(createStorageObject("gs://testbucket/testdirectory/file2name", 2L));
    items.add(createStorageObject("gs://testbucket/testdirectory/file3name", 3L));
    items.add(createStorageObject("gs://testbucket/testdirectory/otherfile", 4L));
    items.add(createStorageObject("gs://testbucket/testdirectory/anotherfile", 5L));
    // A file in a sibling directory, only reachable through the directory wildcard below.
    items.add(createStorageObject("gs://testbucket/testotherdirectory/file4name", 6L));
    modelObjects.setItems(items);
    // Any listing of "testbucket" (any second argument, null third argument) returns the full
    // item list above; narrowing down to the pattern is left to the code under test.
    when(mockGcsUtil.listObjects(eq("testbucket"), anyString(), isNull(String.class))).thenReturn(modelObjects);
    // Test patterns.
    {
        // Trailing '*' wildcard.
        GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/file*");
        List<String> expectedFiles = ImmutableList.of("gs://testbucket/testdirectory/file1name", "gs://testbucket/testdirectory/file2name", "gs://testbucket/testdirectory/file3name");
        assertThat(toFilenames(gcsFileSystem.expand(pattern)), contains(expectedFiles.toArray()));
    }
    {
        // Character range followed by '*'.
        GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/file[1-3]*");
        List<String> expectedFiles = ImmutableList.of("gs://testbucket/testdirectory/file1name", "gs://testbucket/testdirectory/file2name", "gs://testbucket/testdirectory/file3name");
        assertThat(toFilenames(gcsFileSystem.expand(pattern)), contains(expectedFiles.toArray()));
    }
    {
        // Single-character '?' wildcard.
        GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/file?name");
        List<String> expectedFiles = ImmutableList.of("gs://testbucket/testdirectory/file1name", "gs://testbucket/testdirectory/file2name", "gs://testbucket/testdirectory/file3name");
        assertThat(toFilenames(gcsFileSystem.expand(pattern)), contains(expectedFiles.toArray()));
    }
    {
        // Wildcards in both the directory and the file component; also matches the sibling directory.
        GcsPath pattern = GcsPath.fromUri("gs://testbucket/test*ectory/fi*name");
        List<String> expectedFiles = ImmutableList.of("gs://testbucket/testdirectory/file1name", "gs://testbucket/testdirectory/file2name", "gs://testbucket/testdirectory/file3name", "gs://testbucket/testotherdirectory/file4name");
        assertThat(toFilenames(gcsFileSystem.expand(pattern)), contains(expectedFiles.toArray()));
    }
}
Also used : StorageObject(com.google.api.services.storage.model.StorageObject) Objects(com.google.api.services.storage.model.Objects) ArrayList(java.util.ArrayList) GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) Test(org.junit.Test)

Example 4 with GcsPath

use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.

the class GcsPathValidator method verifyPath.

/**
 * Checks that {@code path} is usable as a GCS location and returns its resource name.
 *
 * <p>The path is converted with {@code getGcsPath} and must then be absolute, have a non-empty
 * object component, and contain no consecutive slashes in that object component. Each rule is
 * enforced with {@code checkArgument}, in that order.
 *
 * @param path the path to validate
 * @return the value of {@code toResourceName()} for the parsed path
 */
@Override
public String verifyPath(String path) {
    final GcsPath parsed = getGcsPath(path);

    checkArgument(parsed.isAbsolute(), "Must provide absolute paths for Dataflow");

    final String objectName = parsed.getObject();
    final boolean hasObject = !objectName.isEmpty();
    // The hint reuses the bucket as the suggested object name: with an empty object component,
    // what was parsed as the bucket is presumably what the user meant as the object.
    checkArgument(
        hasObject,
        "Missing object or bucket in path: '%s', did you mean: 'gs://some-bucket/%s'?",
        parsed,
        parsed.getBucket());

    final boolean hasConsecutiveSlashes = objectName.contains("//");
    checkArgument(
        !hasConsecutiveSlashes,
        "Dataflow Service does not allow objects with consecutive slashes");

    return parsed.toResourceName();
}
Also used : GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath)

Example 5 with GcsPath

use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.

the class GcsUtil method makeGetBatches.

/**
 * Builds the {@link BatchInterface batches} needed to issue one get request per path.
 *
 * <p>The paths are split into consecutive groups of at most {@code MAX_REQUESTS_PER_BATCH}
 * entries. A fresh batch is obtained from {@code batchRequestSupplier} for every group, and each
 * path of the group is enqueued on it through {@code enqueueGetFileSize}. The value returned by
 * each enqueue call is appended to {@code results}, in path order.
 *
 * @param paths {@link GcsPath GcsPaths} to look up.
 * @param results mutable {@link List} that receives one entry per path.
 * @return the {@link BatchInterface BatchInterfaces} to execute, one per group of paths; this
 *     method only builds them and does not call anything on them besides passing them to
 *     {@code enqueueGetFileSize}.
 * @throws IOException propagated from the calls made while building the batches (presumably
 *     from enqueueing a request; confirm against {@code enqueueGetFileSize}).
 */
@VisibleForTesting
List<BatchInterface> makeGetBatches(Collection<GcsPath> paths, List<StorageObjectOrIOException[]> results) throws IOException {
    // Copy into a List first: Lists.partition works on a List, while callers pass any Collection.
    List<GcsPath> orderedPaths = Lists.newArrayList(paths);
    List<List<GcsPath>> chunks = Lists.partition(orderedPaths, MAX_REQUESTS_PER_BATCH);

    List<BatchInterface> preparedBatches = new ArrayList<>();
    for (List<GcsPath> chunk : chunks) {
        BatchInterface currentBatch = batchRequestSupplier.get();
        for (GcsPath gcsPath : chunk) {
            StorageObjectOrIOException[] slot = enqueueGetFileSize(gcsPath, currentBatch);
            results.add(slot);
        }
        preparedBatches.add(currentBatch);
    }
    return preparedBatches;
}
Also used : ArrayList(java.util.ArrayList) GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Aggregations

GcsPath (org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath)20 ArrayList (java.util.ArrayList)8 Objects (com.google.api.services.storage.model.Objects)7 StorageObject (com.google.api.services.storage.model.StorageObject)7 Test (org.junit.Test)7 GcsUtil (org.apache.beam.sdk.extensions.gcp.util.GcsUtil)6 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)6 GoogleCloudStorage (com.google.cloud.hadoop.gcsio.GoogleCloudStorage)5 List (java.util.List)5 GcsOptions (org.apache.beam.sdk.extensions.gcp.options.GcsOptions)5 Storage (com.google.api.services.storage.Storage)4 GoogleJsonResponseException (com.google.api.client.googleapis.json.GoogleJsonResponseException)2 FileNotFoundException (java.io.FileNotFoundException)2 LinkedList (java.util.LinkedList)2 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)2 TestCredential (org.apache.beam.sdk.extensions.gcp.auth.TestCredential)2 MatchResult (org.apache.beam.sdk.io.fs.MatchResult)2 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)2 StorageResourceId (com.google.cloud.hadoop.gcsio.StorageResourceId)1 File (java.io.File)1