use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.
the class GcsUtilTest method testAccessDeniedObjectThrowsIOException.
// GCSUtil.expand() should fail for other errors such as access denied.
@Test
public void testAccessDeniedObjectThrowsIOException() throws IOException {
GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
GcsUtil gcsUtil = pipelineOptions.getGcsUtil();
Storage mockStorage = Mockito.mock(Storage.class);
gcsUtil.setStorageClient(mockStorage);
Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class);
Storage.Objects.Get mockStorageGet = Mockito.mock(Storage.Objects.Get.class);
GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/accessdeniedfile");
GoogleJsonResponseException expectedException = googleJsonResponseException(HttpStatusCodes.STATUS_CODE_FORBIDDEN, "Waves hand mysteriously", "These aren't the buckets you're looking for");
when(mockStorage.objects()).thenReturn(mockStorageObjects);
when(mockStorageObjects.get(pattern.getBucket(), pattern.getObject())).thenReturn(mockStorageGet);
when(mockStorageGet.execute()).thenThrow(expectedException);
thrown.expect(IOException.class);
thrown.expectMessage("Unable to get the file object for path");
gcsUtil.expand(pattern);
}
use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.
the class GcsUtilTest method testNonExistentObjectReturnsEmptyResult.
// GCSUtil.expand() should fail when matching a single object when that object does not exist.
// We should return the empty result since GCS get object is strongly consistent.
@Test
public void testNonExistentObjectReturnsEmptyResult() throws IOException {
GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
GcsUtil gcsUtil = pipelineOptions.getGcsUtil();
Storage mockStorage = Mockito.mock(Storage.class);
gcsUtil.setStorageClient(mockStorage);
Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class);
Storage.Objects.Get mockStorageGet = Mockito.mock(Storage.Objects.Get.class);
GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/nonexistentfile");
GoogleJsonResponseException expectedException = googleJsonResponseException(HttpStatusCodes.STATUS_CODE_NOT_FOUND, "It don't exist", "Nothing here to see");
when(mockStorage.objects()).thenReturn(mockStorageObjects);
when(mockStorageObjects.get(pattern.getBucket(), pattern.getObject())).thenReturn(mockStorageGet);
when(mockStorageGet.execute()).thenThrow(expectedException);
assertEquals(Collections.emptyList(), gcsUtil.expand(pattern));
}
use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.
the class GcsFileSystemTest method testGlobExpansion.
@Test
public void testGlobExpansion() throws IOException {
Objects modelObjects = new Objects();
List<StorageObject> items = new ArrayList<>();
// A directory
items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/"));
// Files within the directory
items.add(createStorageObject("gs://testbucket/testdirectory/file1name", 1L));
items.add(createStorageObject("gs://testbucket/testdirectory/file2name", 2L));
items.add(createStorageObject("gs://testbucket/testdirectory/file3name", 3L));
items.add(createStorageObject("gs://testbucket/testdirectory/otherfile", 4L));
items.add(createStorageObject("gs://testbucket/testdirectory/anotherfile", 5L));
items.add(createStorageObject("gs://testbucket/testotherdirectory/file4name", 6L));
modelObjects.setItems(items);
when(mockGcsUtil.listObjects(eq("testbucket"), anyString(), isNull(String.class))).thenReturn(modelObjects);
// Test patterns.
{
GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/file*");
List<String> expectedFiles = ImmutableList.of("gs://testbucket/testdirectory/file1name", "gs://testbucket/testdirectory/file2name", "gs://testbucket/testdirectory/file3name");
assertThat(expectedFiles, contains(toFilenames(gcsFileSystem.expand(pattern)).toArray()));
}
{
GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/file[1-3]*");
List<String> expectedFiles = ImmutableList.of("gs://testbucket/testdirectory/file1name", "gs://testbucket/testdirectory/file2name", "gs://testbucket/testdirectory/file3name");
assertThat(expectedFiles, contains(toFilenames(gcsFileSystem.expand(pattern)).toArray()));
}
{
GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/file?name");
List<String> expectedFiles = ImmutableList.of("gs://testbucket/testdirectory/file1name", "gs://testbucket/testdirectory/file2name", "gs://testbucket/testdirectory/file3name");
assertThat(expectedFiles, contains(toFilenames(gcsFileSystem.expand(pattern)).toArray()));
}
{
GcsPath pattern = GcsPath.fromUri("gs://testbucket/test*ectory/fi*name");
List<String> expectedFiles = ImmutableList.of("gs://testbucket/testdirectory/file1name", "gs://testbucket/testdirectory/file2name", "gs://testbucket/testdirectory/file3name", "gs://testbucket/testotherdirectory/file4name");
assertThat(expectedFiles, contains(toFilenames(gcsFileSystem.expand(pattern)).toArray()));
}
}
use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.
the class GcsPathValidator method verifyPath.
@Override
public String verifyPath(String path) {
GcsPath gcsPath = getGcsPath(path);
checkArgument(gcsPath.isAbsolute(), "Must provide absolute paths for Dataflow");
checkArgument(!gcsPath.getObject().isEmpty(), "Missing object or bucket in path: '%s', did you mean: 'gs://some-bucket/%s'?", gcsPath, gcsPath.getBucket());
checkArgument(!gcsPath.getObject().contains("//"), "Dataflow Service does not allow objects with consecutive slashes");
return gcsPath.toResourceName();
}
use of org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath in project beam by apache.
the class GcsUtil method makeGetBatches.
/**
* Makes get {@link BatchInterface BatchInterfaces}.
*
* @param paths {@link GcsPath GcsPaths}.
* @param results mutable {@link List} for return values.
* @return {@link BatchInterface BatchInterfaces} to execute.
* @throws IOException
*/
@VisibleForTesting
List<BatchInterface> makeGetBatches(Collection<GcsPath> paths, List<StorageObjectOrIOException[]> results) throws IOException {
List<BatchInterface> batches = new ArrayList<>();
for (List<GcsPath> filesToGet : Lists.partition(Lists.newArrayList(paths), MAX_REQUESTS_PER_BATCH)) {
BatchInterface batch = batchRequestSupplier.get();
for (GcsPath path : filesToGet) {
results.add(enqueueGetFileSize(path, batch));
}
batches.add(batch);
}
return batches;
}
Aggregations