use of org.apache.beam.sdk.util.gcsfs.GcsPath in project beam by apache.
the class GcsFileSystem method match.
/**
 * Matches each spec and returns one {@link MatchResult} per spec, in input order.
 *
 * <p>The specs are converted to {@link GcsPath}s and split into wildcard and non-wildcard
 * groups. Each group is handed to its own matcher ({@code matchGlobs} / {@code matchNonGlobs})
 * in one call, and the two result sequences are then interleaved back into the original order.
 *
 * @param specs the path specs to match
 * @return the match results, positionally aligned with {@code specs}
 * @throws IOException declared by the signature; presumably propagated from the matching
 *     helpers (their bodies are not visible here)
 */
@Override
protected List<MatchResult> match(List<String> specs) throws IOException {
  List<GcsPath> wildcardPaths = Lists.newArrayList();
  List<GcsPath> plainPaths = Lists.newArrayList();
  // Records, for every input position, which group that path was routed to.
  List<Boolean> routedToWildcard = Lists.newArrayList();
  for (GcsPath candidate : toGcsPaths(specs)) {
    boolean wildcard = GcsUtil.isWildcard(candidate);
    (wildcard ? wildcardPaths : plainPaths).add(candidate);
    routedToWildcard.add(wildcard);
  }

  Iterator<MatchResult> wildcardResults = matchGlobs(wildcardPaths).iterator();
  Iterator<MatchResult> plainResults = matchNonGlobs(plainPaths).iterator();

  // Replay the routing decisions to restore the caller's ordering.
  ImmutableList.Builder<MatchResult> merged = ImmutableList.builder();
  for (boolean fromWildcard : routedToWildcard) {
    if (!fromWildcard) {
      checkState(plainResults.hasNext(), "Expect nonGlobsMatchResults has next.");
      merged.add(plainResults.next());
      continue;
    }
    checkState(wildcardResults.hasNext(), "Expect globsMatchResults has next.");
    merged.add(wildcardResults.next());
  }

  // Each matcher must have produced exactly one result per path it was given.
  checkState(!wildcardResults.hasNext(), "Expect no more elements in globsMatchResults.");
  checkState(!plainResults.hasNext(), "Expect no more elements in nonGlobsMatchResults.");
  return merged.build();
}
use of org.apache.beam.sdk.util.gcsfs.GcsPath in project beam by apache.
the class GcsPathValidator method verifyPath.
/**
 * Checks that {@code path} is usable as a GCS location and returns its resource name.
 *
 * <p>Three conditions are enforced through {@code checkArgument}, in this order: the parsed
 * path is absolute, its object component is non-empty, and the object component contains no
 * consecutive slashes.
 *
 * @param path the path string to validate
 * @return the result of {@code toResourceName()} on the parsed path
 */
@Override
public String verifyPath(String path) {
  final GcsPath parsed = getGcsPath(path);
  checkArgument(parsed.isAbsolute(), "Must provide absolute paths for Dataflow");

  final boolean hasObject = !parsed.getObject().isEmpty();
  checkArgument(
      hasObject,
      "Missing object or bucket in path: '%s', did you mean: 'gs://some-bucket/%s'?",
      parsed,
      parsed.getBucket());

  final boolean hasConsecutiveSlashes = parsed.getObject().contains("//");
  checkArgument(
      !hasConsecutiveSlashes,
      "Dataflow Service does not allow objects with consecutive slashes");

  return parsed.toResourceName();
}
use of org.apache.beam.sdk.util.gcsfs.GcsPath in project beam by apache.
the class GcsResourceId method getCurrentDirectory.
/**
 * Returns the directory this resource lives in.
 *
 * <p>If this resource is itself a directory it is returned unchanged; otherwise a resource id
 * is built from the parent of the underlying {@code gcsPath}.
 *
 * @return this instance when it is a directory, else the resource id of the parent path
 */
@Override
public GcsResourceId getCurrentDirectory() {
  if (isDirectory()) {
    return this;
  }
  GcsPath parent = gcsPath.getParent();
  // Hand the template and its argument to checkState instead of pre-formatting with
  // String.format, so the message is only built when the check actually fails.
  // NOTE(review): assumes checkState is Guava's Preconditions.checkState -- confirm the import.
  checkState(parent != null, "Failed to get the current directory for path: [%s].", gcsPath);
  return fromGcsPath(parent);
}
use of org.apache.beam.sdk.util.gcsfs.GcsPath in project beam by apache.
the class GcsUtilTest method testAccessDeniedObjectThrowsIOException.
// When the object lookup is rejected with a "forbidden" response (access denied), the error
// must surface from GcsUtil.expand() as an IOException with a descriptive message.
@Test
public void testAccessDeniedObjectThrowsIOException() throws IOException {
  GcsPath deniedPath = GcsPath.fromUri("gs://testbucket/testdirectory/accessdeniedfile");
  GoogleJsonResponseException forbidden =
      googleJsonResponseException(
          HttpStatusCodes.STATUS_CODE_FORBIDDEN,
          "Waves hand mysteriously",
          "These aren't the buckets you're looking for");

  // Stub the storage client so that fetching the object's metadata throws the 'forbidden' error.
  Storage storage = Mockito.mock(Storage.class);
  Storage.Objects objects = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get getRequest = Mockito.mock(Storage.Objects.Get.class);
  when(storage.objects()).thenReturn(objects);
  when(objects.get(deniedPath.getBucket(), deniedPath.getObject())).thenReturn(getRequest);
  when(getRequest.execute()).thenThrow(forbidden);

  GcsUtil gcsUtil = gcsOptionsWithTestCredential().getGcsUtil();
  gcsUtil.setStorageClient(storage);

  // Expectations have to be registered before the call that is expected to throw.
  thrown.expect(IOException.class);
  thrown.expectMessage("Unable to get the file object for path");
  gcsUtil.expand(deniedPath);
}
use of org.apache.beam.sdk.util.gcsfs.GcsPath in project beam by apache.
the class GcsUtilTest method testNonExistentObjectReturnsEmptyResult.
// GcsUtil.expand() must NOT fail when asked for a single object that does not exist: a
// "not found" response from the object lookup is expected to yield an empty result.
// We should return the empty result since GCS get object is strongly consistent.
@Test
public void testNonExistentObjectReturnsEmptyResult() throws IOException {
  GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
  GcsUtil gcsUtil = pipelineOptions.getGcsUtil();

  Storage mockStorage = Mockito.mock(Storage.class);
  gcsUtil.setStorageClient(mockStorage);
  Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get mockStorageGet = Mockito.mock(Storage.Objects.Get.class);

  GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/nonexistentfile");
  GoogleJsonResponseException expectedException =
      googleJsonResponseException(
          HttpStatusCodes.STATUS_CODE_NOT_FOUND, "It don't exist", "Nothing here to see");

  // Stub the storage client so that fetching the object's metadata throws the 'not found' error.
  when(mockStorage.objects()).thenReturn(mockStorageObjects);
  when(mockStorageObjects.get(pattern.getBucket(), pattern.getObject()))
      .thenReturn(mockStorageGet);
  when(mockStorageGet.execute()).thenThrow(expectedException);

  // Collections.emptyList() is the type-safe replacement for the raw Collections.EMPTY_LIST.
  assertEquals(Collections.emptyList(), gcsUtil.expand(pattern));
}
Aggregations