use of org.apache.beam.sdk.util.gcsfs.GcsPath in project beam by apache.
the class GcsFileSystemTest method testMatch.
/**
 * Checks {@code gcsFileSystem.match} against three specs: a glob, a file that does not exist,
 * and a file that does exist.
 *
 * <p>The mocked {@code GcsUtil} returns a fixed bucket listing from {@code listObjects} and a
 * (not-found, found) pair from {@code getObjects}. The test expects one {@code MatchResult} per
 * spec, in spec order: OK with the three numbered files, NOT_FOUND, and OK with the single file.
 */
@Test
public void testMatch() throws Exception {
  String globSpec = "gs://testbucket/testdirectory/file[1-3]*";
  String missingSpec = "gs://testbucket/testdirectory/non-exist-file";
  String existingSpec = "gs://testbucket/testdirectory/otherfile";

  // Bucket listing handed back by the mocked listObjects call.
  List<StorageObject> listing = new ArrayList<>();
  // A directory
  listing.add(new StorageObject().setBucket("testbucket").setName("testdirectory/"));
  // Files within the directory
  listing.add(createStorageObject("gs://testbucket/testdirectory/file1name", 1L));
  listing.add(createStorageObject("gs://testbucket/testdirectory/file2name", 2L));
  listing.add(createStorageObject("gs://testbucket/testdirectory/file3name", 3L));
  listing.add(createStorageObject("gs://testbucket/testdirectory/file4name", 4L));
  listing.add(createStorageObject("gs://testbucket/testdirectory/otherfile", 5L));
  listing.add(createStorageObject("gs://testbucket/testdirectory/anotherfile", 6L));
  Objects listResponse = new Objects();
  listResponse.setItems(listing);
  when(mockGcsUtil.listObjects(eq("testbucket"), anyString(), isNull(String.class)))
      .thenReturn(listResponse);

  // Lookup of the two non-glob paths: the first is reported missing, the second is found.
  List<GcsPath> lookedUpPaths =
      ImmutableList.of(GcsPath.fromUri(missingSpec), GcsPath.fromUri(existingSpec));
  StorageObjectOrIOException notFound =
      StorageObjectOrIOException.create(new FileNotFoundException());
  StorageObjectOrIOException found =
      StorageObjectOrIOException.create(createStorageObject(existingSpec, 4L));
  when(mockGcsUtil.getObjects(eq(lookedUpPaths))).thenReturn(ImmutableList.of(notFound, found));

  List<String> requestedSpecs = ImmutableList.of(globSpec, missingSpec, existingSpec);
  List<MatchResult> results = gcsFileSystem.match(requestedSpecs);
  assertEquals(3, results.size());

  // Result 0: the glob matches exactly file1name..file3name, in that order.
  MatchResult globResult = results.get(0);
  assertEquals(Status.OK, globResult.status());
  assertThat(
      ImmutableList.of(
          "gs://testbucket/testdirectory/file1name",
          "gs://testbucket/testdirectory/file2name",
          "gs://testbucket/testdirectory/file3name"),
      contains(toFilenames(globResult).toArray()));

  // Result 1: the missing file is reported as NOT_FOUND.
  MatchResult missingResult = results.get(1);
  assertEquals(Status.NOT_FOUND, missingResult.status());

  // Result 2: the existing file matches itself.
  MatchResult existingResult = results.get(2);
  assertEquals(Status.OK, existingResult.status());
  assertThat(
      ImmutableList.of("gs://testbucket/testdirectory/otherfile"),
      contains(toFilenames(existingResult).toArray()));
}
use of org.apache.beam.sdk.util.gcsfs.GcsPath in project beam by apache.
the class GcsFileSystemTest method createStorageObject.
/**
 * Builds a {@code StorageObject} whose bucket and object name are parsed from a GCS URI.
 *
 * @param gcsFilename URI of the object, parsed with {@code GcsPath.fromUri}
 * @param fileSize size to record on the object; {@code 0} is stored as a {@code null} size
 * @return a new {@code StorageObject} with bucket, name and size populated
 */
private StorageObject createStorageObject(String gcsFilename, long fileSize) {
  GcsPath parsedPath = GcsPath.fromUri(gcsFilename);
  // Google APIs will use null for empty files.
  @Nullable BigInteger reportedSize = null;
  if (fileSize != 0) {
    reportedSize = BigInteger.valueOf(fileSize);
  }
  return new StorageObject()
      .setBucket(parsedPath.getBucket())
      .setName(parsedPath.getObject())
      .setSize(reportedSize);
}
use of org.apache.beam.sdk.util.gcsfs.GcsPath in project beam by apache.
the class GcsUtilTest method testRecursiveGlobExpansion.
/**
 * Checks that {@code GcsUtil.expand} resolves the recursive pattern
 * {@code gs://testbucket/test/**}{@code /*.txt} to the four {@code .txt} objects in the mocked
 * listing, in listing order.
 */
@Test
public void testRecursiveGlobExpansion() throws IOException {
  GcsOptions options = gcsOptionsWithTestCredential();
  GcsUtil util = options.getGcsUtil();

  Storage storage = Mockito.mock(Storage.class);
  util.setStorageClient(storage);

  Storage.Objects objectsApi = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get getRequest = Mockito.mock(Storage.Objects.Get.class);
  Storage.Objects.List listRequest = Mockito.mock(Storage.Objects.List.class);

  // Object names returned by the mocked listing, in order.
  String[] objectNames = {
    // A directory
    "testdirectory/",
    // Files within the directory
    "test/directory/file1.txt",
    "test/directory/file2.txt",
    "test/directory/file3.txt",
    "test/directory/otherfile",
    "test/directory/anotherfile",
    "test/file4.txt",
  };
  List<StorageObject> listing = new ArrayList<>();
  for (String objectName : objectNames) {
    listing.add(new StorageObject().setBucket("testbucket").setName(objectName));
  }
  Objects listResponse = new Objects();
  listResponse.setItems(listing);

  when(storage.objects()).thenReturn(objectsApi);
  when(objectsApi.get("testbucket", "test/directory/otherfile")).thenReturn(getRequest);
  when(objectsApi.list("testbucket")).thenReturn(listRequest);
  when(getRequest.execute())
      .thenReturn(new StorageObject().setBucket("testbucket").setName("test/directory/otherfile"));
  when(listRequest.execute()).thenReturn(listResponse);

  GcsPath recursivePattern = GcsPath.fromUri("gs://testbucket/test/**/*.txt");
  List<GcsPath> txtFiles =
      ImmutableList.of(
          GcsPath.fromUri("gs://testbucket/test/directory/file1.txt"),
          GcsPath.fromUri("gs://testbucket/test/directory/file2.txt"),
          GcsPath.fromUri("gs://testbucket/test/directory/file3.txt"),
          GcsPath.fromUri("gs://testbucket/test/file4.txt"));
  assertThat(txtFiles, contains(util.expand(recursivePattern).toArray()));
}
use of org.apache.beam.sdk.util.gcsfs.GcsPath in project beam by apache.
the class GcsUtilTest method testGlobExpansion.
/**
 * Checks {@code GcsUtil.expand} for a literal path and for several glob forms ({@code *},
 * {@code [1-3]}, {@code ?}, and wildcards in more than one path component) against a mocked
 * storage client.
 *
 * <p>Every glob in this test is expected to expand to the same three objects,
 * {@code file1name}, {@code file2name} and {@code file3name}, in that order.
 */
@Test
public void testGlobExpansion() throws IOException {
  GcsOptions options = gcsOptionsWithTestCredential();
  GcsUtil util = options.getGcsUtil();

  Storage storage = Mockito.mock(Storage.class);
  util.setStorageClient(storage);

  Storage.Objects objectsApi = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get getRequest = Mockito.mock(Storage.Objects.Get.class);
  Storage.Objects.List listRequest = Mockito.mock(Storage.Objects.List.class);

  // Object names returned by the mocked listing, in order.
  String[] objectNames = {
    // A directory
    "testdirectory/",
    // Files within the directory
    "testdirectory/file1name",
    "testdirectory/file2name",
    "testdirectory/file3name",
    "testdirectory/otherfile",
    "testdirectory/anotherfile",
  };
  List<StorageObject> listing = new ArrayList<>();
  for (String objectName : objectNames) {
    listing.add(new StorageObject().setBucket("testbucket").setName(objectName));
  }
  Objects listResponse = new Objects();
  listResponse.setItems(listing);

  when(storage.objects()).thenReturn(objectsApi);
  when(objectsApi.get("testbucket", "testdirectory/otherfile")).thenReturn(getRequest);
  when(objectsApi.list("testbucket")).thenReturn(listRequest);
  when(getRequest.execute())
      .thenReturn(new StorageObject().setBucket("testbucket").setName("testdirectory/otherfile"));
  when(listRequest.execute()).thenReturn(listResponse);

  // Test a single file.
  GcsPath singleFile = GcsPath.fromUri("gs://testbucket/testdirectory/otherfile");
  List<GcsPath> singleFileExpected =
      ImmutableList.of(GcsPath.fromUri("gs://testbucket/testdirectory/otherfile"));
  assertThat(singleFileExpected, contains(util.expand(singleFile).toArray()));

  // Test patterns. Each one below must yield exactly these three files.
  List<GcsPath> numberedFiles =
      ImmutableList.of(
          GcsPath.fromUri("gs://testbucket/testdirectory/file1name"),
          GcsPath.fromUri("gs://testbucket/testdirectory/file2name"),
          GcsPath.fromUri("gs://testbucket/testdirectory/file3name"));

  GcsPath starPattern = GcsPath.fromUri("gs://testbucket/testdirectory/file*");
  assertThat(numberedFiles, contains(util.expand(starPattern).toArray()));

  GcsPath rangePattern = GcsPath.fromUri("gs://testbucket/testdirectory/file[1-3]*");
  assertThat(numberedFiles, contains(util.expand(rangePattern).toArray()));

  GcsPath questionPattern = GcsPath.fromUri("gs://testbucket/testdirectory/file?name");
  assertThat(numberedFiles, contains(util.expand(questionPattern).toArray()));

  GcsPath multiComponentPattern = GcsPath.fromUri("gs://testbucket/test*ectory/fi*name");
  assertThat(numberedFiles, contains(util.expand(multiComponentPattern).toArray()));
}
use of org.apache.beam.sdk.util.gcsfs.GcsPath in project beam by apache.
the class GcsUtil method makeCopyBatches.
/**
 * Builds the batch requests that copy each source file to the destination at the same position.
 *
 * <p>Copies are enqueued into one batch at a time. Once a batch's size reaches
 * {@code MAX_REQUESTS_PER_BATCH} it is added to the result and a new batch is started. A final
 * batch that is only partly filled is added too; an empty one is dropped.
 *
 * @param srcFilenames URIs of the files to copy, parsed with {@code GcsPath.fromUri}
 * @param destFilenames URIs of the copy targets; must have the same number of elements as
 *     {@code srcFilenames} (enforced with {@code checkArgument})
 * @return the batches in the order they were filled
 * @throws IOException declared by this method; propagated from the batch helpers it calls
 */
List<BatchRequest> makeCopyBatches(Iterable<String> srcFilenames, Iterable<String> destFilenames) throws IOException {
  List<String> sources = Lists.newArrayList(srcFilenames);
  List<String> destinations = Lists.newArrayList(destFilenames);
  checkArgument(
      sources.size() == destinations.size(),
      "Number of source files %s must equal number of destination files %s",
      sources.size(),
      destinations.size());

  List<BatchRequest> filledBatches = new LinkedList<>();
  BatchRequest currentBatch = createBatchRequest();
  int position = 0;
  for (String source : sources) {
    GcsPath copyFrom = GcsPath.fromUri(source);
    GcsPath copyTo = GcsPath.fromUri(destinations.get(position));
    position++;
    enqueueCopy(copyFrom, copyTo, currentBatch);
    if (currentBatch.size() < MAX_REQUESTS_PER_BATCH) {
      continue;
    }
    // The batch is full: store it and start a new one.
    filledBatches.add(currentBatch);
    currentBatch = createBatchRequest();
  }
  // Keep the trailing batch only if it has something in it.
  if (currentBatch.size() > 0) {
    filledBatches.add(currentBatch);
  }
  return filledBatches;
}
Aggregations