use of org.apache.beam.sdk.util.GcsUtil in project beam by apache.
the class DataflowRunnerTest method buildMockGcsUtil.
/**
 * Builds a Mockito mock of {@link GcsUtil} for use in tests.
 *
 * <p>Two calls are stubbed: {@code create(GcsPath, String)} answers with a channel onto a fresh
 * temporary file, and {@code expand(GcsPath)} answers with a single-element list holding the
 * path it was given.
 *
 * @return the stubbed mock
 * @throws IOException declared by the signature; presumably required because the stubbed
 *     {@code GcsUtil} methods declare it (their signatures are not visible here)
 */
private GcsUtil buildMockGcsUtil() throws IOException {
  GcsUtil mockGcsUtil = mock(GcsUtil.class);

  // Every create() call gets its own temp-file-backed channel. The channel must be opened with
  // WRITE: without WRITE or APPEND, FileChannel.open yields a read-only channel (and ignores
  // CREATE), so any attempt to write to the "created" object would throw
  // NonWritableChannelException. READ is kept so existing readers of the channel still work.
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).then(new Answer<SeekableByteChannel>() {
    @Override
    public SeekableByteChannel answer(InvocationOnMock invocation) throws Throwable {
      return FileChannel.open(
          Files.createTempFile("channel-", ".tmp"),
          StandardOpenOption.CREATE,
          StandardOpenOption.READ,
          StandardOpenOption.WRITE,
          StandardOpenOption.DELETE_ON_CLOSE);
    }
  });

  // Expansion simply echoes the requested path back as the only match.
  when(mockGcsUtil.expand(any(GcsPath.class))).then(new Answer<List<GcsPath>>() {
    @Override
    public List<GcsPath> answer(InvocationOnMock invocation) throws Throwable {
      return ImmutableList.of((GcsPath) invocation.getArguments()[0]);
    }
  });
  return mockGcsUtil;
}
use of org.apache.beam.sdk.util.GcsUtil in project beam by apache.
the class TfIdf method listInputDocuments.
/**
 * Lists documents contained beneath the {@code options.input} prefix/directory.
 *
 * <p>An input without a URI scheme is treated as a local {@code file} location. For {@code file}
 * inputs every entry of the directory is returned; for {@code gs} inputs the path is suffixed
 * with {@code *} and expanded through {@link GcsUtil}. Any other scheme yields an empty set.
 *
 * @param options pipeline options supplying the input location via {@code getInput()}
 * @return the URIs of the documents found; empty when the scheme is neither {@code file} nor
 *     {@code gs}
 * @throws URISyntaxException if the input cannot be parsed, or rebuilt, as a URI
 * @throws IOException if a local input directory cannot be listed, or if raised while expanding
 *     the GCS glob
 */
public static Set<URI> listInputDocuments(Options options) throws URISyntaxException, IOException {
  URI baseUri = new URI(options.getInput());
  // List all documents in the directory or GCS prefix.
  URI absoluteUri;
  if (baseUri.getScheme() != null) {
    absoluteUri = baseUri;
  } else {
    // No scheme given: interpret the input as a local file location.
    absoluteUri = new URI("file", baseUri.getAuthority(), baseUri.getPath(), baseUri.getQuery(), baseUri.getFragment());
  }
  Set<URI> uris = new HashSet<>();
  if (absoluteUri.getScheme().equals("file")) {
    File directory = new File(absoluteUri);
    // File.list() returns null (rather than throwing) when the path does not exist, is not a
    // directory, or cannot be read; surface that as a descriptive IOException instead of
    // letting the loop below fail with a bare NullPointerException.
    String[] entries = directory.list();
    if (entries == null) {
      throw new IOException(
          "Unable to list input directory (missing, not a directory, or unreadable): " + directory);
    }
    for (String entry : entries) {
      File path = new File(directory, entry);
      uris.add(path.toURI());
    }
  } else if (absoluteUri.getScheme().equals("gs")) {
    GcsUtil gcsUtil = options.as(GcsOptions.class).getGcsUtil();
    // Append a wildcard so the expansion matches everything beneath the prefix.
    URI gcsUriGlob = new URI(absoluteUri.getScheme(), absoluteUri.getAuthority(), absoluteUri.getPath() + "*", absoluteUri.getQuery(), absoluteUri.getFragment());
    for (GcsPath entry : gcsUtil.expand(GcsPath.fromUri(gcsUriGlob))) {
      uris.add(entry.toUri());
    }
  }
  return uris;
}
use of org.apache.beam.sdk.util.GcsUtil in project DataflowJavaSDK-examples by GoogleCloudPlatform.
the class MinimalWordCountJava8Test method buildMockGcsUtil.
/**
 * Creates a Mockito stand-in for {@link GcsUtil}.
 *
 * <p>{@code open(GcsPath)} is answered with a channel over a newly created temporary file, and
 * {@code expand(GcsPath)} is answered with a one-element list containing its argument.
 */
private GcsUtil buildMockGcsUtil() throws IOException {
  final GcsUtil gcsUtilStub = Mockito.mock(GcsUtil.class);

  // Each open() request is served by a throwaway channel backed by a fresh temp file.
  final Answer<SeekableByteChannel> bogusChannel =
      invocation ->
          FileChannel.open(
              Files.createTempFile("channel-", ".tmp"),
              StandardOpenOption.CREATE,
              StandardOpenOption.DELETE_ON_CLOSE);

  // Expansion hands back exactly the GcsPath that was asked about; TextIO's validation
  // during apply() needs this in order to pass.
  final Answer<List<GcsPath>> echoRequestedPath =
      invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]);

  Mockito.when(gcsUtilStub.open(Mockito.any(GcsPath.class))).then(bogusChannel);
  Mockito.when(gcsUtilStub.expand(Mockito.any(GcsPath.class))).then(echoRequestedPath);
  return gcsUtilStub;
}
use of org.apache.beam.sdk.util.GcsUtil in project beam by apache.
the class MinimalWordCountJava8Test method buildMockGcsUtil.
/**
 * Produces a mocked {@link GcsUtil} whose {@code open} and {@code expand} calls are stubbed.
 *
 * <p>Opening any path yields a channel on a new temporary file; expanding any path yields a
 * list holding only that path.
 */
private GcsUtil buildMockGcsUtil() throws IOException {
  GcsUtil fakeGcs = Mockito.mock(GcsUtil.class);

  // open(): a new bogus channel per request, backed by a temp file.
  Mockito.when(fakeGcs.open(Mockito.any(GcsPath.class)))
      .then(
          call -> {
            java.nio.file.Path scratch = Files.createTempFile("channel-", ".tmp");
            return FileChannel.open(
                scratch, StandardOpenOption.CREATE, StandardOpenOption.DELETE_ON_CLOSE);
          });

  // expand(): return the original GcsPath as the sole result. TextIO validates the
  // expansion during apply(), so this stub is required for that validation to succeed.
  Mockito.when(fakeGcs.expand(Mockito.any(GcsPath.class)))
      .then(
          call -> {
            GcsPath requested = (GcsPath) call.getArguments()[0];
            List<GcsPath> matches = ImmutableList.of(requested);
            return matches;
          });

  return fakeGcs;
}
use of org.apache.beam.sdk.util.GcsUtil in project beam by apache.
the class DataflowPipelineTranslatorTest method buildPipelineOptions.
/**
 * Assembles {@link DataflowPipelineOptions} for tests, wired to mocked GCS and Dataflow clients.
 *
 * <p>The mocked {@link GcsUtil} echoes any expanded path back as a single-element list and
 * reports every bucket as accessible. The remaining options are fixed test values.
 *
 * @return the configured options
 */
private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
  // expand() answers with just the path that was requested.
  final Answer<List<GcsPath>> echoPath = new Answer<List<GcsPath>>() {
    @Override
    public List<GcsPath> answer(InvocationOnMock invocation) throws Throwable {
      Object requested = invocation.getArguments()[0];
      return ImmutableList.of((GcsPath) requested);
    }
  };

  final GcsUtil gcsStub = mock(GcsUtil.class);
  when(gcsStub.expand(any(GcsPath.class))).then(echoPath);
  when(gcsStub.bucketAccessible(any(GcsPath.class))).thenReturn(true);

  final String tempLocation = GcsPath.fromComponents("somebucket", "some/path").toString();

  final DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  pipelineOptions.setRunner(DataflowRunner.class);
  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setJobName("some-job-name");
  pipelineOptions.setProject("some-project");
  pipelineOptions.setTempLocation(tempLocation);
  pipelineOptions.setFilesToStage(new LinkedList<String>());
  pipelineOptions.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));
  pipelineOptions.setGcsUtil(gcsStub);
  return pipelineOptions;
}
Aggregations