Search in sources :

Example 1 with GcsUtil

use of org.apache.beam.sdk.util.GcsUtil in project beam by apache.

the class DataflowRunnerTest method buildMockGcsUtil.

/**
 * Builds a Mockito mock of {@link GcsUtil} for use in tests.
 *
 * <p>Stubbed behavior:
 * <ul>
 *   <li>{@code create(GcsPath, String)} answers with a readable and writable channel onto a
 *       freshly created temp file that is deleted when the channel is closed;</li>
 *   <li>{@code expand(GcsPath)} answers with a single-element list holding the path it was
 *       given.</li>
 * </ul>
 *
 * @return the configured mock
 * @throws IOException declared on the signature; presumably required because the stubbed
 *     {@code GcsUtil} methods declare it (confirm against {@code GcsUtil})
 */
private GcsUtil buildMockGcsUtil() throws IOException {
    GcsUtil mockGcsUtil = mock(GcsUtil.class);
    when(mockGcsUtil.create(any(GcsPath.class), anyString())).then(new Answer<SeekableByteChannel>() {

        @Override
        public SeekableByteChannel answer(InvocationOnMock invocation) throws Throwable {
            // A channel opened without WRITE is read-only, so anything written through the
            // mocked create() would fail with NonWritableChannelException. READ is requested
            // explicitly as well so the channel keeps the read capability it had before.
            return FileChannel.open(Files.createTempFile("channel-", ".tmp"), StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.DELETE_ON_CLOSE);
        }
    });
    when(mockGcsUtil.expand(any(GcsPath.class))).then(new Answer<List<GcsPath>>() {

        @Override
        public List<GcsPath> answer(InvocationOnMock invocation) throws Throwable {
            // Echo the requested path back as the only expansion result.
            return ImmutableList.of((GcsPath) invocation.getArguments()[0]);
        }
    });
    return mockGcsUtil;
}
Also used : SeekableByteChannel(java.nio.channels.SeekableByteChannel) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GcsPath(org.apache.beam.sdk.util.gcsfs.GcsPath) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) GcsUtil(org.apache.beam.sdk.util.GcsUtil)

Example 2 with GcsUtil

use of org.apache.beam.sdk.util.GcsUtil in project beam by apache.

the class TfIdf method listInputDocuments.

/**
 * Lists documents contained beneath the {@code options.input} prefix/directory.
 *
 * <p>An input without a URI scheme is treated as a local {@code file} path. For {@code file}
 * URIs one URI per directory entry is returned; for {@code gs} URIs the path is expanded with a
 * trailing {@code *} glob through {@link GcsUtil}. Any other scheme yields an empty set.
 *
 * @param options pipeline options supplying the input location and, for {@code gs} inputs, the
 *     {@link GcsUtil} instance
 * @return the URIs of the documents found; empty if the local path cannot be listed (for example
 *     because it does not exist or is not a directory)
 * @throws URISyntaxException if the input cannot be parsed as, or rebuilt into, a URI
 * @throws IOException declared on the signature; presumably raised by the GCS glob expansion
 *     (confirm against {@code GcsUtil.expand})
 */
public static Set<URI> listInputDocuments(Options options) throws URISyntaxException, IOException {
    URI baseUri = new URI(options.getInput());
    // List all documents in the directory or GCS prefix.
    URI absoluteUri;
    if (baseUri.getScheme() != null) {
        absoluteUri = baseUri;
    } else {
        // No scheme given: interpret the input as a local file path.
        absoluteUri = new URI("file", baseUri.getAuthority(), baseUri.getPath(), baseUri.getQuery(), baseUri.getFragment());
    }
    Set<URI> uris = new HashSet<>();
    String scheme = absoluteUri.getScheme();
    if ("file".equals(scheme)) {
        File directory = new File(absoluteUri);
        // File.list() returns null (not an empty array) when the path is not a directory or
        // an I/O error occurs; treat that as "no documents" instead of failing with an NPE.
        String[] entries = directory.list();
        if (entries != null) {
            for (String entry : entries) {
                File path = new File(directory, entry);
                uris.add(path.toURI());
            }
        }
    } else if ("gs".equals(scheme)) {
        GcsUtil gcsUtil = options.as(GcsOptions.class).getGcsUtil();
        // Append "*" so the prefix is expanded to every object beneath it.
        URI gcsUriGlob = new URI(absoluteUri.getScheme(), absoluteUri.getAuthority(), absoluteUri.getPath() + "*", absoluteUri.getQuery(), absoluteUri.getFragment());
        for (GcsPath entry : gcsUtil.expand(GcsPath.fromUri(gcsUriGlob))) {
            uris.add(entry.toUri());
        }
    }
    return uris;
}
Also used : GcsPath(org.apache.beam.sdk.util.gcsfs.GcsPath) URI(java.net.URI) File(java.io.File) GcsUtil(org.apache.beam.sdk.util.GcsUtil) HashSet(java.util.HashSet)

Example 3 with GcsUtil

use of org.apache.beam.sdk.util.GcsUtil in project DataflowJavaSDK-examples by GoogleCloudPlatform.

the class MinimalWordCountJava8Test method buildMockGcsUtil.

/**
 * Creates a Mockito-backed {@link GcsUtil} whose {@code open} and {@code expand} calls are
 * stubbed with canned answers.
 *
 * @return the stubbed mock
 * @throws IOException declared on the signature; presumably because the stubbed
 *     {@code GcsUtil} methods declare it (confirm against {@code GcsUtil})
 */
private GcsUtil buildMockGcsUtil() throws IOException {
    // Answer for open(): every call hands out a new bogus channel on a throwaway temp file.
    Answer<SeekableByteChannel> bogusChannel = new Answer<SeekableByteChannel>() {

        @Override
        public SeekableByteChannel answer(InvocationOnMock call) throws Throwable {
            return FileChannel.open(
                Files.createTempFile("channel-", ".tmp"),
                StandardOpenOption.CREATE,
                StandardOpenOption.DELETE_ON_CLOSE);
        }
    };

    // Answer for expand(): a list containing only the GcsPath that was asked about.
    // This is required to pass validation that occurs in TextIO during apply().
    Answer<List<GcsPath>> echoRequestedPath = new Answer<List<GcsPath>>() {

        @Override
        public List<GcsPath> answer(InvocationOnMock call) throws Throwable {
            Object requested = call.getArguments()[0];
            return ImmutableList.of((GcsPath) requested);
        }
    };

    GcsUtil gcsUtil = Mockito.mock(GcsUtil.class);
    Mockito.when(gcsUtil.open(Mockito.any(GcsPath.class))).then(bogusChannel);
    Mockito.when(gcsUtil.expand(Mockito.any(GcsPath.class))).then(echoRequestedPath);
    return gcsUtil;
}
Also used : SeekableByteChannel(java.nio.channels.SeekableByteChannel) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GcsPath(org.apache.beam.sdk.util.gcsfs.GcsPath) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) GcsUtil(org.apache.beam.sdk.util.GcsUtil)

Example 4 with GcsUtil

use of org.apache.beam.sdk.util.GcsUtil in project beam by apache.

the class MinimalWordCountJava8Test method buildMockGcsUtil.

/**
 * Produces a {@link GcsUtil} mock with {@code open} and {@code expand} stubbed so that no real
 * GCS access is needed.
 *
 * @return the stubbed mock
 * @throws IOException declared on the signature; presumably because the stubbed
 *     {@code GcsUtil} methods declare it (confirm against {@code GcsUtil})
 */
private GcsUtil buildMockGcsUtil() throws IOException {
    final GcsUtil stubbedGcs = Mockito.mock(GcsUtil.class);

    // open(): each request is served by a new bogus channel backed by a temp file.
    final Answer<SeekableByteChannel> openAnswer = new Answer<SeekableByteChannel>() {

        @Override
        public SeekableByteChannel answer(InvocationOnMock invocationOnMock) throws Throwable {
            return FileChannel.open(
                Files.createTempFile("channel-", ".tmp"),
                StandardOpenOption.CREATE,
                StandardOpenOption.DELETE_ON_CLOSE);
        }
    };
    Mockito.when(stubbedGcs.open(Mockito.any(GcsPath.class))).then(openAnswer);

    // expand(): the result is a list holding just the original GcsPath.
    // This is required to pass validation that occurs in TextIO during apply().
    final Answer<List<GcsPath>> expandAnswer = new Answer<List<GcsPath>>() {

        @Override
        public List<GcsPath> answer(InvocationOnMock invocationOnMock) throws Throwable {
            GcsPath original = (GcsPath) invocationOnMock.getArguments()[0];
            return ImmutableList.of(original);
        }
    };
    Mockito.when(stubbedGcs.expand(Mockito.any(GcsPath.class))).then(expandAnswer);

    return stubbedGcs;
}
Also used : SeekableByteChannel(java.nio.channels.SeekableByteChannel) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GcsPath(org.apache.beam.sdk.util.gcsfs.GcsPath) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) GcsUtil(org.apache.beam.sdk.util.GcsUtil)

Example 5 with GcsUtil

use of org.apache.beam.sdk.util.GcsUtil in project beam by apache.

the class DataflowPipelineTranslatorTest method buildPipelineOptions.

/**
 * Assembles {@link DataflowPipelineOptions} wired to mocks for use in tests.
 *
 * <p>The options use {@code DataflowRunner}, a {@code TestCredential}, fixed job and project
 * names, a temp location built from the bucket {@code somebucket} and path {@code some/path},
 * an empty staging list, a mock Dataflow client, and a mock {@link GcsUtil} that reports every
 * bucket as accessible and expands each path to itself.
 *
 * @return the configured options
 * @throws IOException declared on the signature; presumably because the stubbed
 *     {@code GcsUtil} methods or the mock Dataflow helper declare it (confirm against callers)
 */
private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
    // expand() echoes the requested path back as its single result.
    Answer<List<GcsPath>> echoRequestedPath = new Answer<List<GcsPath>>() {

        @Override
        public List<GcsPath> answer(InvocationOnMock call) throws Throwable {
            GcsPath requested = (GcsPath) call.getArguments()[0];
            return ImmutableList.of(requested);
        }
    };
    GcsUtil gcsUtil = mock(GcsUtil.class);
    when(gcsUtil.expand(any(GcsPath.class))).then(echoRequestedPath);
    when(gcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(true);

    DataflowPipelineOptions pipelineOptions = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    pipelineOptions.setRunner(DataflowRunner.class);
    pipelineOptions.setGcpCredential(new TestCredential());
    pipelineOptions.setJobName("some-job-name");
    pipelineOptions.setProject("some-project");
    String tempLocation = GcsPath.fromComponents("somebucket", "some/path").toString();
    pipelineOptions.setTempLocation(tempLocation);
    pipelineOptions.setFilesToStage(new LinkedList<String>());
    pipelineOptions.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));
    pipelineOptions.setGcsUtil(gcsUtil);
    return pipelineOptions;
}
Also used : TestCredential(org.apache.beam.sdk.extensions.gcp.auth.TestCredential) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GcsPath(org.apache.beam.sdk.util.gcsfs.GcsPath) List(java.util.List) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) GcsUtil(org.apache.beam.sdk.util.GcsUtil)

Aggregations

GcsUtil (org.apache.beam.sdk.util.GcsUtil): 6, GcsPath (org.apache.beam.sdk.util.gcsfs.GcsPath): 6, ImmutableList (com.google.common.collect.ImmutableList): 5, List (java.util.List): 5, InvocationOnMock (org.mockito.invocation.InvocationOnMock): 5, SeekableByteChannel (java.nio.channels.SeekableByteChannel): 3, LinkedList (java.util.LinkedList): 3, ArrayList (java.util.ArrayList): 2, DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions): 2, TestCredential (org.apache.beam.sdk.extensions.gcp.auth.TestCredential): 2, TupleTagList (org.apache.beam.sdk.values.TupleTagList): 2, File (java.io.File): 1, URI (java.net.URI): 1, HashSet (java.util.HashSet): 1, Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 1