Search in sources:

Example 96 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class DataflowPipelineTranslatorTest method testNetworkConfig.

/**
 * Sets a network name on the pipeline options, translates the pipeline into a Dataflow job,
 * and checks that the job has exactly one worker pool reporting that network.
 */
@Test
public void testNetworkConfig() throws IOException {
    final String expectedNetwork = "test-network";

    DataflowPipelineOptions options = buildPipelineOptions();
    options.setNetwork(expectedNetwork);

    Pipeline pipeline = buildPipeline(options);
    pipeline.traverseTopologically(new RecordingPipelineVisitor());

    SdkComponents components = createSdkComponents(options);
    RunnerApi.Pipeline proto = PipelineTranslation.toProto(pipeline, components, true);

    // Translator and runner are both derived from the same options instance.
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    Job translatedJob = translator.translate(pipeline, proto, components, runner, Collections.emptyList()).getJob();

    assertEquals(1, translatedJob.getEnvironment().getWorkerPools().size());
    assertEquals(expectedNetwork, translatedJob.getEnvironment().getWorkerPools().get(0).getNetwork());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Job(com.google.api.services.dataflow.model.Job) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 97 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class DataflowRunnerTest method testStageArtifactWithoutStagedName.

/**
 * Stages two local file artifacts that carry no staging role (and therefore no explicit staged
 * name) and checks that every returned package name matches the temp-file naming pattern
 * {@code artifact1-*.txt} or {@code artifact2-*.txt}.
 */
@Test
public void testStageArtifactWithoutStagedName() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowRunner runner = DataflowRunner.fromOptions(options);

    // Two distinct temp files, removed when the JVM exits.
    File temp1 = File.createTempFile("artifact1-", ".txt");
    temp1.deleteOnExit();
    File temp2 = File.createTempFile("artifact2-", ".txt");
    temp2.deleteOnExit();

    // Only the type URN/payload are set; no role URN or staged name is supplied.
    RunnerApi.ArtifactInformation fooLocalArtifact = RunnerApi.ArtifactInformation.newBuilder().setTypeUrn(BeamUrns.getUrn(RunnerApi.StandardArtifacts.Types.FILE)).setTypePayload(RunnerApi.ArtifactFilePayload.newBuilder().setPath(temp1.getAbsolutePath()).build().toByteString()).build();
    RunnerApi.ArtifactInformation barLocalArtifact = RunnerApi.ArtifactInformation.newBuilder().setTypeUrn(BeamUrns.getUrn(RunnerApi.StandardArtifacts.Types.FILE)).setTypePayload(RunnerApi.ArtifactFilePayload.newBuilder().setPath(temp2.getAbsolutePath()).build().toByteString()).build();
    RunnerApi.Pipeline pipeline = RunnerApi.Pipeline.newBuilder().setComponents(RunnerApi.Components.newBuilder().putEnvironments("env", RunnerApi.Environment.newBuilder().addAllDependencies(ImmutableList.of(fooLocalArtifact, barLocalArtifact)).build())).build();

    List<DataflowPackage> packages = runner.stageArtifacts(pipeline);

    // NOTE(review): this loop passes vacuously if no packages are returned; consider also
    // asserting the expected package count once it has been confirmed.
    for (DataflowPackage pkg : packages) {
        // The character class is "[12]": a comma inside brackets is matched literally, so
        // "[1,2]" would also accept a name such as "artifact,-x.txt".
        assertThat(pkg.getName(), matchesRegex("artifact[12]-.+\\.txt"));
    }
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) File(java.io.File) DataflowPackage(com.google.api.services.dataflow.model.DataflowPackage) Test(org.junit.Test) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest)

Example 98 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class DataflowRunnerTest method testTransformTranslatorMissing.

/**
 * Builds a pipeline that applies {@code TestTransform} and expects translation to fail with an
 * {@link IllegalStateException} whose message contains "no translator registered".
 */
@Test
public void testTransformTranslatorMissing() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    Pipeline p = Pipeline.create(options);
    p.apply(Create.of(Arrays.asList(1, 2, 3))).apply(new TestTransform());

    // The expectation must be registered before the failing call is made.
    thrown.expect(IllegalStateException.class);
    thrown.expectMessage(containsString("no translator registered"));

    SdkComponents sdkComponents = SdkComponents.create(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);

    // Presumably the call that raises the expected exception. Nothing may follow it: any
    // statement placed after the throwing call would never execute in a passing run, so no
    // job capture or validation is attempted here.
    DataflowPipelineTranslator.fromOptions(options).translate(p, pipelineProto, sdkComponents, DataflowRunner.fromOptions(options), Collections.emptyList());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Job(com.google.api.services.dataflow.model.Job) DataflowRunner.getContainerImageForJob(org.apache.beam.runners.dataflow.DataflowRunner.getContainerImageForJob) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest)

Example 99 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class DataflowRunnerTest method testResolveArtifacts.

/**
 * Feeds {@code resolveArtifacts} a pipeline whose single environment depends on two local FILE
 * artifacts with staged names, and expects back a pipeline in which each dependency is a URL
 * artifact located under the staging location with the same staged name.
 */
@Test
public void testResolveArtifacts() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    // Drop a single trailing slash so the expected URLs can be built with "/" + name.
    String stagingLocation = options.getStagingLocation().replaceFirst("/$", "");

    String fileTypeUrn = BeamUrns.getUrn(RunnerApi.StandardArtifacts.Types.FILE);
    String urlTypeUrn = BeamUrns.getUrn(RunnerApi.StandardArtifacts.Types.URL);
    String stagingToRoleUrn = BeamUrns.getUrn(RunnerApi.StandardArtifacts.Roles.STAGING_TO);

    // The role payload is identical for the local and the staged form of each artifact.
    RunnerApi.ArtifactStagingToRolePayload fooRole =
        RunnerApi.ArtifactStagingToRolePayload.newBuilder().setStagedName("foo_staged.jar").build();
    RunnerApi.ArtifactStagingToRolePayload barRole =
        RunnerApi.ArtifactStagingToRolePayload.newBuilder().setStagedName("bar_staged.jar").build();

    // Input: local file artifacts.
    RunnerApi.ArtifactFilePayload fooFile =
        RunnerApi.ArtifactFilePayload.newBuilder().setPath("/tmp/foo.jar").build();
    RunnerApi.ArtifactFilePayload barFile =
        RunnerApi.ArtifactFilePayload.newBuilder().setPath("/tmp/bar.jar").build();
    RunnerApi.ArtifactInformation fooLocal =
        RunnerApi.ArtifactInformation.newBuilder()
            .setTypeUrn(fileTypeUrn)
            .setTypePayload(fooFile.toByteString())
            .setRoleUrn(stagingToRoleUrn)
            .setRolePayload(fooRole.toByteString())
            .build();
    RunnerApi.ArtifactInformation barLocal =
        RunnerApi.ArtifactInformation.newBuilder()
            .setTypeUrn(fileTypeUrn)
            .setTypePayload(barFile.toByteString())
            .setRoleUrn(stagingToRoleUrn)
            .setRolePayload(barRole.toByteString())
            .build();
    RunnerApi.Environment localEnv =
        RunnerApi.Environment.newBuilder()
            .addAllDependencies(ImmutableList.of(fooLocal, barLocal))
            .build();
    RunnerApi.Pipeline pipeline =
        RunnerApi.Pipeline.newBuilder()
            .setComponents(RunnerApi.Components.newBuilder().putEnvironments("env", localEnv))
            .build();

    // Expected output: the same artifacts addressed by URL under the staging location.
    RunnerApi.ArtifactUrlPayload fooUrl =
        RunnerApi.ArtifactUrlPayload.newBuilder().setUrl(stagingLocation + "/foo_staged.jar").build();
    RunnerApi.ArtifactUrlPayload barUrl =
        RunnerApi.ArtifactUrlPayload.newBuilder().setUrl(stagingLocation + "/bar_staged.jar").build();
    RunnerApi.ArtifactInformation fooStaged =
        RunnerApi.ArtifactInformation.newBuilder()
            .setTypeUrn(urlTypeUrn)
            .setTypePayload(fooUrl.toByteString())
            .setRoleUrn(stagingToRoleUrn)
            .setRolePayload(fooRole.toByteString())
            .build();
    RunnerApi.ArtifactInformation barStaged =
        RunnerApi.ArtifactInformation.newBuilder()
            .setTypeUrn(urlTypeUrn)
            .setTypePayload(barUrl.toByteString())
            .setRoleUrn(stagingToRoleUrn)
            .setRolePayload(barRole.toByteString())
            .build();
    RunnerApi.Environment stagedEnv =
        RunnerApi.Environment.newBuilder()
            .addAllDependencies(ImmutableList.of(fooStaged, barStaged))
            .build();
    RunnerApi.Pipeline expectedPipeline =
        RunnerApi.Pipeline.newBuilder()
            .setComponents(RunnerApi.Components.newBuilder().putEnvironments("env", stagedEnv))
            .build();

    assertThat(runner.resolveArtifacts(pipeline), equalTo(expectedPipeline));
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Matchers.containsString(org.hamcrest.Matchers.containsString) Test(org.junit.Test) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest)

Example 100 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class DataflowRunnerTest method testStageDuplicatedArtifacts.

/**
 * Stages a pipeline with two environments: both depend on the same "bar" artifact, and each
 * depends on the same "foo" file under a different staged name. Expects exactly three staged
 * packages: {@code foo_staged1.jar}, {@code foo_staged2.jar} and {@code bar_staged.jar}.
 */
@Test
public void testStageDuplicatedArtifacts() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowRunner runner = DataflowRunner.fromOptions(options);

    File foo = File.createTempFile("foo-", ".txt");
    foo.deleteOnExit();
    File bar = File.createTempFile("bar-", ".txt");
    bar.deleteOnExit();

    String fileTypeUrn = BeamUrns.getUrn(RunnerApi.StandardArtifacts.Types.FILE);
    String stagingToRoleUrn = BeamUrns.getUrn(RunnerApi.StandardArtifacts.Roles.STAGING_TO);

    // Both "foo" artifacts point at the same file on disk; only the staged name differs.
    RunnerApi.ArtifactFilePayload fooFile =
        RunnerApi.ArtifactFilePayload.newBuilder().setPath(foo.getAbsolutePath()).build();
    RunnerApi.ArtifactFilePayload barFile =
        RunnerApi.ArtifactFilePayload.newBuilder().setPath(bar.getAbsolutePath()).build();

    RunnerApi.ArtifactInformation foo1LocalArtifact =
        RunnerApi.ArtifactInformation.newBuilder()
            .setTypeUrn(fileTypeUrn)
            .setTypePayload(fooFile.toByteString())
            .setRoleUrn(stagingToRoleUrn)
            .setRolePayload(
                RunnerApi.ArtifactStagingToRolePayload.newBuilder()
                    .setStagedName("foo_staged1.jar")
                    .build()
                    .toByteString())
            .build();
    RunnerApi.ArtifactInformation foo2LocalArtifact =
        RunnerApi.ArtifactInformation.newBuilder()
            .setTypeUrn(fileTypeUrn)
            .setTypePayload(fooFile.toByteString())
            .setRoleUrn(stagingToRoleUrn)
            .setRolePayload(
                RunnerApi.ArtifactStagingToRolePayload.newBuilder()
                    .setStagedName("foo_staged2.jar")
                    .build()
                    .toByteString())
            .build();
    RunnerApi.ArtifactInformation barLocalArtifact =
        RunnerApi.ArtifactInformation.newBuilder()
            .setTypeUrn(fileTypeUrn)
            .setTypePayload(barFile.toByteString())
            .setRoleUrn(stagingToRoleUrn)
            .setRolePayload(
                RunnerApi.ArtifactStagingToRolePayload.newBuilder()
                    .setStagedName("bar_staged.jar")
                    .build()
                    .toByteString())
            .build();

    // "bar" appears in both environments.
    RunnerApi.Environment env1 =
        RunnerApi.Environment.newBuilder()
            .addAllDependencies(ImmutableList.of(foo1LocalArtifact, barLocalArtifact))
            .build();
    RunnerApi.Environment env2 =
        RunnerApi.Environment.newBuilder()
            .addAllDependencies(ImmutableList.of(foo2LocalArtifact, barLocalArtifact))
            .build();
    RunnerApi.Components.Builder components =
        RunnerApi.Components.newBuilder().putEnvironments("env1", env1).putEnvironments("env2", env2);
    RunnerApi.Pipeline pipeline = RunnerApi.Pipeline.newBuilder().setComponents(components).build();

    List<DataflowPackage> staged = runner.stageArtifacts(pipeline);
    List<String> stagedNames =
        staged.stream().map(pkg -> pkg.getName()).collect(Collectors.toList());

    assertThat(stagedNames.size(), equalTo(3));
    assertThat(stagedNames, containsInAnyOrder("foo_staged1.jar", "foo_staged2.jar", "bar_staged.jar"));
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Matchers.containsString(org.hamcrest.Matchers.containsString) File(java.io.File) DataflowPackage(com.google.api.services.dataflow.model.DataflowPackage) Test(org.junit.Test) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 117; Test (org.junit.Test): 87; Pipeline (org.apache.beam.sdk.Pipeline): 82; SdkComponents (org.apache.beam.runners.core.construction.SdkComponents): 44; ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 43; DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions): 38; Map (java.util.Map): 32; KV (org.apache.beam.sdk.values.KV): 26; Job (com.google.api.services.dataflow.model.Job): 25; Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 24; KvCoder (org.apache.beam.sdk.coders.KvCoder): 24; Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components): 23; Coder (org.apache.beam.sdk.coders.Coder): 23; ArrayList (java.util.ArrayList): 22; WindowedValue (org.apache.beam.sdk.util.WindowedValue): 22; HashMap (java.util.HashMap): 20; List (java.util.List): 20; ExecutableStage (org.apache.beam.runners.core.construction.graph.ExecutableStage): 19; IOException (java.io.IOException): 18; PCollection (org.apache.beam.sdk.values.PCollection): 18