Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testMultiGraphPipelineSerialization.
@Test
public void testMultiGraphPipelineSerialization() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);
  PCollection<Integer> input = p.begin().apply(Create.of(1, 2, 3));
  input.apply(new UnrelatedOutputCreator());
  input.apply(new UnboundOutputCreator());
  DataflowPipelineTranslator t =
      DataflowPipelineTranslator.fromOptions(
          PipelineOptionsFactory.as(DataflowPipelineOptions.class));
  // Check that translation doesn't fail.
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  JobSpecification jobSpecification =
      t.translate(
          p,
          pipelineProto,
          sdkComponents,
          DataflowRunner.fromOptions(options),
          Collections.emptyList());
  assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
}
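The buildPipelineOptions() helper is not reproduced on this page. Below is a minimal sketch of what it plausibly does; every value is a placeholder, and the real helper likely also stubs the Dataflow and GCS clients, which is omitted here.

// Hypothetical sketch of buildPipelineOptions(): just enough configuration
// for translation to run without touching real GCP services.
private static DataflowPipelineOptions buildPipelineOptions() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setGcpCredential(new TestCredential()); // avoid real GCP authentication
  options.setJobName("some-job-name");            // placeholder
  options.setProject("some-project");             // placeholder
  options.setRegion("some-region");               // placeholder
  options.setTempLocation("gs://somebucket/some/path"); // staging location for translation
  options.setFilesToStage(new ArrayList<>());     // nothing to upload in a unit test
  return options;
}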
Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testBatchGroupIntoBatchesWithShardedKeyTranslationUnifiedWorker.
@Test
public void testBatchGroupIntoBatchesWithShardedKeyTranslationUnifiedWorker() throws Exception {
  List<String> experiments = ImmutableList.of("use_runner_v2");
  JobSpecification jobSpec = runBatchGroupIntoBatchesAndGetJobSpec(true, experiments);
  List<Step> steps = jobSpec.getJob().getSteps();
  Step shardedStateStep = steps.get(steps.size() - 1);
  Map<String, Object> properties = shardedStateStep.getProperties();
  assertTrue(properties.containsKey(PropertyNames.PRESERVES_KEYS));
  assertEquals("true", getString(properties, PropertyNames.PRESERVES_KEYS));
  // TODO: should check that the urn is populated; it is not, due to the override in
  // DataflowRunner.
}
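runBatchGroupIntoBatchesAndGetJobSpec(...) is another helper whose body is not shown here. A plausible sketch follows, assuming it builds a small GroupIntoBatches pipeline and translates it the same way as the other tests on this page; the pipeline shape and batch size are assumptions.

// Assumed shape of the helper: build a KV pipeline, optionally use sharded
// keys, then translate it to a Dataflow JobSpecification.
private JobSpecification runBatchGroupIntoBatchesAndGetJobSpec(
    Boolean withShardedKey, List<String> experiments) throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(experiments);
  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<Integer, String>> input =
      pipeline.apply(Create.of(Arrays.asList(KV.of(1, "1"), KV.of(2, "2"))));
  if (withShardedKey) {
    input.apply(GroupIntoBatches.<Integer, String>ofSize(2).withShardedKey());
  } else {
    input.apply(GroupIntoBatches.ofSize(2));
  }
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  return DataflowPipelineTranslator.fromOptions(options)
      .translate(
          pipeline,
          pipelineProto,
          sdkComponents,
          DataflowRunner.fromOptions(options),
          Collections.emptyList());
}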
Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testSetWorkerHarnessContainerImageInPipelineProto.
/**
* Tests that when (deprecated) {@link
* DataflowPipelineOptions#setWorkerHarnessContainerImage(String)} pipeline option is set, {@link
* DataflowRunner} sets that value as the {@link DockerPayload#getContainerImage()} of the default
* {@link Environment} used when generating the model pipeline proto.
*/
@Test
public void testSetWorkerHarnessContainerImageInPipelineProto() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  String containerImage = "gcr.io/image:foo";
  options.as(DataflowPipelineOptions.class).setWorkerHarnessContainerImage(containerImage);
  Pipeline p = Pipeline.create(options);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline proto = PipelineTranslation.toProto(p, sdkComponents, true);
  JobSpecification specification =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              proto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList());
  RunnerApi.Pipeline pipelineProto = specification.getPipelineProto();
  assertEquals(1, pipelineProto.getComponents().getEnvironmentsCount());
  Environment defaultEnvironment =
      Iterables.getOnlyElement(pipelineProto.getComponents().getEnvironmentsMap().values());
  DockerPayload payload = DockerPayload.parseFrom(defaultEnvironment.getPayload());
  assertEquals(DataflowRunner.getContainerImageForJob(options), payload.getContainerImage());
}
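createSdkComponents(options), used throughout these tests, also comes from elsewhere in the test class. A minimal sketch, assuming it registers a default Docker environment derived from the job's container image so that PipelineTranslation.toProto has an environment to attach transforms to; the exact body is an assumption.

// Assumed helper: register a default Docker environment in the SdkComponents
// so every translated transform can reference an environment id.
private static SdkComponents createSdkComponents(PipelineOptions options) {
  SdkComponents sdkComponents = SdkComponents.create();
  String containerImageUrl =
      DataflowRunner.getContainerImageForJob(options.as(DataflowPipelineOptions.class));
  RunnerApi.Environment defaultEnvironment =
      Environments.createDockerEnvironment(containerImageUrl);
  sdkComponents.registerEnvironment(defaultEnvironment);
  return sdkComponents;
}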
Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testGoodWildcards.
/** Tests a few wildcard corner cases that should not crash translation. */
@Test
public void testGoodWildcards() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline pipeline = Pipeline.create(options);
  DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options);
  applyRead(pipeline, "gs://bucket/foo");
  applyRead(pipeline, "gs://bucket/foo/");
  applyRead(pipeline, "gs://bucket/foo/*");
  applyRead(pipeline, "gs://bucket/foo/?");
  applyRead(pipeline, "gs://bucket/foo/[0-9]");
  applyRead(pipeline, "gs://bucket/foo/*baz*");
  applyRead(pipeline, "gs://bucket/foo/*baz?");
  applyRead(pipeline, "gs://bucket/foo/[0-9]baz?");
  applyRead(pipeline, "gs://bucket/foo/baz/*");
  applyRead(pipeline, "gs://bucket/foo/baz/*wonka*");
  applyRead(pipeline, "gs://bucket/foo/*baz/wonka*");
  applyRead(pipeline, "gs://bucket/foo*/baz");
  applyRead(pipeline, "gs://bucket/foo?/baz");
  applyRead(pipeline, "gs://bucket/foo[0-9]/baz");
  // Check that translation doesn't fail.
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  JobSpecification jobSpecification =
      t.translate(
          pipeline,
          pipelineProto,
          sdkComponents,
          DataflowRunner.fromOptions(options),
          Collections.emptyList());
  assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
}
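applyRead(pipeline, path) is not defined on this page. A one-line sketch of what it presumably does, assuming one TextIO read per pattern; the step naming is an assumption.

// Hypothetical helper: attach an independent TextIO read for each wildcard
// pattern so one pipeline exercises many path shapes at once.
private void applyRead(Pipeline pipeline, String path) {
  pipeline.apply("Read(" + path + ")", TextIO.read().from(path));
}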
Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testStreamingGroupIntoBatchesWithShardedKeyTranslation.
@Test
public void testStreamingGroupIntoBatchesWithShardedKeyTranslation() throws Exception {
  List<String> experiments =
      new ArrayList<>(
          ImmutableList.of(
              GcpOptions.STREAMING_ENGINE_EXPERIMENT, GcpOptions.WINDMILL_SERVICE_EXPERIMENT));
  JobSpecification jobSpec = runStreamingGroupIntoBatchesAndGetJobSpec(true, experiments);
  List<Step> steps = jobSpec.getJob().getSteps();
  Step shardedStateStep = steps.get(steps.size() - 1);
  Map<String, Object> properties = shardedStateStep.getProperties();
  assertTrue(properties.containsKey(PropertyNames.USES_KEYED_STATE));
  assertEquals("true", getString(properties, PropertyNames.USES_KEYED_STATE));
  assertTrue(properties.containsKey(PropertyNames.ALLOWS_SHARDABLE_STATE));
  assertEquals("true", getString(properties, PropertyNames.ALLOWS_SHARDABLE_STATE));
  assertTrue(properties.containsKey(PropertyNames.PRESERVES_KEYS));
  assertEquals("true", getString(properties, PropertyNames.PRESERVES_KEYS));
}
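Finally, testMultiGraphPipelineSerialization and testGoodWildcards above both end with assertAllStepOutputsHaveUniqueIds, whose body is also not shown. A sketch of a plausible implementation, assuming each step's OUTPUT_INFO property is a list of maps keyed by PropertyNames.OUTPUT_NAME; this structure is an assumption based on how the other tests on this page read step properties.

// Assumed check: walk every step's output infos and fail on a repeated
// output name, since duplicate ids would make the job graph ambiguous.
private static void assertAllStepOutputsHaveUniqueIds(Job job) throws Exception {
  Set<String> seen = new HashSet<>();
  for (Step step : job.getSteps()) {
    @SuppressWarnings("unchecked")
    List<Map<String, Object>> outputInfos =
        (List<Map<String, Object>>) step.getProperties().get(PropertyNames.OUTPUT_INFO);
    if (outputInfos == null) {
      continue; // some steps may declare no outputs
    }
    for (Map<String, Object> outputInfo : outputInfos) {
      String outputName = getString(outputInfo, PropertyNames.OUTPUT_NAME);
      assertTrue("Duplicate step output id: " + outputName, seen.add(outputName));
    }
  }
}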