Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by apache.
From class DataflowPipelineTranslatorTest, method testStreamingGroupIntoBatchesTranslationUnifiedWorker.
@Test
public void testStreamingGroupIntoBatchesTranslationUnifiedWorker() throws Exception {
  List<String> experiments =
      new ArrayList<>(
          ImmutableList.of(
              GcpOptions.STREAMING_ENGINE_EXPERIMENT,
              GcpOptions.WINDMILL_SERVICE_EXPERIMENT,
              "use_runner_v2"));
  JobSpecification jobSpec = runStreamingGroupIntoBatchesAndGetJobSpec(false, experiments);
  List<Step> steps = jobSpec.getJob().getSteps();
  Step shardedStateStep = steps.get(steps.size() - 1);
  Map<String, Object> properties = shardedStateStep.getProperties();
  assertTrue(properties.containsKey(PropertyNames.USES_KEYED_STATE));
  assertFalse(properties.containsKey(PropertyNames.ALLOWS_SHARDABLE_STATE));
  assertTrue(properties.containsKey(PropertyNames.PRESERVES_KEYS));
  // Also check that the runner proto is correctly populated.
  Map<String, RunnerApi.PTransform> transformMap =
      jobSpec.getPipelineProto().getComponents().getTransformsMap();
  boolean transformFound = false;
  for (Map.Entry<String, RunnerApi.PTransform> transform : transformMap.entrySet()) {
    RunnerApi.FunctionSpec spec = transform.getValue().getSpec();
    if (spec.getUrn().equals(PTransformTranslation.GROUP_INTO_BATCHES_URN)) {
      transformFound = true;
    }
  }
  assertTrue(transformFound);
}
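The helper runStreamingGroupIntoBatchesAndGetJobSpec is not shown on this page. Below is a minimal sketch of how it could be implemented, assuming the test class's existing buildPipelineOptions() and createSdkComponents() helpers; the shared core is factored out so the batch variant further down can reuse it.

// Sketch only: the real helper is not shown here. It presumably builds a small
// keyed pipeline, applies GroupIntoBatches (optionally with sharded keys), and
// translates it the same way testSetSdkContainerImageInPipelineProto below does.
private JobSpecification runStreamingGroupIntoBatchesAndGetJobSpec(
    boolean withShardedKey, List<String> experiments) throws Exception {
  return runGroupIntoBatchesAndGetJobSpec(true, withShardedKey, experiments);
}

// Hypothetical shared core for the streaming and batch helpers.
private JobSpecification runGroupIntoBatchesAndGetJobSpec(
    boolean streaming, boolean withShardedKey, List<String> experiments) throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(experiments);
  options.setStreaming(streaming);
  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<Integer, Integer>> input =
      pipeline.apply(Create.of(KV.of(1, 1), KV.of(2, 2), KV.of(3, 3)));
  if (withShardedKey) {
    input.apply(GroupIntoBatches.<Integer, Integer>ofSize(2).withShardedKey());
  } else {
    input.apply(GroupIntoBatches.<Integer, Integer>ofSize(2));
  }
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  return DataflowPipelineTranslator.fromOptions(options)
      .translate(
          pipeline,
          pipelineProto,
          sdkComponents,
          DataflowRunner.fromOptions(options),
          Collections.emptyList());
}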
Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by apache.
From class DataflowPipelineTranslatorTest, method testStreamingGroupIntoBatchesWithShardedKeyTranslationUnifiedWorker.
@Test
public void testStreamingGroupIntoBatchesWithShardedKeyTranslationUnifiedWorker() throws Exception {
  List<String> experiments =
      new ArrayList<>(
          ImmutableList.of(
              GcpOptions.STREAMING_ENGINE_EXPERIMENT,
              GcpOptions.WINDMILL_SERVICE_EXPERIMENT,
              "use_runner_v2"));
  JobSpecification jobSpec = runStreamingGroupIntoBatchesAndGetJobSpec(true, experiments);
  List<Step> steps = jobSpec.getJob().getSteps();
  Step shardedStateStep = steps.get(steps.size() - 1);
  Map<String, Object> properties = shardedStateStep.getProperties();
  assertTrue(properties.containsKey(PropertyNames.USES_KEYED_STATE));
  assertTrue(properties.containsKey(PropertyNames.ALLOWS_SHARDABLE_STATE));
  assertEquals("true", getString(properties, PropertyNames.ALLOWS_SHARDABLE_STATE));
  assertTrue(properties.containsKey(PropertyNames.PRESERVES_KEYS));
  assertEquals("true", getString(properties, PropertyNames.PRESERVES_KEYS));
  // Also checks the runner proto is correctly populated.
  Map<String, RunnerApi.PTransform> transformMap =
      jobSpec.getPipelineProto().getComponents().getTransformsMap();
  boolean transformFound = false;
  for (Map.Entry<String, RunnerApi.PTransform> transform : transformMap.entrySet()) {
    RunnerApi.FunctionSpec spec = transform.getValue().getSpec();
    if (spec.getUrn().equals(PTransformTranslation.GROUP_INTO_BATCHES_WITH_SHARDED_KEY_URN)) {
      for (String subtransform : transform.getValue().getSubtransformsList()) {
        RunnerApi.PTransform ptransform = transformMap.get(subtransform);
        if (ptransform.getSpec().getUrn().equals(PTransformTranslation.GROUP_INTO_BATCHES_URN)) {
          transformFound = true;
        }
      }
    }
  }
  assertTrue(transformFound);
  boolean coderFound = false;
  Map<String, RunnerApi.Coder> coderMap =
      jobSpec.getPipelineProto().getComponents().getCodersMap();
  for (Map.Entry<String, RunnerApi.Coder> coder : coderMap.entrySet()) {
    if (coder.getValue().getSpec().getUrn().equals(ModelCoders.SHARDED_KEY_CODER_URN)) {
      coderFound = true;
    }
  }
  assertTrue(coderFound);
}
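For context: withShardedKey() wraps each output key in ShardedKey so Dataflow can spread a hot key across multiple workers, which is why the translated proto above is expected to carry a coder with ModelCoders.SHARDED_KEY_CODER_URN. A hypothetical user-level application of the transform:

// Hypothetical pipeline fragment; the input data is made up. The output keys
// are wrapped in org.apache.beam.sdk.values.ShardedKey, so the translated
// pipeline proto carries a sharded-key coder.
PCollection<KV<ShardedKey<String>, Iterable<Integer>>> batched =
    pipeline
        .apply(Create.of(KV.of("hot-key", 1), KV.of("hot-key", 2)))
        .apply(GroupIntoBatches.<String, Integer>ofSize(100).withShardedKey());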
Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by apache.
From class DataflowPipelineTranslatorTest, method testBatchGroupIntoBatchesTranslation.
@Test
public void testBatchGroupIntoBatchesTranslation() throws Exception {
  JobSpecification jobSpec = runBatchGroupIntoBatchesAndGetJobSpec(false, Collections.emptyList());
  List<Step> steps = jobSpec.getJob().getSteps();
  Step shardedStateStep = steps.get(steps.size() - 1);
  Map<String, Object> properties = shardedStateStep.getProperties();
  assertTrue(properties.containsKey(PropertyNames.PRESERVES_KEYS));
  assertEquals("true", getString(properties, PropertyNames.PRESERVES_KEYS));
}
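runBatchGroupIntoBatchesAndGetJobSpec is likewise not shown; assuming it parallels the streaming helper, it can reuse the shared core sketched earlier with streaming mode left off:

// Sketch (assumption): identical to the streaming helper except that the
// pipeline is translated as a batch job.
private JobSpecification runBatchGroupIntoBatchesAndGetJobSpec(
    boolean withShardedKey, List<String> experiments) throws Exception {
  return runGroupIntoBatchesAndGetJobSpec(false, withShardedKey, experiments);
}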
Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by apache.
From class DataflowPipelineTranslatorTest, method testStreamingGroupIntoBatchesTranslation.
@Test
public void testStreamingGroupIntoBatchesTranslation() throws Exception {
  List<String> experiments =
      new ArrayList<>(
          ImmutableList.of(
              GcpOptions.STREAMING_ENGINE_EXPERIMENT, GcpOptions.WINDMILL_SERVICE_EXPERIMENT));
  JobSpecification jobSpec = runStreamingGroupIntoBatchesAndGetJobSpec(false, experiments);
  List<Step> steps = jobSpec.getJob().getSteps();
  Step shardedStateStep = steps.get(steps.size() - 1);
  Map<String, Object> properties = shardedStateStep.getProperties();
  assertTrue(properties.containsKey(PropertyNames.USES_KEYED_STATE));
  assertEquals("true", getString(properties, PropertyNames.USES_KEYED_STATE));
  assertFalse(properties.containsKey(PropertyNames.ALLOWS_SHARDABLE_STATE));
  assertTrue(properties.containsKey(PropertyNames.PRESERVES_KEYS));
}
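The two experiments come from constants on GcpOptions; outside of tests they would normally be supplied as pipeline options at launch time. A hypothetical equivalent from the command line, assuming the usual constant values ("enable_streaming_engine", "enable_windmill_service"):

// Hypothetical launch-time configuration equivalent to the experiments list above.
DataflowPipelineOptions options =
    PipelineOptionsFactory.fromArgs(
            "--experiments=enable_streaming_engine,enable_windmill_service",
            "--streaming=true")
        .as(DataflowPipelineOptions.class);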
Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by apache.
From class DataflowPipelineTranslatorTest, method testSetSdkContainerImageInPipelineProto.
/**
 * Tests that when the {@link DataflowPipelineOptions#setSdkContainerImage(String)} pipeline
 * option is set, {@link DataflowRunner} sets that value as the {@link
 * DockerPayload#getContainerImage()} of the default {@link Environment} used when generating the
 * model pipeline proto.
 */
@Test
public void testSetSdkContainerImageInPipelineProto() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  String containerImage = "gcr.io/image:foo";
  options.as(DataflowPipelineOptions.class).setSdkContainerImage(containerImage);
  Pipeline p = Pipeline.create(options);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline proto = PipelineTranslation.toProto(p, sdkComponents, true);
  JobSpecification specification =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              proto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList());
  RunnerApi.Pipeline pipelineProto = specification.getPipelineProto();
  assertEquals(1, pipelineProto.getComponents().getEnvironmentsCount());
  Environment defaultEnvironment =
      Iterables.getOnlyElement(pipelineProto.getComponents().getEnvironmentsMap().values());
  DockerPayload payload = DockerPayload.parseFrom(defaultEnvironment.getPayload());
  assertEquals(DataflowRunner.getContainerImageForJob(options), payload.getContainerImage());
}
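The sdkContainerImage option can also be supplied at launch time rather than programmatically; a hypothetical invocation (the image name is made up):

// Equivalent to options.setSdkContainerImage("gcr.io/my-project/my-image:tag").
DataflowPipelineOptions options =
    PipelineOptionsFactory.fromArgs("--sdkContainerImage=gcr.io/my-project/my-image:tag")
        .as(DataflowPipelineOptions.class);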