Search in sources :

Example 6 with JobSpecification

Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by Apache.

The class DataflowPipelineTranslatorTest, method testStreamingGroupIntoBatchesTranslationUnifiedWorker.

/**
 * Builds a streaming GroupIntoBatches job spec with the streaming-engine, windmill-service and
 * {@code use_runner_v2} experiments, then checks the properties of the last translated step and
 * that the pipeline proto contains a transform with the GroupIntoBatches URN.
 *
 * <p>NOTE(review): the {@code false} passed to the helper presumably disables sharded keys;
 * confirm against {@code runStreamingGroupIntoBatchesAndGetJobSpec}.
 */
@Test
public void testStreamingGroupIntoBatchesTranslationUnifiedWorker() throws Exception {
    List<String> experiments =
        new ArrayList<>(
            ImmutableList.of(
                GcpOptions.STREAMING_ENGINE_EXPERIMENT,
                GcpOptions.WINDMILL_SERVICE_EXPERIMENT,
                "use_runner_v2"));
    JobSpecification jobSpec = runStreamingGroupIntoBatchesAndGetJobSpec(false, experiments);

    // The step under inspection is the final one of the translated job.
    List<Step> jobSteps = jobSpec.getJob().getSteps();
    Map<String, Object> lastStepProperties = jobSteps.get(jobSteps.size() - 1).getProperties();
    assertTrue(lastStepProperties.containsKey(PropertyNames.USES_KEYED_STATE));
    assertFalse(lastStepProperties.containsKey(PropertyNames.ALLOWS_SHARDABLE_STATE));
    assertTrue(lastStepProperties.containsKey(PropertyNames.PRESERVES_KEYS));

    // The runner proto must also carry a transform tagged with the GroupIntoBatches URN.
    Map<String, RunnerApi.PTransform> transformsById =
        jobSpec.getPipelineProto().getComponents().getTransformsMap();
    boolean hasGroupIntoBatches =
        transformsById.values().stream()
            .anyMatch(
                candidate ->
                    candidate
                        .getSpec()
                        .getUrn()
                        .equals(PTransformTranslation.GROUP_INTO_BATCHES_URN));
    assertTrue(hasGroupIntoBatches);
}
Also used : ArrayList(java.util.ArrayList) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Step(com.google.api.services.dataflow.model.Step) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) JobSpecification(org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) PTransform(org.apache.beam.sdk.transforms.PTransform) Test(org.junit.Test)

Example 7 with JobSpecification

Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by Apache.

The class DataflowPipelineTranslatorTest, method testStreamingGroupIntoBatchesWithShardedKeyTranslationUnifiedWorker.

/**
 * Builds a streaming GroupIntoBatches job spec (helper invoked with {@code true}) using the
 * streaming-engine, windmill-service and {@code use_runner_v2} experiments, then checks:
 *
 * <ul>
 *   <li>the last translated step declares keyed state, shardable state and key preservation;
 *   <li>the pipeline proto has a sharded-key GroupIntoBatches transform with a GroupIntoBatches
 *       subtransform;
 *   <li>the pipeline proto registers a coder with the sharded-key coder URN.
 * </ul>
 */
@Test
public void testStreamingGroupIntoBatchesWithShardedKeyTranslationUnifiedWorker() throws Exception {
    List<String> experiments =
        new ArrayList<>(
            ImmutableList.of(
                GcpOptions.STREAMING_ENGINE_EXPERIMENT,
                GcpOptions.WINDMILL_SERVICE_EXPERIMENT,
                "use_runner_v2"));
    JobSpecification jobSpec = runStreamingGroupIntoBatchesAndGetJobSpec(true, experiments);

    // The step under inspection is the final one of the translated job.
    List<Step> jobSteps = jobSpec.getJob().getSteps();
    Map<String, Object> lastStepProperties = jobSteps.get(jobSteps.size() - 1).getProperties();
    assertTrue(lastStepProperties.containsKey(PropertyNames.USES_KEYED_STATE));
    assertTrue(lastStepProperties.containsKey(PropertyNames.ALLOWS_SHARDABLE_STATE));
    assertEquals("true", getString(lastStepProperties, PropertyNames.ALLOWS_SHARDABLE_STATE));
    assertTrue(lastStepProperties.containsKey(PropertyNames.PRESERVES_KEYS));
    assertEquals("true", getString(lastStepProperties, PropertyNames.PRESERVES_KEYS));

    // The runner proto must contain the sharded-key composite wrapping a GroupIntoBatches child.
    Map<String, RunnerApi.PTransform> transformsById =
        jobSpec.getPipelineProto().getComponents().getTransformsMap();
    boolean nestedGroupIntoBatchesFound = false;
    for (RunnerApi.PTransform candidate : transformsById.values()) {
        String candidateUrn = candidate.getSpec().getUrn();
        if (!candidateUrn.equals(PTransformTranslation.GROUP_INTO_BATCHES_WITH_SHARDED_KEY_URN)) {
            continue;
        }
        // Every child is visited (no early exit) so a dangling subtransform id still surfaces.
        for (String childId : candidate.getSubtransformsList()) {
            String childUrn = transformsById.get(childId).getSpec().getUrn();
            if (childUrn.equals(PTransformTranslation.GROUP_INTO_BATCHES_URN)) {
                nestedGroupIntoBatchesFound = true;
            }
        }
    }
    assertTrue(nestedGroupIntoBatchesFound);

    // A coder with the sharded-key URN must be registered among the proto's components.
    Map<String, RunnerApi.Coder> codersById =
        jobSpec.getPipelineProto().getComponents().getCodersMap();
    boolean shardedKeyCoderFound =
        codersById.values().stream()
            .anyMatch(
                candidate ->
                    candidate.getSpec().getUrn().equals(ModelCoders.SHARDED_KEY_CODER_URN));
    assertTrue(shardedKeyCoderFound);
}
Also used : SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) ArrayList(java.util.ArrayList) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Step(com.google.api.services.dataflow.model.Step) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) JobSpecification(org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) PTransform(org.apache.beam.sdk.transforms.PTransform) Test(org.junit.Test)

Example 8 with JobSpecification

Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by Apache.

The class DataflowPipelineTranslatorTest, method testBatchGroupIntoBatchesTranslation.

/**
 * Builds a batch GroupIntoBatches job spec with no experiments and checks that the last
 * translated step carries the {@code PRESERVES_KEYS} property with the value {@code "true"}.
 */
@Test
public void testBatchGroupIntoBatchesTranslation() throws Exception {
    JobSpecification jobSpec = runBatchGroupIntoBatchesAndGetJobSpec(false, Collections.emptyList());

    // The step under inspection is the final one of the translated job.
    List<Step> jobSteps = jobSpec.getJob().getSteps();
    Step lastStep = jobSteps.get(jobSteps.size() - 1);
    Map<String, Object> lastStepProperties = lastStep.getProperties();

    assertTrue(lastStepProperties.containsKey(PropertyNames.PRESERVES_KEYS));
    assertEquals("true", getString(lastStepProperties, PropertyNames.PRESERVES_KEYS));
}
Also used : CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) JobSpecification(org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification) Step(com.google.api.services.dataflow.model.Step) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Test(org.junit.Test)

Example 9 with JobSpecification

Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by Apache.

The class DataflowPipelineTranslatorTest, method testStreamingGroupIntoBatchesTranslation.

/**
 * Builds a streaming GroupIntoBatches job spec (helper invoked with {@code false}) using only
 * the streaming-engine and windmill-service experiments, then checks that the last translated
 * step uses keyed state, does not declare shardable state, and declares key preservation.
 */
@Test
public void testStreamingGroupIntoBatchesTranslation() throws Exception {
    List<String> experiments =
        new ArrayList<>(
            ImmutableList.of(
                GcpOptions.STREAMING_ENGINE_EXPERIMENT, GcpOptions.WINDMILL_SERVICE_EXPERIMENT));
    JobSpecification jobSpec = runStreamingGroupIntoBatchesAndGetJobSpec(false, experiments);

    // The step under inspection is the final one of the translated job.
    List<Step> jobSteps = jobSpec.getJob().getSteps();
    Step lastStep = jobSteps.get(jobSteps.size() - 1);
    Map<String, Object> lastStepProperties = lastStep.getProperties();

    assertTrue(lastStepProperties.containsKey(PropertyNames.USES_KEYED_STATE));
    assertEquals("true", getString(lastStepProperties, PropertyNames.USES_KEYED_STATE));
    assertFalse(lastStepProperties.containsKey(PropertyNames.ALLOWS_SHARDABLE_STATE));
    assertTrue(lastStepProperties.containsKey(PropertyNames.PRESERVES_KEYS));
}
Also used : ArrayList(java.util.ArrayList) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) JobSpecification(org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification) Step(com.google.api.services.dataflow.model.Step) Test(org.junit.Test)

Example 10 with JobSpecification

Use of org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification in project beam by Apache.

The class DataflowPipelineTranslatorTest, method testSetSdkContainerImageInPipelineProto.

/**
 * Verifies how {@link DataflowPipelineOptions#setSdkContainerImage(String)} is reflected in the
 * model pipeline proto produced by translation.
 *
 * <p>After translating an empty pipeline, the proto is expected to hold exactly one {@link
 * Environment}, and the {@link DockerPayload#getContainerImage()} parsed from that environment
 * must equal what {@link DataflowRunner#getContainerImageForJob} returns for the same options.
 */
@Test
public void testSetSdkContainerImageInPipelineProto() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    String containerImage = "gcr.io/image:foo";
    options.as(DataflowPipelineOptions.class).setSdkContainerImage(containerImage);

    // Build the portable representation of an (empty) pipeline created from those options.
    Pipeline pipeline = Pipeline.create(options);
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline inputProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);

    // Translate to a Dataflow job spec; translator and runner are both derived from the options.
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    JobSpecification specification =
        translator.translate(pipeline, inputProto, sdkComponents, runner, Collections.emptyList());

    // Exactly one environment is expected, and its Docker payload names the job's image.
    RunnerApi.Pipeline translatedProto = specification.getPipelineProto();
    assertEquals(1, translatedProto.getComponents().getEnvironmentsCount());
    Environment onlyEnvironment =
        Iterables.getOnlyElement(translatedProto.getComponents().getEnvironmentsMap().values());
    DockerPayload dockerPayload = DockerPayload.parseFrom(onlyEnvironment.getPayload());
    assertEquals(
        DataflowRunner.getContainerImageForJob(options), dockerPayload.getContainerImage());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) JobSpecification(org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) DockerPayload(org.apache.beam.model.pipeline.v1.RunnerApi.DockerPayload) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

JobSpecification (org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification)14 Test (org.junit.Test)13 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)11 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)11 Step (com.google.api.services.dataflow.model.Step)8 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)8 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)8 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)6 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)6 Pipeline (org.apache.beam.sdk.Pipeline)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)3 PTransform (org.apache.beam.sdk.transforms.PTransform)3 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)3 File (java.io.File)2 Coder (org.apache.beam.sdk.coders.Coder)2 KvCoder (org.apache.beam.sdk.coders.KvCoder)2 VoidCoder (org.apache.beam.sdk.coders.VoidCoder)2 Module (com.fasterxml.jackson.databind.Module)1 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1