Search in sources :

Example 26 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class DataflowPipelineTranslatorTest method createSdkComponents.

/**
 * Creates an {@link SdkComponents} and registers a single Docker environment on it.
 *
 * <p>The environment's container image is whatever {@code DataflowRunner.getContainerImageForJob}
 * returns for the supplied options viewed as {@link DataflowPipelineOptions}.
 *
 * @param options the pipeline options from which the container image is looked up
 * @return the freshly created components with the Docker environment registered
 */
private SdkComponents createSdkComponents(PipelineOptions options) {
    SdkComponents components = SdkComponents.create();
    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
    String imageUrl = DataflowRunner.getContainerImageForJob(dataflowOptions);
    RunnerApi.Environment dockerEnvironment = Environments.createDockerEnvironment(imageUrl);
    components.registerEnvironment(dockerEnvironment);
    return components;
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents)

Example 27 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class DataflowPipelineTranslatorTest method runBatchGroupIntoBatchesAndGetJobSpec.

/**
 * Builds a non-streaming pipeline that applies {@code GroupIntoBatches} of size 3 to three
 * integer-keyed string elements, then translates it and returns the resulting job specification.
 *
 * @param withShardedKey when true, the {@code withShardedKey()} variant of the transform is applied
 * @param experiments the experiments to set on the pipeline options before translation
 * @return the specification produced by the translator for the pipeline
 * @throws IOException declared by the original signature
 */
private JobSpecification runBatchGroupIntoBatchesAndGetJobSpec(Boolean withShardedKey, List<String> experiments) throws IOException {
    DataflowPipelineOptions pipelineOptions = buildPipelineOptions();
    pipelineOptions.setExperiments(experiments);
    pipelineOptions.setStreaming(false);
    DataflowPipelineTranslator pipelineTranslator = DataflowPipelineTranslator.fromOptions(pipelineOptions);
    Pipeline p = Pipeline.create(pipelineOptions);

    List<KV<Integer, String>> elements = Arrays.asList(KV.of(1, "1"), KV.of(2, "2"), KV.of(3, "3"));
    PCollection<KV<Integer, String>> keyedInput = p.apply(Create.of(elements));

    // Pick the plain or the sharded-key flavour of the batching transform.
    if (!withShardedKey) {
        keyedInput.apply(GroupIntoBatches.ofSize(3));
    } else {
        keyedInput.apply(GroupIntoBatches.<Integer, String>ofSize(3).withShardedKey());
    }

    DataflowRunner dataflowRunner = DataflowRunner.fromOptions(pipelineOptions);
    dataflowRunner.replaceV1Transforms(p);

    SdkComponents components = createSdkComponents(pipelineOptions);
    RunnerApi.Pipeline proto = PipelineTranslation.toProto(p, components, true);
    return pipelineTranslator.translate(p, proto, components, dataflowRunner, Collections.emptyList());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) KV(org.apache.beam.sdk.values.KV) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Pipeline(org.apache.beam.sdk.Pipeline)

Example 28 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class DataflowPipelineTranslatorTest method testNetworkConfigMissing.

/**
 * Translates a pipeline built from the default test options and checks that the resulting job has
 * exactly one worker pool whose network is null.
 */
@Test
public void testNetworkConfigMissing() throws IOException {
    DataflowPipelineOptions pipelineOptions = buildPipelineOptions();
    Pipeline pipeline = buildPipeline(pipelineOptions);
    pipeline.traverseTopologically(new RecordingPipelineVisitor());

    SdkComponents components = createSdkComponents(pipelineOptions);
    RunnerApi.Pipeline proto = PipelineTranslation.toProto(pipeline, components, true);

    // Translator is created before the runner, mirroring the evaluation order of a single chained call.
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(pipelineOptions);
    DataflowRunner runner = DataflowRunner.fromOptions(pipelineOptions);
    Job translatedJob =
        translator
            .translate(pipeline, proto, components, runner, Collections.emptyList())
            .getJob();

    assertEquals(1, translatedJob.getEnvironment().getWorkerPools().size());
    assertNull(translatedJob.getEnvironment().getWorkerPools().get(0).getNetwork());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Job(com.google.api.services.dataflow.model.Job) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 29 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class DataflowPipelineTranslatorTest method testTaggedNamesOverridden.

/**
 * Verifies that, for a multi-output ParDo, the user names reported for its output collections are
 * of the form {@code <stepName>.out<index>} after translation, regardless of the tag ids or the
 * names the user assigned to the collections.
 */
@Test
public void testTaggedNamesOverridden() throws Exception {
    DataflowPipelineOptions pipelineOptions = buildPipelineOptions();
    // The runner is created before streaming is switched off; keep this ordering.
    DataflowRunner dataflowRunner = DataflowRunner.fromOptions(pipelineOptions);
    pipelineOptions.setStreaming(false);
    DataflowPipelineTranslator pipelineTranslator = DataflowPipelineTranslator.fromOptions(pipelineOptions);
    Pipeline p = Pipeline.create(pipelineOptions);

    // Two explicitly named tags and one anonymous tag.
    TupleTag<Integer> mainTag = new TupleTag<Integer>("frazzle") {
    };
    TupleTag<Integer> namedSideTag = new TupleTag<Integer>("bazzle") {
    };
    TupleTag<Integer> anonymousSideTag = new TupleTag<Integer>() {
    };

    PCollectionTuple tagged =
        p.apply(Create.of(3))
            .apply(
                ParDo.of(
                        new DoFn<Integer, Integer>() {

                            @ProcessElement
                            public void drop() {
                            }
                        })
                    .withOutputTags(mainTag, TupleTagList.of(namedSideTag).and(anonymousSideTag)));

    // User-assigned collection names that the translation is expected to disregard.
    tagged.get(mainTag).setName("bizbazzle");
    tagged.get(namedSideTag).setName("gonzaggle");
    tagged.get(anonymousSideTag).setName("froonazzle");

    dataflowRunner.replaceV1Transforms(p);
    SdkComponents components = createSdkComponents(pipelineOptions);
    RunnerApi.Pipeline proto = PipelineTranslation.toProto(p, components, true);
    Job translatedJob =
        pipelineTranslator
            .translate(p, proto, components, dataflowRunner, Collections.emptyList())
            .getJob();

    // Index 1 is the ParDo step.
    Step parDoStep = translatedJob.getSteps().get(1);
    String parDoName = getString(parDoStep.getProperties(), PropertyNames.USER_NAME);
    List<Map<String, Object>> outputs =
        Structs.getListOfMaps(parDoStep.getProperties(), PropertyNames.OUTPUT_INFO, null);
    assertThat(outputs.size(), equalTo(3));

    // The names set by the user _and_ the tags _must_ be ignored, or metrics will not show up.
    int position = 0;
    for (Map<String, Object> output : outputs) {
        String expectedName = String.format("%s.out%s", parDoName, position);
        assertThat(getString(output, PropertyNames.USER_NAME), equalTo(expectedName));
        position++;
    }
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) TupleTag(org.apache.beam.sdk.values.TupleTag) Step(com.google.api.services.dataflow.model.Step) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Pipeline(org.apache.beam.sdk.Pipeline) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Job(com.google.api.services.dataflow.model.Job) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) Test(org.junit.Test)

Example 30 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class WindowMappingFnRunnerTest method testWindowMapping.

/**
 * Builds a PTransform whose spec carries the translated default window mapping fn of
 * {@link GlobalWindows}, creates the map function for it, and checks that an element in an
 * {@link IntervalWindow} comes back with the same key paired with {@code GlobalWindow.INSTANCE}.
 */
@Test
public void testWindowMapping() throws Exception {
    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

    RunnerApi.PTransform transformProto =
        RunnerApi.PTransform.newBuilder()
            .setSpec(
                RunnerApi.FunctionSpec.newBuilder()
                    .setUrn(WindowMappingFnRunner.URN)
                    .setPayload(
                        ParDoTranslation.translateWindowMappingFn(
                                new GlobalWindows().getDefaultWindowMappingFn(), sdkComponents)
                            .toByteString())
                    .build())
            .build();

    ThrowingFunction<KV<Object, BoundedWindow>, KV<Object, BoundedWindow>> windowMapper =
        WindowMappingFnRunner.createMapFunctionForPTransform("pTransformId", transformProto);

    KV<Object, BoundedWindow> element =
        KV.of("abc", new IntervalWindow(Instant.now(), Duration.standardMinutes(1)));
    KV<Object, BoundedWindow> expected = KV.of(element.getKey(), GlobalWindow.INSTANCE);
    assertEquals(expected, windowMapper.apply(element));
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) KV(org.apache.beam.sdk.values.KV) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Aggregations

SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)61 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)48 Test (org.junit.Test)46 Pipeline (org.apache.beam.sdk.Pipeline)37 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)36 Job (com.google.api.services.dataflow.model.Job)25 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)25 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)21 KV (org.apache.beam.sdk.values.KV)14 Map (java.util.Map)12 Step (com.google.api.services.dataflow.model.Step)11 ArrayList (java.util.ArrayList)11 List (java.util.List)9 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)9 HashMap (java.util.HashMap)8 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)8 WindowedValue (org.apache.beam.sdk.util.WindowedValue)7 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)7 InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput)6 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)6