Search in sources :

Example 21 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class DataflowPipelineTranslatorTest method testMaxNumWorkersIsPassedWhenNoAlgorithmIsSet.

@Test
public void testMaxNumWorkersIsPassedWhenNoAlgorithmIsSet() throws IOException {
    final DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType noScaling = null;
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setMaxNumWorkers(42);
    options.setAutoscalingAlgorithm(noScaling);
    Pipeline p = buildPipeline(options);
    p.traverseTopologically(new RecordingPipelineVisitor());
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
    Job job = DataflowPipelineTranslator.fromOptions(options).translate(p, pipelineProto, sdkComponents, DataflowRunner.fromOptions(options), Collections.emptyList()).getJob();
    assertEquals(1, job.getEnvironment().getWorkerPools().size());
    assertNull(job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm());
    assertEquals(42, job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getMaxNumWorkers().intValue());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Job(com.google.api.services.dataflow.model.Job) DataflowPipelineWorkerPoolOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineWorkerPoolOptions) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 22 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class DataflowPipelineTranslatorTest method runStreamingGroupIntoBatchesAndGetJobSpec.

private JobSpecification runStreamingGroupIntoBatchesAndGetJobSpec(Boolean withShardedKey, List<String> experiments) throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setExperiments(experiments);
    options.setStreaming(true);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);
    PCollection<KV<Integer, String>> input = pipeline.apply(Create.of(Arrays.asList(KV.of(1, "1"), KV.of(2, "2"), KV.of(3, "3"))));
    if (withShardedKey) {
        input.apply(GroupIntoBatches.<Integer, String>ofSize(3).withShardedKey());
    } else {
        input.apply(GroupIntoBatches.ofSize(3));
    }
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    runner.replaceV1Transforms(pipeline);
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
    return translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) KV(org.apache.beam.sdk.values.KV) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Pipeline(org.apache.beam.sdk.Pipeline)

Example 23 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class DataflowPipelineTranslatorTest method testWorkerMachineTypeConfig.

@Test
public void testWorkerMachineTypeConfig() throws IOException {
    final String testMachineType = "test-machine-type";
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setWorkerMachineType(testMachineType);
    Pipeline p = buildPipeline(options);
    p.traverseTopologically(new RecordingPipelineVisitor());
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
    Job job = DataflowPipelineTranslator.fromOptions(options).translate(p, pipelineProto, sdkComponents, DataflowRunner.fromOptions(options), Collections.emptyList()).getJob();
    assertEquals(1, job.getEnvironment().getWorkerPools().size());
    WorkerPool workerPool = job.getEnvironment().getWorkerPools().get(0);
    assertEquals(testMachineType, workerPool.getMachineType());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) WorkerPool(com.google.api.services.dataflow.model.WorkerPool) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Job(com.google.api.services.dataflow.model.Job) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 24 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class DataflowPipelineTranslatorTest method testGoodWildcards.

/**
 * This tests a few corner cases that should not crash.
 */
@Test
public void testGoodWildcards() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    Pipeline pipeline = Pipeline.create(options);
    DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options);
    applyRead(pipeline, "gs://bucket/foo");
    applyRead(pipeline, "gs://bucket/foo/");
    applyRead(pipeline, "gs://bucket/foo/*");
    applyRead(pipeline, "gs://bucket/foo/?");
    applyRead(pipeline, "gs://bucket/foo/[0-9]");
    applyRead(pipeline, "gs://bucket/foo/*baz*");
    applyRead(pipeline, "gs://bucket/foo/*baz?");
    applyRead(pipeline, "gs://bucket/foo/[0-9]baz?");
    applyRead(pipeline, "gs://bucket/foo/baz/*");
    applyRead(pipeline, "gs://bucket/foo/baz/*wonka*");
    applyRead(pipeline, "gs://bucket/foo/*baz/wonka*");
    applyRead(pipeline, "gs://bucket/foo*/baz");
    applyRead(pipeline, "gs://bucket/foo?/baz");
    applyRead(pipeline, "gs://bucket/foo[0-9]/baz");
    // Check that translation doesn't fail.
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
    JobSpecification jobSpecification = t.translate(pipeline, pipelineProto, sdkComponents, DataflowRunner.fromOptions(options), Collections.emptyList());
    assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) JobSpecification(org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 25 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class DataflowPipelineTranslatorTest method testDiskSizeGbConfig.

@Test
public void testDiskSizeGbConfig() throws IOException {
    final Integer diskSizeGb = 1234;
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setDiskSizeGb(diskSizeGb);
    Pipeline p = buildPipeline(options);
    p.traverseTopologically(new RecordingPipelineVisitor());
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
    Job job = DataflowPipelineTranslator.fromOptions(options).translate(p, pipelineProto, sdkComponents, DataflowRunner.fromOptions(options), Collections.emptyList()).getJob();
    assertEquals(1, job.getEnvironment().getWorkerPools().size());
    assertEquals(diskSizeGb, job.getEnvironment().getWorkerPools().get(0).getDiskSizeGb());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Job(com.google.api.services.dataflow.model.Job) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)61 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)48 Test (org.junit.Test)46 Pipeline (org.apache.beam.sdk.Pipeline)37 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)36 Job (com.google.api.services.dataflow.model.Job)25 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)25 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)21 KV (org.apache.beam.sdk.values.KV)14 Map (java.util.Map)12 Step (com.google.api.services.dataflow.model.Step)11 ArrayList (java.util.ArrayList)11 List (java.util.List)9 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)9 HashMap (java.util.HashMap)8 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)8 WindowedValue (org.apache.beam.sdk.util.WindowedValue)7 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)7 InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput)6 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)6