Search in sources:

Example 16 with Struct

use of org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Struct in project beam by apache.

In class DefaultJobBundleFactoryTest, method createsMultipleEnvironmentsWithSdkWorkerParallelism.

/**
 * Checks how many environments a {@link DefaultJobBundleFactory} creates for one environment
 * URN under two {@code sdkWorkerParallelism} settings.
 *
 * <p>With parallelism 2, the first two {@code forStage} calls are each expected to create one
 * environment factory and one environment, while the third call is expected to create nothing
 * new. With parallelism 0, the test expects {@code max(1, availableProcessors - 1)} environments
 * to be created, and every {@code forStage} call to return a distinct {@link StageBundleFactory}.
 */
@Test
public void createsMultipleEnvironmentsWithSdkWorkerParallelism() throws Exception {
    final ServerFactory defaultServerFactory = ServerFactory.createDefault();
    final Environment env =
        Environment.newBuilder()
            .setUrn("env:urn:a")
            .setPayload(ByteString.copyFrom(new byte[1]))
            .build();

    // Mocked environment factory plus the provider that hands it out for env's URN.
    final EnvironmentFactory envFactory = mock(EnvironmentFactory.class);
    when(envFactory.createEnvironment(eq(env), any())).thenReturn(remoteEnvironment);
    final EnvironmentFactory.Provider provider = mock(EnvironmentFactory.Provider.class);
    when(provider.createEnvironmentFactory(any(), any(), any(), any(), any(), any()))
        .thenReturn(envFactory);
    when(provider.getServerFactory()).thenReturn(defaultServerFactory);
    final Map<String, Provider> providersByUrn = ImmutableMap.of(env.getUrn(), provider);

    final PortablePipelineOptions options =
        PipelineOptionsFactory.as(PortablePipelineOptions.class);

    // ---- Explicit parallelism of 2 ----
    options.setSdkWorkerParallelism(2);
    final JobInfo fixedParallelismJob =
        JobInfo.create("testJob", "testJob", "token", PipelineOptionsTranslation.toProto(options));
    try (DefaultJobBundleFactory factory =
        new DefaultJobBundleFactory(
            fixedParallelismJob, providersByUrn, stageIdGenerator, serverInfo)) {
        // First stage: one factory and one environment so far.
        factory.forStage(getExecutableStage(env));
        verify(provider, Mockito.times(1))
            .createEnvironmentFactory(any(), any(), any(), any(), any(), any());
        verify(envFactory, Mockito.times(1)).createEnvironment(eq(env), any());

        // Second stage: a second factory and a second environment.
        factory.forStage(getExecutableStage(env));
        verify(provider, Mockito.times(2))
            .createEnvironmentFactory(any(), any(), any(), any(), any(), any());
        verify(envFactory, Mockito.times(2)).createEnvironment(eq(env), any());

        // Third stage: round robin, so the cumulative counts stay at two.
        factory.forStage(getExecutableStage(env));
        verify(provider, Mockito.times(2))
            .createEnvironmentFactory(any(), any(), any(), any(), any(), any());
        verify(envFactory, Mockito.times(2)).createEnvironment(eq(env), any());
    }

    // ---- Parallelism of 0 ----
    options.setSdkWorkerParallelism(0);
    final JobInfo autoParallelismJob =
        JobInfo.create("testJob", "testJob", "token", PipelineOptionsTranslation.toProto(options));
    // Only the environment factory mock is reset, so its invocation count restarts from zero;
    // the stubbing has to be re-installed afterwards.
    Mockito.reset(envFactory);
    when(envFactory.createEnvironment(eq(env), any())).thenReturn(remoteEnvironment);
    final int expectedEnvironments = Math.max(1, Runtime.getRuntime().availableProcessors() - 1);
    try (DefaultJobBundleFactory factory =
        new DefaultJobBundleFactory(
            autoParallelismJob, providersByUrn, stageIdGenerator, serverInfo)) {
        final HashSet<StageBundleFactory> distinctStageFactories = new HashSet<>();
        // Request more stage factories than there are environments to force round-robin reuse.
        final int totalRequests = expectedEnvironments + 5;
        for (int remaining = totalRequests; remaining > 0; remaining--) {
            distinctStageFactories.add(factory.forStage(getExecutableStage(env)));
        }
        verify(envFactory, Mockito.times(expectedEnvironments)).createEnvironment(eq(env), any());
        Assert.assertEquals(totalRequests, distinctStageFactories.size());
    }
}
Also used : ServerFactory(org.apache.beam.sdk.fn.server.ServerFactory) Matchers.containsString(org.hamcrest.Matchers.containsString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Provider(org.apache.beam.runners.fnexecution.environment.EnvironmentFactory.Provider) Struct(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Struct) EnvironmentFactory(org.apache.beam.runners.fnexecution.environment.EnvironmentFactory) Provider(org.apache.beam.runners.fnexecution.environment.EnvironmentFactory.Provider) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) RemoteEnvironment(org.apache.beam.runners.fnexecution.environment.RemoteEnvironment) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 17 with Struct

use of org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Struct in project beam by apache.

In class DefaultJobBundleFactoryTest, method rejectsStateCachingWithLoadBalancing.

/**
 * Checks that constructing a {@link DefaultJobBundleFactory} is rejected when the pipeline
 * options enable {@code loadBalanceBundles} together with the {@code state_cache_size=1}
 * experiment.
 *
 * <p>The expected failure is an {@link IllegalArgumentException} whose message contains
 * {@code "state_cache_size"}.
 */
@Test
public void rejectsStateCachingWithLoadBalancing() throws Exception {
    PortablePipelineOptions loadBalanced =
        PipelineOptionsFactory.as(PortablePipelineOptions.class);
    loadBalanced.setLoadBalanceBundles(true);
    ExperimentalOptions withStateCache = loadBalanced.as(ExperimentalOptions.class);
    ExperimentalOptions.addExperiment(withStateCache, "state_cache_size=1");
    Struct optionsProto = PipelineOptionsTranslation.toProto(withStateCache);

    Exception thrown =
        Assert.assertThrows(
            IllegalArgumentException.class,
            () -> {
                // The job info is built inside the lambda so that anything it throws is also
                // subject to the assertion.
                DefaultJobBundleFactory factory =
                    new DefaultJobBundleFactory(
                        JobInfo.create("testJob", "testJob", "token", optionsProto),
                        envFactoryProviderMap,
                        stageIdGenerator,
                        serverInfo);
                // Reached only if construction unexpectedly succeeds.
                factory.close();
            });
    assertThat(thrown.getMessage(), containsString("state_cache_size"));
}
Also used : PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) ExpectedException(org.junit.rules.ExpectedException) Struct(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Struct) Test(org.junit.Test)

Example 18 with Struct

use of org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Struct in project beam by apache.

In class SparkPipelineRunner, method main.

/**
 * Main method to be called only as the entry point to an executable jar with structure as defined
 * in {@link PortablePipelineJarUtils}.
 *
 * <p>Resolves the base job name (from the parsed arguments, otherwise from
 * {@link PortablePipelineJarUtils#getDefaultJobName()}), loads the pipeline and its options from
 * the classpath under that name, and runs the pipeline with a {@link SparkPipelineRunner}. The
 * invocation id is the job name joined to a random UUID by an underscore; it also becomes the
 * Spark app name when none is set.
 *
 * @param args command-line arguments, passed to {@code parseArgs}
 * @throws Exception if setup fails; an {@link Exception} thrown by the run itself is rethrown
 *     wrapped in a {@link RuntimeException} that names the invocation id
 */
public static void main(String[] args) throws Exception {
    // Register standard file systems.
    FileSystems.setDefaultPipelineOptions(PipelineOptionsFactory.create());

    SparkPipelineRunnerConfiguration config = parseArgs(args);

    // An explicitly configured base job name wins; otherwise use the default job name.
    String jobNameBase = config.baseJobName;
    if (jobNameBase == null) {
        jobNameBase = PortablePipelineJarUtils.getDefaultJobName();
    }
    Preconditions.checkArgument(
        jobNameBase != null,
        "No default job name found. Job name must be set using --base-job-name.");

    Pipeline pipelineProto = PortablePipelineJarUtils.getPipelineFromClasspath(jobNameBase);
    Struct optionsProto = PortablePipelineJarUtils.getPipelineOptionsFromClasspath(jobNameBase);

    // The retrieval token is only required by the legacy artifact service, which the Spark runner
    // no longer uses.
    String noArtifactsToken =
        ArtifactApi.CommitManifestResponse.Constants.NO_ARTIFACTS_STAGED_TOKEN
            .getValueDescriptor()
            .getOptions()
            .getExtension(RunnerApi.beamConstant);

    SparkPipelineOptions options =
        PipelineOptionsTranslation.fromProto(optionsProto).as(SparkPipelineOptions.class);

    // "%s" formats the UUID through its toString(), so no explicit conversion is needed.
    String invocationId = String.format("%s_%s", options.getJobName(), UUID.randomUUID());
    if (options.getAppName() == null) {
        LOG.debug("App name was null. Using invocationId {}", invocationId);
        options.setAppName(invocationId);
    }

    SparkPipelineRunner pipelineRunner = new SparkPipelineRunner(options);
    JobInfo info =
        JobInfo.create(
            invocationId,
            options.getJobName(),
            noArtifactsToken,
            PipelineOptionsTranslation.toProto(options));
    try {
        pipelineRunner.run(pipelineProto, info);
    } catch (Exception cause) {
        // Keep the original exception as the cause and add the invocation id to the message.
        throw new RuntimeException(String.format("Job %s failed.", invocationId), cause);
    }
    LOG.info("Job {} finished successfully.", invocationId);
}
Also used : JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) CmdLineException(org.kohsuke.args4j.CmdLineException) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline) Struct(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Struct)

Aggregations

Test (org.junit.Test)13 Struct (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Struct)9 Struct (com.google.protobuf.Struct)8 PortablePipelineOptions (org.apache.beam.sdk.options.PortablePipelineOptions)5 ListValue (com.google.protobuf.ListValue)3 TestStruct (com.google.protobuf.util.JsonTestProto.TestStruct)3 JobInfo (org.apache.beam.runners.fnexecution.provisioning.JobInfo)3 StateRequestHandler (org.apache.beam.runners.fnexecution.state.StateRequestHandler)3 TypedStruct (com.github.udpa.udpa.type.v1.TypedStruct)2 Any (com.google.protobuf.Any)2 BoolValue (com.google.protobuf.BoolValue)2 BytesValue (com.google.protobuf.BytesValue)2 DoubleValue (com.google.protobuf.DoubleValue)2 FloatValue (com.google.protobuf.FloatValue)2 Int32Value (com.google.protobuf.Int32Value)2 Int64Value (com.google.protobuf.Int64Value)2 NullValue (com.google.protobuf.NullValue)2 StringValue (com.google.protobuf.StringValue)2 UInt32Value (com.google.protobuf.UInt32Value)2 UInt64Value (com.google.protobuf.UInt64Value)2