
Example 26 with Environment

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

From the class CreateExecutableStageNodeFunction, the method getEnvironmentFromPTransform:

private Environment getEnvironmentFromPTransform(RunnerApi.Components components, Set<PTransformNode> sdkTransforms) {
    RehydratedComponents sdkComponents = RehydratedComponents.forComponents(components);
    Environment env = null;
    for (PTransformNode pTransformNode : sdkTransforms) {
        // Take the first environment declared by any of the SDK transforms; stays null if none declares one.
        env = Environments.getEnvironment(pTransformNode.getTransform(), sdkComponents).orElse(null);
        if (env != null) {
            break;
        }
    }
    return env;
}
Also used : PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents)
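
For illustration, here is a minimal hedged sketch of the same lookup for a single transform, written as a hypothetical helper (environmentOf and the transform id argument are placeholders, not part of the Beam source); it assumes java.util.Optional in addition to the imports listed above.

// Hypothetical helper: resolve the environment of one transform looked up by id.
private static Optional<Environment> environmentOf(RunnerApi.Components components, String transformId) {
    RehydratedComponents rehydrated = RehydratedComponents.forComponents(components);
    RunnerApi.PTransform transform = components.getTransformsOrThrow(transformId);
    // Empty when the transform does not declare an SDK environment.
    return Environments.getEnvironment(transform, rehydrated);
}

Callers can then fall back to a default environment when the Optional is empty, mirroring the null check in the method above.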

Example 27 with Environment

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

From the class RemoteEnvironmentTest, the test method forHandlerReturnsProvided:

@Test
public void forHandlerReturnsProvided() {
    InstructionRequestHandler handler = mock(InstructionRequestHandler.class);
    Environment environment = Environments.createDockerEnvironment("my_url");
    RemoteEnvironment remoteEnvironment = RemoteEnvironment.forHandler(environment, handler);
    assertThat(remoteEnvironment.getEnvironment(), theInstance(environment));
    assertThat(remoteEnvironment.getInstructionRequestHandler(), theInstance(handler));
}
Also used : Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) InstructionRequestHandler(org.apache.beam.runners.fnexecution.control.InstructionRequestHandler) Test(org.junit.Test)
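
For comparison, a minimal hedged sketch of what the wrapped Environment itself contains. It assumes, as the Dataflow test in Example 29 does, that createDockerEnvironment stores the image URL in the DockerPayload; the image name is a placeholder and the sketch additionally assumes DockerPayload and JUnit's assertEquals are imported.

@Test
public void dockerEnvironmentCarriesContainerImage() throws Exception {
    // Placeholder image URL; any string accepted by createDockerEnvironment works here.
    Environment environment = Environments.createDockerEnvironment("my_image:latest");
    DockerPayload payload = DockerPayload.parseFrom(environment.getPayload());
    assertEquals("my_image:latest", payload.getContainerImage());
}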

Example 28 with Environment

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

From the class SingleEnvironmentInstanceJobBundleFactoryTest, the test method closeShutsDownEnvironmentsWhenSomeFail:

@Test
public void closeShutsDownEnvironmentsWhenSomeFail() throws Exception {
    Pipeline p = Pipeline.create();
    ExperimentalOptions.addExperiment(p.getOptions().as(ExperimentalOptions.class), "beam_fn_api");
    p.apply("Create", Create.of(1, 2, 3));
    ExecutableStage firstEnvStage = GreedyPipelineFuser.fuse(PipelineTranslation.toProto(p)).getFusedStages().stream().findFirst().get();
    ExecutableStagePayload basePayload = ExecutableStagePayload.parseFrom(firstEnvStage.toPTransform("foo").getSpec().getPayload());
    Environment secondEnv = Environments.createDockerEnvironment("second_env");
    ExecutableStage secondEnvStage = ExecutableStage.fromPayload(basePayload.toBuilder().setEnvironment(secondEnv).build());
    Environment thirdEnv = Environments.createDockerEnvironment("third_env");
    ExecutableStage thirdEnvStage = ExecutableStage.fromPayload(basePayload.toBuilder().setEnvironment(thirdEnv).build());
    RemoteEnvironment firstRemoteEnv = mock(RemoteEnvironment.class, "First Remote Env");
    RemoteEnvironment secondRemoteEnv = mock(RemoteEnvironment.class, "Second Remote Env");
    RemoteEnvironment thirdRemoteEnv = mock(RemoteEnvironment.class, "Third Remote Env");
    when(environmentFactory.createEnvironment(firstEnvStage.getEnvironment(), GENERATED_ID)).thenReturn(firstRemoteEnv);
    when(environmentFactory.createEnvironment(secondEnvStage.getEnvironment(), GENERATED_ID)).thenReturn(secondRemoteEnv);
    when(environmentFactory.createEnvironment(thirdEnvStage.getEnvironment(), GENERATED_ID)).thenReturn(thirdRemoteEnv);
    when(firstRemoteEnv.getInstructionRequestHandler()).thenReturn(instructionRequestHandler);
    when(secondRemoteEnv.getInstructionRequestHandler()).thenReturn(instructionRequestHandler);
    when(thirdRemoteEnv.getInstructionRequestHandler()).thenReturn(instructionRequestHandler);
    factory.forStage(firstEnvStage);
    factory.forStage(secondEnvStage);
    factory.forStage(thirdEnvStage);
    IllegalStateException firstException = new IllegalStateException("first stage");
    doThrow(firstException).when(firstRemoteEnv).close();
    IllegalStateException thirdException = new IllegalStateException("third stage");
    doThrow(thirdException).when(thirdRemoteEnv).close();
    try {
        factory.close();
        fail("Factory close should have thrown");
    } catch (IllegalStateException expected) {
        if (expected.equals(firstException)) {
            assertThat(ImmutableList.copyOf(expected.getSuppressed()), contains(thirdException));
        } else if (expected.equals(thirdException)) {
            assertThat(ImmutableList.copyOf(expected.getSuppressed()), contains(firstException));
        } else {
            throw expected;
        }
        verify(firstRemoteEnv).close();
        verify(secondRemoteEnv).close();
        verify(thirdRemoteEnv).close();
    }
}
Also used : RemoteEnvironment(org.apache.beam.runners.fnexecution.environment.RemoteEnvironment) ExecutableStagePayload(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)
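
The close behaviour verified above follows the usual suppressed-exception pattern: every environment is closed, the first failure is rethrown, and later failures are attached to it via addSuppressed. A generic hedged sketch of that pattern (illustrative only, not Beam's actual implementation; assumes java.util.List) looks like this:

// Generic sketch of "close all, suppress secondary failures".
static void closeAll(List<? extends AutoCloseable> resources) throws Exception {
    Exception failure = null;
    for (AutoCloseable resource : resources) {
        try {
            resource.close();
        } catch (Exception e) {
            if (failure == null) {
                failure = e;              // first failure becomes the thrown exception
            } else {
                failure.addSuppressed(e); // later failures are attached as suppressed
            }
        }
    }
    if (failure != null) {
        throw failure;
    }
}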

Example 29 with Environment

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

From the class DataflowPipelineTranslatorTest, the test method testSetWorkerHarnessContainerImageInPipelineProto:

/**
 * Tests that when (deprecated) {@link
 * DataflowPipelineOptions#setWorkerHarnessContainerImage(String)} pipeline option is set, {@link
 * DataflowRunner} sets that value as the {@link DockerPayload#getContainerImage()} of the default
 * {@link Environment} used when generating the model pipeline proto.
 */
@Test
public void testSetWorkerHarnessContainerImageInPipelineProto() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    String containerImage = "gcr.io/image:foo";
    options.as(DataflowPipelineOptions.class).setWorkerHarnessContainerImage(containerImage);
    Pipeline p = Pipeline.create(options);
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline proto = PipelineTranslation.toProto(p, sdkComponents, true);
    JobSpecification specification = DataflowPipelineTranslator.fromOptions(options).translate(p, proto, sdkComponents, DataflowRunner.fromOptions(options), Collections.emptyList());
    RunnerApi.Pipeline pipelineProto = specification.getPipelineProto();
    assertEquals(1, pipelineProto.getComponents().getEnvironmentsCount());
    Environment defaultEnvironment = Iterables.getOnlyElement(pipelineProto.getComponents().getEnvironmentsMap().values());
    DockerPayload payload = DockerPayload.parseFrom(defaultEnvironment.getPayload());
    assertEquals(DataflowRunner.getContainerImageForJob(options), payload.getContainerImage());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) JobSpecification(org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) DockerPayload(org.apache.beam.model.pipeline.v1.RunnerApi.DockerPayload) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)
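
For readers inspecting a translated pipeline by hand, here is a small hedged sketch of the same check that iterates every environment in the proto rather than assuming there is exactly one. The helper name is hypothetical, java.util.Map is assumed to be imported, and the sketch assumes all environments are Docker environments, as in the test above.

// Hypothetical helper: print the container image of every environment in a pipeline proto.
static void printContainerImages(RunnerApi.Pipeline pipelineProto) throws Exception {
    for (Map.Entry<String, Environment> entry
            : pipelineProto.getComponents().getEnvironmentsMap().entrySet()) {
        DockerPayload payload = DockerPayload.parseFrom(entry.getValue().getPayload());
        System.out.println(entry.getKey() + " -> " + payload.getContainerImage());
    }
}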

Example 30 with Environment

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project flink by apache.

From the class BeamPythonFunctionRunner, the method open:

// ------------------------------------------------------------------------
@Override
public void open(PythonConfig config) throws Exception {
    this.bundleStarted = false;
    this.resultBuffer = new LinkedBlockingQueue<>();
    this.reusableResultTuple = new Tuple2<>();
    // The creation of stageBundleFactory depends on the initialized environment manager.
    environmentManager.open();
    PortablePipelineOptions portableOptions = PipelineOptionsFactory.as(PortablePipelineOptions.class);
    if (jobOptions.containsKey(PythonOptions.STATE_CACHE_SIZE.key())) {
        portableOptions.as(ExperimentalOptions.class).setExperiments(Collections.singletonList(ExperimentalOptions.STATE_CACHE_SIZE + "=" + jobOptions.get(PythonOptions.STATE_CACHE_SIZE.key())));
    }
    Struct pipelineOptions = PipelineOptionsTranslation.toProto(portableOptions);
    if (memoryManager != null && config.isUsingManagedMemory()) {
        Preconditions.checkArgument(
            managedMemoryFraction > 0 && managedMemoryFraction <= 1.0,
            "The configured managed memory fraction for Python worker process must be within (0, 1], was: %s. "
                + "It may be because the consumer type \"Python\" was missing or set to 0 for the config option \"taskmanager.memory.managed.consumer-weights\".",
            managedMemoryFraction);
        final LongFunctionWithException<PythonSharedResources, Exception> initializer = (size) -> new PythonSharedResources(createJobBundleFactory(pipelineOptions), createPythonExecutionEnvironment(size));
        sharedResources = memoryManager.getSharedMemoryResourceForManagedMemory(MANAGED_MEMORY_RESOURCE_ID, initializer, managedMemoryFraction);
        LOG.info("Obtained shared Python process of size {} bytes", sharedResources.getSize());
        sharedResources.getResourceHandle().addPythonEnvironmentManager(environmentManager);
        JobBundleFactory jobBundleFactory = sharedResources.getResourceHandle().getJobBundleFactory();
        RunnerApi.Environment environment = sharedResources.getResourceHandle().getEnvironment();
        stageBundleFactory = createStageBundleFactory(jobBundleFactory, environment);
    } else {
        // The MemoryManager is not accessible for batch jobs of the old planner,
        // so fall back to spawning a separate Python process for each Python operator.
        jobBundleFactory = createJobBundleFactory(pipelineOptions);
        stageBundleFactory = createStageBundleFactory(jobBundleFactory, createPythonExecutionEnvironment(-1));
    }
    progressHandler = getProgressHandler(flinkMetricContainer);
}
Also used : PythonOptions(org.apache.flink.python.PythonOptions) OpaqueMemoryResource(org.apache.flink.runtime.memory.OpaqueMemoryResource) Arrays(java.util.Arrays) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Tuple2(org.apache.flink.api.java.tuple.Tuple2) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) LoggerFactory(org.slf4j.LoggerFactory) TimerInternals(org.apache.beam.runners.core.TimerInternals) UserStateReference(org.apache.beam.runners.core.construction.graph.UserStateReference) PythonFunctionRunner(org.apache.flink.python.PythonFunctionRunner) WINDOW_CODER_ID(org.apache.flink.python.Constants.WINDOW_CODER_ID) SideInputReference(org.apache.beam.runners.core.construction.graph.SideInputReference) JobBundleFactory(org.apache.beam.runners.fnexecution.control.JobBundleFactory) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Map(java.util.Map) TimerReference(org.apache.beam.runners.core.construction.graph.TimerReference) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) FlinkFnApi(org.apache.flink.fnexecution.v1.FlinkFnApi) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) TimerRegistration(org.apache.flink.streaming.api.operators.python.timer.TimerRegistration) INPUT_COLLECTION_ID(org.apache.flink.python.Constants.INPUT_COLLECTION_ID) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) PythonEnvironment(org.apache.flink.python.env.PythonEnvironment) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) Collection(java.util.Collection) ImmutableExecutableStage(org.apache.beam.runners.core.construction.graph.ImmutableExecutableStage) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) FlinkMetricContainer(org.apache.flink.python.metric.FlinkMetricContainer) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Preconditions(org.apache.flink.util.Preconditions) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) ModelCoders(org.apache.beam.runners.core.construction.ModelCoders) LongFunctionWithException(org.apache.flink.util.function.LongFunctionWithException) List(java.util.List) WINDOW_STRATEGY(org.apache.flink.python.Constants.WINDOW_STRATEGY) Optional(java.util.Optional) OUTPUT_COLLECTION_ID(org.apache.flink.python.Constants.OUTPUT_COLLECTION_ID) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) Coder(org.apache.beam.sdk.coders.Coder) ProcessPythonEnvironmentManager(org.apache.flink.python.env.process.ProcessPythonEnvironmentManager) PipelineOptionsTranslation(org.apache.beam.runners.core.construction.PipelineOptionsTranslation) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Environments(org.apache.beam.runners.core.construction.Environments) WRAPPER_TIMER_CODER_ID(org.apache.flink.python.Constants.WRAPPER_TIMER_CODER_ID) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) BiConsumer(java.util.function.BiConsumer) DefaultJobBundleFactory(org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory) 
StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) Nullable(javax.annotation.Nullable) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Logger(org.slf4j.Logger) ProtoUtils.createCoderProto(org.apache.flink.streaming.api.utils.ProtoUtils.createCoderProto) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) ProcessPythonEnvironment(org.apache.flink.python.env.process.ProcessPythonEnvironment) IOException(java.io.IOException) KeyedStateBackend(org.apache.flink.runtime.state.KeyedStateBackend) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) Timer(org.apache.beam.runners.core.construction.Timer) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) TIMER_CODER_ID(org.apache.flink.python.Constants.TIMER_CODER_ID) Internal(org.apache.flink.annotation.Internal) Struct(org.apache.beam.vendor.grpc.v1p26p0.com.google.protobuf.Struct) PythonConfig(org.apache.flink.python.PythonConfig) Collections(java.util.Collections) BeamUrns.getUrn(org.apache.beam.runners.core.construction.BeamUrns.getUrn)
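
The option handling at the top of open() can be reproduced in isolation. Below is a minimal hedged sketch that builds portable pipeline options with a single experiment and serializes them to the proto Struct passed to the job bundle factory; it uses only calls already imported above, and the experiment value 1000 is a placeholder.

// Sketch: portable options with one experiment, serialized to a proto Struct.
PortablePipelineOptions portableOptions = PipelineOptionsFactory.as(PortablePipelineOptions.class);
portableOptions.as(ExperimentalOptions.class)
        .setExperiments(Collections.singletonList(ExperimentalOptions.STATE_CACHE_SIZE + "=1000")); // placeholder value
Struct pipelineOptions = PipelineOptionsTranslation.toProto(portableOptions);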

Aggregations

Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment): 33
Test (org.junit.Test): 28
PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform): 17
RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 14
ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 13
PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode): 12
PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode): 11
PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection): 8
ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList): 8
Map (java.util.Map): 7
RemoteEnvironment (org.apache.beam.runners.fnexecution.environment.RemoteEnvironment): 7
Pipeline (org.apache.beam.sdk.Pipeline): 7
IOException (java.io.IOException): 6
SdkComponents (org.apache.beam.runners.core.construction.SdkComponents): 5
EnvironmentFactory (org.apache.beam.runners.fnexecution.environment.EnvironmentFactory): 5
Provider (org.apache.beam.runners.fnexecution.environment.EnvironmentFactory.Provider): 5
ServerFactory (org.apache.beam.sdk.fn.server.ServerFactory): 5
InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException): 5
Matchers.containsString (org.hamcrest.Matchers.containsString): 5
ArrayList (java.util.ArrayList): 4