Search in sources :

Example 1 with FlinkMetricContainer

Use of org.apache.flink.python.metric.FlinkMetricContainer in the Apache Flink project.

The method open of the class BeamPythonFunctionRunner.

// ------------------------------------------------------------------------
/**
 * Prepares this runner for execution: resets the per-bundle state, opens the environment
 * manager and creates the {@code stageBundleFactory} and the progress handler.
 *
 * <p>When a memory manager is available and {@code config} reports that managed memory is
 * used, the job bundle factory and the Python execution environment are taken from a shared
 * resource registered with the memory manager under {@code MANAGED_MEMORY_RESOURCE_ID}.
 * Otherwise a job bundle factory is created for this runner alone.
 *
 * @param config the Python configuration; only {@code isUsingManagedMemory()} is read here
 * @throws Exception if any of the initialization steps fails
 * @throws IllegalArgumentException presumably raised by {@code Preconditions.checkArgument}
 *     when managed memory is used but {@code managedMemoryFraction} is outside (0, 1]
 */
@Override
public void open(PythonConfig config) throws Exception {
    this.bundleStarted = false;
    this.resultBuffer = new LinkedBlockingQueue<>();
    this.reusableResultTuple = new Tuple2<>();
    // The creation of stageBundleFactory depends on the initialized environment manager.
    environmentManager.open();
    PortablePipelineOptions portableOptions = PipelineOptionsFactory.as(PortablePipelineOptions.class);
    // Forward the configured state cache size to the pipeline options as an experiment flag.
    if (jobOptions.containsKey(PythonOptions.STATE_CACHE_SIZE.key())) {
        portableOptions.as(ExperimentalOptions.class).setExperiments(Collections.singletonList(ExperimentalOptions.STATE_CACHE_SIZE + "=" + jobOptions.get(PythonOptions.STATE_CACHE_SIZE.key())));
    }
    Struct pipelineOptions = PipelineOptionsTranslation.toProto(portableOptions);
    if (memoryManager != null && config.isUsingManagedMemory()) {
        // The fraction is passed as the template argument so that it fills the "%s"
        // placeholder instead of being appended after the end of the message.
        Preconditions.checkArgument(
                managedMemoryFraction > 0 && managedMemoryFraction <= 1.0,
                "The configured managed memory fraction for Python worker process must be within (0, 1], was: %s. "
                        + "It may be because the consumer type \"Python\" was missing or set to 0 for the config option \"taskmanager.memory.managed.consumer-weights\".",
                managedMemoryFraction);
        final LongFunctionWithException<PythonSharedResources, Exception> initializer = (size) -> new PythonSharedResources(createJobBundleFactory(pipelineOptions), createPythonExecutionEnvironment(size));
        sharedResources = memoryManager.getSharedMemoryResourceForManagedMemory(MANAGED_MEMORY_RESOURCE_ID, initializer, managedMemoryFraction);
        LOG.info("Obtained shared Python process of size {} bytes", sharedResources.getSize());
        sharedResources.getResourceHandle().addPythonEnvironmentManager(environmentManager);
        // NOTE(review): this local shadows the jobBundleFactory field assigned in the else
        // branch; presumably intentional so the shared factory is not treated as owned by
        // this runner -- confirm against close().
        JobBundleFactory jobBundleFactory = sharedResources.getResourceHandle().getJobBundleFactory();
        RunnerApi.Environment environment = sharedResources.getResourceHandle().getEnvironment();
        stageBundleFactory = createStageBundleFactory(jobBundleFactory, environment);
    } else {
        // there is no way to access the MemoryManager for the batch job of old planner,
        // fallback to the way that spawning a Python process for each Python operator
        jobBundleFactory = createJobBundleFactory(pipelineOptions);
        stageBundleFactory = createStageBundleFactory(jobBundleFactory, createPythonExecutionEnvironment(-1));
    }
    progressHandler = getProgressHandler(flinkMetricContainer);
}
Also used : PythonOptions(org.apache.flink.python.PythonOptions) OpaqueMemoryResource(org.apache.flink.runtime.memory.OpaqueMemoryResource) Arrays(java.util.Arrays) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Tuple2(org.apache.flink.api.java.tuple.Tuple2) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) LoggerFactory(org.slf4j.LoggerFactory) TimerInternals(org.apache.beam.runners.core.TimerInternals) UserStateReference(org.apache.beam.runners.core.construction.graph.UserStateReference) PythonFunctionRunner(org.apache.flink.python.PythonFunctionRunner) WINDOW_CODER_ID(org.apache.flink.python.Constants.WINDOW_CODER_ID) SideInputReference(org.apache.beam.runners.core.construction.graph.SideInputReference) JobBundleFactory(org.apache.beam.runners.fnexecution.control.JobBundleFactory) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Map(java.util.Map) TimerReference(org.apache.beam.runners.core.construction.graph.TimerReference) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) FlinkFnApi(org.apache.flink.fnexecution.v1.FlinkFnApi) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) TimerRegistration(org.apache.flink.streaming.api.operators.python.timer.TimerRegistration) INPUT_COLLECTION_ID(org.apache.flink.python.Constants.INPUT_COLLECTION_ID) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) PythonEnvironment(org.apache.flink.python.env.PythonEnvironment) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) Collection(java.util.Collection) ImmutableExecutableStage(org.apache.beam.runners.core.construction.graph.ImmutableExecutableStage) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) 
FlinkMetricContainer(org.apache.flink.python.metric.FlinkMetricContainer) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Preconditions(org.apache.flink.util.Preconditions) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) ModelCoders(org.apache.beam.runners.core.construction.ModelCoders) LongFunctionWithException(org.apache.flink.util.function.LongFunctionWithException) List(java.util.List) WINDOW_STRATEGY(org.apache.flink.python.Constants.WINDOW_STRATEGY) Optional(java.util.Optional) OUTPUT_COLLECTION_ID(org.apache.flink.python.Constants.OUTPUT_COLLECTION_ID) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) Coder(org.apache.beam.sdk.coders.Coder) ProcessPythonEnvironmentManager(org.apache.flink.python.env.process.ProcessPythonEnvironmentManager) PipelineOptionsTranslation(org.apache.beam.runners.core.construction.PipelineOptionsTranslation) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Environments(org.apache.beam.runners.core.construction.Environments) WRAPPER_TIMER_CODER_ID(org.apache.flink.python.Constants.WRAPPER_TIMER_CODER_ID) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) BiConsumer(java.util.function.BiConsumer) DefaultJobBundleFactory(org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) Nullable(javax.annotation.Nullable) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Logger(org.slf4j.Logger) ProtoUtils.createCoderProto(org.apache.flink.streaming.api.utils.ProtoUtils.createCoderProto) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) 
ProcessPythonEnvironment(org.apache.flink.python.env.process.ProcessPythonEnvironment) IOException(java.io.IOException) KeyedStateBackend(org.apache.flink.runtime.state.KeyedStateBackend) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) Timer(org.apache.beam.runners.core.construction.Timer) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) TIMER_CODER_ID(org.apache.flink.python.Constants.TIMER_CODER_ID) Internal(org.apache.flink.annotation.Internal) Struct(org.apache.beam.vendor.grpc.v1p26p0.com.google.protobuf.Struct) PythonConfig(org.apache.flink.python.PythonConfig) Collections(java.util.Collections) BeamUrns.getUrn(org.apache.beam.runners.core.construction.BeamUrns.getUrn) JobBundleFactory(org.apache.beam.runners.fnexecution.control.JobBundleFactory) DefaultJobBundleFactory(org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) LongFunctionWithException(org.apache.flink.util.function.LongFunctionWithException) IOException(java.io.IOException) Struct(org.apache.beam.vendor.grpc.v1p26p0.com.google.protobuf.Struct)

Aggregations

ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 List (java.util.List)1 Map (java.util.Map)1 Optional (java.util.Optional)1 LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)1 BiConsumer (java.util.function.BiConsumer)1 Collectors (java.util.stream.Collectors)1 Nullable (javax.annotation.Nullable)1 BeamFnApi (org.apache.beam.model.fnexecution.v1.BeamFnApi)1 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)1 TimerInternals (org.apache.beam.runners.core.TimerInternals)1 BeamUrns.getUrn (org.apache.beam.runners.core.construction.BeamUrns.getUrn)1 Environments (org.apache.beam.runners.core.construction.Environments)1 ModelCoders (org.apache.beam.runners.core.construction.ModelCoders)1 PipelineOptionsTranslation (org.apache.beam.runners.core.construction.PipelineOptionsTranslation)1 Timer (org.apache.beam.runners.core.construction.Timer)1