Search in sources :

Example 1 with PythonConfig

use of org.apache.flink.python.PythonConfig in project flink by apache.

the class PythonDependencyInfoTest method testParsePythonFiles.

/**
 * Checks that Python files registered under {@code PythonDependencyUtils.PYTHON_FILES} are
 * reported by {@code PythonDependencyInfo#getPythonFiles()} keyed by their distributed-cache
 * path, with the original file name as the value.
 *
 * <p>NOTE(review): the {@code distributedCache} fixture is defined outside this method; the
 * expected {@code /distributed_cache/fileN} paths presumably come from its setup.
 */
@Test
public void testParsePythonFiles() {
    // Not runnable on Windows: the drive letter prefix of the resolved paths is not controllable.
    Assume.assumeFalse(OperatingSystem.isWindows());

    // Cache key -> original file name, as stored in the job configuration.
    Map<String, String> registeredFiles = new HashMap<>();
    registeredFiles.put("python_file_{SHA256_0}", "test_file1.py");
    registeredFiles.put("python_file_{SHA256_1}", "test_file2.py");

    Configuration configuration = new Configuration();
    configuration.set(PythonDependencyUtils.PYTHON_FILES, registeredFiles);

    // Resolved cache path -> original file name, as expected after parsing.
    Map<String, String> resolvedFiles = new HashMap<>();
    resolvedFiles.put("/distributed_cache/file0", "test_file1.py");
    resolvedFiles.put("/distributed_cache/file1", "test_file2.py");

    PythonConfig pythonConfig = new PythonConfig(configuration);
    PythonDependencyInfo parsed = PythonDependencyInfo.create(pythonConfig, distributedCache);
    assertEquals(resolvedFiles, parsed.getPythonFiles());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) PythonConfig(org.apache.flink.python.PythonConfig) Test(org.junit.Test)

Example 2 with PythonConfig

use of org.apache.flink.python.PythonConfig in project flink by apache.

the class PythonDependencyInfoTest method testParsePythonExec.

/**
 * Checks that the interpreter path configured via {@code PythonOptions.PYTHON_EXECUTABLE} is
 * returned unchanged by {@code PythonDependencyInfo#getPythonExec()}.
 */
@Test
public void testParsePythonExec() {
    final String interpreter = "/usr/bin/python3";

    Configuration configuration = new Configuration();
    configuration.set(PythonOptions.PYTHON_EXECUTABLE, interpreter);

    PythonConfig pythonConfig = new PythonConfig(configuration);
    PythonDependencyInfo parsed = PythonDependencyInfo.create(pythonConfig, distributedCache);
    assertEquals("/usr/bin/python3", parsed.getPythonExec());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) PythonConfig(org.apache.flink.python.PythonConfig) Test(org.junit.Test)

Example 3 with PythonConfig

use of org.apache.flink.python.PythonConfig in project flink by apache.

the class PythonDependencyInfoTest method testParsePythonArchives.

/**
 * Checks that archives registered under {@code PythonDependencyUtils.PYTHON_ARCHIVES} are
 * reported by {@code PythonDependencyInfo#getArchives()} keyed by their distributed-cache path,
 * with the configured target name as the value.
 *
 * <p>NOTE(review): the {@code distributedCache} fixture is defined outside this method; the
 * expected {@code /distributed_cache/fileN} paths presumably come from its setup.
 */
@Test
public void testParsePythonArchives() {
    // Not runnable on Windows: the drive letter prefix of the resolved paths is not controllable.
    Assume.assumeFalse(OperatingSystem.isWindows());

    // Cache key -> target name, as stored in the job configuration.
    Map<String, String> registeredArchives = new HashMap<>();
    registeredArchives.put("python_archive_{SHA256_0}", "py27.zip");
    registeredArchives.put("python_archive_{SHA256_1}", "py37");

    Configuration configuration = new Configuration();
    configuration.set(PythonDependencyUtils.PYTHON_ARCHIVES, registeredArchives);

    // Resolved cache path -> target name, as expected after parsing.
    Map<String, String> resolvedArchives = new HashMap<>();
    resolvedArchives.put("/distributed_cache/file4", "py27.zip");
    resolvedArchives.put("/distributed_cache/file5", "py37");

    PythonConfig pythonConfig = new PythonConfig(configuration);
    PythonDependencyInfo parsed = PythonDependencyInfo.create(pythonConfig, distributedCache);
    assertEquals(resolvedArchives, parsed.getArchives());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) PythonConfig(org.apache.flink.python.PythonConfig) Test(org.junit.Test)

Example 4 with PythonConfig

use of org.apache.flink.python.PythonConfig in project flink by apache.

the class PythonDependencyInfoTest method testParsePythonRequirements.

/**
 * Checks parsing of the requirements-file option in two steps: first with only the
 * requirements file registered (no cache directory expected), then with the requirements
 * cache registered as well (both paths expected).
 *
 * <p>NOTE(review): the {@code distributedCache} fixture is defined outside this method; the
 * expected {@code /distributed_cache/fileN} paths presumably come from its setup.
 */
@Test
public void testParsePythonRequirements() throws IOException {
    // Not runnable on Windows: the drive letter prefix of the resolved paths is not controllable.
    Assume.assumeFalse(OperatingSystem.isWindows());

    Configuration configuration = new Configuration();
    configuration.set(PythonDependencyUtils.PYTHON_REQUIREMENTS_FILE, new HashMap<>());

    // Step 1: only the requirements file is registered. The map is fetched back from the
    // configuration and mutated in place, exactly as the production code path is exercised.
    configuration
            .get(PythonDependencyUtils.PYTHON_REQUIREMENTS_FILE)
            .put(PythonDependencyUtils.FILE, "python_requirements_file_{SHA256}");
    PythonDependencyInfo fileOnly =
            PythonDependencyInfo.create(new PythonConfig(configuration), distributedCache);
    assertEquals("/distributed_cache/file2", fileOnly.getRequirementsFilePath().get());
    assertFalse(fileOnly.getRequirementsCacheDir().isPresent());

    // Step 2: additionally register the requirements cache and parse again.
    configuration
            .get(PythonDependencyUtils.PYTHON_REQUIREMENTS_FILE)
            .put(PythonDependencyUtils.CACHE, "python_requirements_cache_{SHA256}");
    PythonDependencyInfo fileAndCache =
            PythonDependencyInfo.create(new PythonConfig(configuration), distributedCache);
    assertEquals("/distributed_cache/file2", fileAndCache.getRequirementsFilePath().get());
    assertEquals("/distributed_cache/file3", fileAndCache.getRequirementsCacheDir().get());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) PythonConfig(org.apache.flink.python.PythonConfig) Test(org.junit.Test)

Example 5 with PythonConfig

use of org.apache.flink.python.PythonConfig in project flink by apache.

the class BeamPythonFunctionRunner method open.

// ------------------------------------------------------------------------
/**
 * Initializes this runner: resets the per-bundle state, opens the environment manager, builds
 * the portable pipeline options and creates the stage bundle factory.
 *
 * <p>When a memory manager is available and {@code config.isUsingManagedMemory()} is true, the
 * job bundle factory and environment are obtained from a shared managed-memory resource;
 * otherwise a dedicated job bundle factory is created for this runner.
 *
 * @param config Python configuration; only {@code isUsingManagedMemory()} is consulted here
 * @throws IllegalArgumentException if managed memory is used and the managed memory fraction
 *     is not within (0, 1]
 * @throws Exception declared by the overridden method; may be propagated from the calls made
 *     while opening the environment or acquiring the shared resource
 */
@Override
public void open(PythonConfig config) throws Exception {
    this.bundleStarted = false;
    this.resultBuffer = new LinkedBlockingQueue<>();
    this.reusableResultTuple = new Tuple2<>();
    // The creation of stageBundleFactory depends on the initialized environment manager.
    environmentManager.open();
    PortablePipelineOptions portableOptions = PipelineOptionsFactory.as(PortablePipelineOptions.class);
    // Forward the configured state cache size to the pipeline options as an experiment flag.
    if (jobOptions.containsKey(PythonOptions.STATE_CACHE_SIZE.key())) {
        portableOptions.as(ExperimentalOptions.class).setExperiments(Collections.singletonList(ExperimentalOptions.STATE_CACHE_SIZE + "=" + jobOptions.get(PythonOptions.STATE_CACHE_SIZE.key())));
    }
    Struct pipelineOptions = PipelineOptionsTranslation.toProto(portableOptions);
    if (memoryManager != null && config.isUsingManagedMemory()) {
        // The fraction is passed as a format argument so that it fills the "%s" placeholder
        // instead of being appended after the last sentence of the message.
        Preconditions.checkArgument(
                managedMemoryFraction > 0 && managedMemoryFraction <= 1.0,
                "The configured managed memory fraction for Python worker process must be within (0, 1], was: %s. "
                        + "It may be because the consumer type \"Python\" was missing or set to 0 for the config option \"taskmanager.memory.managed.consumer-weights\".",
                managedMemoryFraction);
        final LongFunctionWithException<PythonSharedResources, Exception> initializer = (size) -> new PythonSharedResources(createJobBundleFactory(pipelineOptions), createPythonExecutionEnvironment(size));
        sharedResources = memoryManager.getSharedMemoryResourceForManagedMemory(MANAGED_MEMORY_RESOURCE_ID, initializer, managedMemoryFraction);
        LOG.info("Obtained shared Python process of size {} bytes", sharedResources.getSize());
        sharedResources.getResourceHandle().addPythonEnvironmentManager(environmentManager);
        // Kept in a local on purpose: this branch does not assign the runner-level
        // jobBundleFactory that the fallback branch below sets.
        JobBundleFactory sharedJobBundleFactory = sharedResources.getResourceHandle().getJobBundleFactory();
        RunnerApi.Environment environment = sharedResources.getResourceHandle().getEnvironment();
        stageBundleFactory = createStageBundleFactory(sharedJobBundleFactory, environment);
    } else {
        // there is no way to access the MemoryManager for the batch job of old planner,
        // fallback to the way that spawning a Python process for each Python operator
        jobBundleFactory = createJobBundleFactory(pipelineOptions);
        stageBundleFactory = createStageBundleFactory(jobBundleFactory, createPythonExecutionEnvironment(-1));
    }
    progressHandler = getProgressHandler(flinkMetricContainer);
}
Also used : PythonOptions(org.apache.flink.python.PythonOptions) OpaqueMemoryResource(org.apache.flink.runtime.memory.OpaqueMemoryResource) Arrays(java.util.Arrays) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Tuple2(org.apache.flink.api.java.tuple.Tuple2) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) LoggerFactory(org.slf4j.LoggerFactory) TimerInternals(org.apache.beam.runners.core.TimerInternals) UserStateReference(org.apache.beam.runners.core.construction.graph.UserStateReference) PythonFunctionRunner(org.apache.flink.python.PythonFunctionRunner) WINDOW_CODER_ID(org.apache.flink.python.Constants.WINDOW_CODER_ID) SideInputReference(org.apache.beam.runners.core.construction.graph.SideInputReference) JobBundleFactory(org.apache.beam.runners.fnexecution.control.JobBundleFactory) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Map(java.util.Map) TimerReference(org.apache.beam.runners.core.construction.graph.TimerReference) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) FlinkFnApi(org.apache.flink.fnexecution.v1.FlinkFnApi) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) TimerRegistration(org.apache.flink.streaming.api.operators.python.timer.TimerRegistration) INPUT_COLLECTION_ID(org.apache.flink.python.Constants.INPUT_COLLECTION_ID) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) PythonEnvironment(org.apache.flink.python.env.PythonEnvironment) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) Collection(java.util.Collection) ImmutableExecutableStage(org.apache.beam.runners.core.construction.graph.ImmutableExecutableStage) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) 
FlinkMetricContainer(org.apache.flink.python.metric.FlinkMetricContainer) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Preconditions(org.apache.flink.util.Preconditions) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) ModelCoders(org.apache.beam.runners.core.construction.ModelCoders) LongFunctionWithException(org.apache.flink.util.function.LongFunctionWithException) List(java.util.List) WINDOW_STRATEGY(org.apache.flink.python.Constants.WINDOW_STRATEGY) Optional(java.util.Optional) OUTPUT_COLLECTION_ID(org.apache.flink.python.Constants.OUTPUT_COLLECTION_ID) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) Coder(org.apache.beam.sdk.coders.Coder) ProcessPythonEnvironmentManager(org.apache.flink.python.env.process.ProcessPythonEnvironmentManager) PipelineOptionsTranslation(org.apache.beam.runners.core.construction.PipelineOptionsTranslation) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Environments(org.apache.beam.runners.core.construction.Environments) WRAPPER_TIMER_CODER_ID(org.apache.flink.python.Constants.WRAPPER_TIMER_CODER_ID) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) BiConsumer(java.util.function.BiConsumer) DefaultJobBundleFactory(org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) Nullable(javax.annotation.Nullable) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Logger(org.slf4j.Logger) ProtoUtils.createCoderProto(org.apache.flink.streaming.api.utils.ProtoUtils.createCoderProto) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) 
ProcessPythonEnvironment(org.apache.flink.python.env.process.ProcessPythonEnvironment) IOException(java.io.IOException) KeyedStateBackend(org.apache.flink.runtime.state.KeyedStateBackend) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) Timer(org.apache.beam.runners.core.construction.Timer) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) TIMER_CODER_ID(org.apache.flink.python.Constants.TIMER_CODER_ID) Internal(org.apache.flink.annotation.Internal) Struct(org.apache.beam.vendor.grpc.v1p26p0.com.google.protobuf.Struct) PythonConfig(org.apache.flink.python.PythonConfig) Collections(java.util.Collections) BeamUrns.getUrn(org.apache.beam.runners.core.construction.BeamUrns.getUrn) JobBundleFactory(org.apache.beam.runners.fnexecution.control.JobBundleFactory) DefaultJobBundleFactory(org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) LongFunctionWithException(org.apache.flink.util.function.LongFunctionWithException) IOException(java.io.IOException) Struct(org.apache.beam.vendor.grpc.v1p26p0.com.google.protobuf.Struct)

Aggregations

PythonConfig (org.apache.flink.python.PythonConfig)7 Configuration (org.apache.flink.configuration.Configuration)5 Test (org.junit.Test)4 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 Internal (org.apache.flink.annotation.Internal)2 PythonOptions (org.apache.flink.python.PythonOptions)2 FlinkMetricContainer (org.apache.flink.python.metric.FlinkMetricContainer)2 KeyedStateBackend (org.apache.flink.runtime.state.KeyedStateBackend)2 Preconditions (org.apache.flink.util.Preconditions)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 Field (java.lang.reflect.Field)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 Objects (java.util.Objects)1 Optional (java.util.Optional)1 LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)1