use of org.apache.beam.runners.fnexecution.control.JobBundleFactory in project flink by apache.
the class PythonTestUtils method createMockJobBundleFactory.
public static JobBundleFactory createMockJobBundleFactory() {
    JobBundleFactory jobBundleFactorySpy = spy(JobBundleFactory.class);
    StageBundleFactory stageBundleFactorySpy = spy(StageBundleFactory.class);
    when(jobBundleFactorySpy.forStage(any())).thenReturn(stageBundleFactorySpy);

    ProcessBundleDescriptors.ExecutableProcessBundleDescriptor processBundleDescriptor =
        spy(ProcessBundleDescriptors.ExecutableProcessBundleDescriptor.class);
    when(stageBundleFactorySpy.getProcessBundleDescriptor()).thenReturn(processBundleDescriptor);

    RemoteBundle remoteBundleSpy = spy(RemoteBundle.class);
    try {
        // getBundle declares a checked exception, so the stubbing call must be wrapped.
        when(stageBundleFactorySpy.getBundle(
                any(OutputReceiverFactory.class),
                any(TimerReceiverFactory.class),
                any(StateRequestHandler.class),
                any(BundleProgressHandler.class)))
            .thenReturn(remoteBundleSpy);
    } catch (Exception e) {
        // ignore
    }

    // Expose a single spied input receiver under the name "input".
    Map<String, FnDataReceiver> inputReceivers = new HashMap<>();
    FnDataReceiver<WindowedValue<?>> windowedValueReceiverSpy = spy(FnDataReceiver.class);
    inputReceivers.put("input", windowedValueReceiverSpy);
    when(remoteBundleSpy.getInputReceivers()).thenReturn(inputReceivers);
    return jobBundleFactorySpy;
}
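A mock produced this way can stand in for a real connection to a Python worker in tests. The sketch below is an assumed usage, not code from the Flink repository: the test name, the mock(...) arguments passed to getBundle, and the element value are all illustrative.

// Hypothetical test sketch (assumed usage of the mock above).
@Test
public void pipelineRunsAgainstMockedHarness() throws Exception {
    JobBundleFactory factory = PythonTestUtils.createMockJobBundleFactory();

    // forStage(any()) is stubbed above, so any stage yields the spied StageBundleFactory.
    StageBundleFactory stageFactory = factory.forStage(mock(ExecutableStage.class));

    // getBundle(...) is stubbed to return the spied RemoteBundle.
    RemoteBundle bundle = stageFactory.getBundle(
        mock(OutputReceiverFactory.class),
        mock(TimerReceiverFactory.class),
        mock(StateRequestHandler.class),
        mock(BundleProgressHandler.class));

    // The bundle exposes the single "input" receiver registered by the mock;
    // elements sent to it go nowhere, which is what a harness-free test needs.
    FnDataReceiver receiver = bundle.getInputReceivers().get("input");
    receiver.accept(WindowedValue.valueInGlobalWindow("element"));
}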
use of org.apache.beam.runners.fnexecution.control.JobBundleFactory in project beam by apache.
the class BeamFnMapTaskExecutorFactory method create.
/**
 * Creates a new {@link DataflowMapTaskExecutor} from the given {@link MapTask} definition using
 * the provided {@link ReaderFactory}.
 */
@Override
public DataflowMapTaskExecutor create(
        InstructionRequestHandler instructionRequestHandler,
        GrpcFnServer<GrpcDataService> grpcDataFnServer,
        Endpoints.ApiServiceDescriptor dataApiServiceDescriptor,
        GrpcFnServer<GrpcStateService> grpcStateFnServer,
        MutableNetwork<Node, Edge> network,
        PipelineOptions options, String stageName,
        ReaderFactory readerFactory, SinkFactory sinkFactory,
        DataflowExecutionContext<?> executionContext,
        CounterSet counterSet, IdGenerator idGenerator) {
    // TODO: remove this once we trust the code paths
    checkArgument(
        DataflowRunner.hasExperiment(options.as(DataflowPipelineDebugOptions.class), "beam_fn_api"),
        "%s should only be used when beam_fn_api is enabled",
        getClass().getSimpleName());

    // Swap out all the InstructionOutput nodes with OutputReceiver nodes.
    Networks.replaceDirectedNetworkNodes(network, createOutputReceiversTransform(stageName, counterSet));

    if (DataflowRunner.hasExperiment(
            options.as(DataflowPipelineDebugOptions.class), "use_executable_stage_bundle_execution")) {
        // If use_executable_stage_bundle_execution is enabled, use ExecutableStage instead.
        LOG.debug("Using SingleEnvironmentInstanceJobBundleFactory");
        JobBundleFactory jobBundleFactory =
            SingleEnvironmentInstanceJobBundleFactory.create(
                StaticRemoteEnvironmentFactory.forService(instructionRequestHandler),
                grpcDataFnServer, grpcStateFnServer, idGenerator);
        Networks.replaceDirectedNetworkNodes(
            network,
            createOperationTransformForExecutableStageNode(network, stageName, executionContext, jobBundleFactory));
    } else {
        // Swap out all the RegisterFnRequest nodes with Operation nodes.
        Networks.replaceDirectedNetworkNodes(
            network,
            createOperationTransformForRegisterFnNodes(
                idGenerator, instructionRequestHandler, grpcStateFnServer.getService(), stageName, executionContext));
        // Swap out all the RemoteGrpcPort nodes with Operation nodes; note that the
        // RegisterFnRequest nodes are expected to have been replaced already.
        Networks.replaceDirectedNetworkNodes(
            network,
            createOperationTransformForGrpcPortNodes(
                network,
                grpcDataFnServer.getService(),
                // TODO: Set NameContext properly for these operations.
                executionContext.createOperationContext(
                    NameContext.create(stageName, stageName, stageName, stageName))));
    }

    // Swap out all the FetchAndFilterStreamingSideInput nodes with Operation nodes.
    Networks.replaceDirectedNetworkNodes(
        network,
        createOperationTransformForFetchAndFilterStreamingSideInputNodes(
            network, idGenerator, instructionRequestHandler, grpcDataFnServer.getService(),
            dataApiServiceDescriptor, executionContext, stageName));
    // Swap out all the ParallelInstruction nodes with Operation nodes.
    Networks.replaceDirectedNetworkNodes(
        network,
        createOperationTransformForParallelInstructionNodes(
            stageName, network, options, readerFactory, sinkFactory, executionContext));

    // Collect all the operations within the network and attach all the operations as receivers
    // to preceding output receivers.
    List<Operation> topoSortedOperations = new ArrayList<>();
    for (OperationNode node : Iterables.filter(Networks.topologicalOrder(network), OperationNode.class)) {
        topoSortedOperations.add(node.getOperation());
        for (Node predecessor : Iterables.filter(network.predecessors(node), OutputReceiverNode.class)) {
            ((OutputReceiverNode) predecessor).getOutputReceiver().addOutput((Receiver) node.getOperation());
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Map task network: {}", Networks.toDot(network));
    }
    return BeamFnMapTaskExecutor.withSharedCounterSet(
        topoSortedOperations, counterSet, executionContext.getExecutionStateTracker());
}
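Whichever branch runs, downstream operations consume the resulting JobBundleFactory through the same Beam lifecycle: resolve a per-stage factory, open a RemoteBundle, feed elements, and close. A condensed, hedged sketch of that lifecycle follows; the stage, handler, and element variables are assumed to exist in scope, and this is not code from BeamFnMapTaskExecutorFactory itself.

// Assumed illustration of the JobBundleFactory lifecycle used by the
// ExecutableStage operations; variable names here are placeholders.
StageBundleFactory stageFactory = jobBundleFactory.forStage(stage);
try (RemoteBundle bundle =
        stageFactory.getBundle(
            outputReceiverFactory, timerReceiverFactory, stateRequestHandler, progressHandler)) {
    // Push one element into the bundle's sole input; outputs arrive
    // asynchronously through outputReceiverFactory.
    FnDataReceiver receiver = Iterables.getOnlyElement(bundle.getInputReceivers().values());
    receiver.accept(WindowedValue.valueInGlobalWindow(element));
} // close() blocks until the SDK harness has finished processing the bundle.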
use of org.apache.beam.runners.fnexecution.control.JobBundleFactory in project flink by apache.
the class BeamPythonFunctionRunner method open.
// ------------------------------------------------------------------------
@Override
public void open(PythonConfig config) throws Exception {
    this.bundleStarted = false;
    this.resultBuffer = new LinkedBlockingQueue<>();
    this.reusableResultTuple = new Tuple2<>();

    // The creation of stageBundleFactory depends on the initialized environment manager.
    environmentManager.open();

    PortablePipelineOptions portableOptions = PipelineOptionsFactory.as(PortablePipelineOptions.class);
    if (jobOptions.containsKey(PythonOptions.STATE_CACHE_SIZE.key())) {
        portableOptions
            .as(ExperimentalOptions.class)
            .setExperiments(
                Collections.singletonList(
                    ExperimentalOptions.STATE_CACHE_SIZE + "=" + jobOptions.get(PythonOptions.STATE_CACHE_SIZE.key())));
    }
    Struct pipelineOptions = PipelineOptionsTranslation.toProto(portableOptions);

    if (memoryManager != null && config.isUsingManagedMemory()) {
        Preconditions.checkArgument(
            managedMemoryFraction > 0 && managedMemoryFraction <= 1.0,
            "The configured managed memory fraction for Python worker process must be within (0, 1], was: %s. "
                + "It may be because the consumer type \"Python\" was missing or set to 0 "
                + "for the config option \"taskmanager.memory.managed.consumer-weights\".",
            managedMemoryFraction);
        final LongFunctionWithException<PythonSharedResources, Exception> initializer =
            (size) ->
                new PythonSharedResources(
                    createJobBundleFactory(pipelineOptions), createPythonExecutionEnvironment(size));
        sharedResources =
            memoryManager.getSharedMemoryResourceForManagedMemory(
                MANAGED_MEMORY_RESOURCE_ID, initializer, managedMemoryFraction);
        LOG.info("Obtained shared Python process of size {} bytes", sharedResources.getSize());
        sharedResources.getResourceHandle().addPythonEnvironmentManager(environmentManager);
        JobBundleFactory jobBundleFactory = sharedResources.getResourceHandle().getJobBundleFactory();
        RunnerApi.Environment environment = sharedResources.getResourceHandle().getEnvironment();
        stageBundleFactory = createStageBundleFactory(jobBundleFactory, environment);
    } else {
        // There is no way to access the MemoryManager for batch jobs of the old planner,
        // so fall back to spawning a separate Python process for each Python operator.
        jobBundleFactory = createJobBundleFactory(pipelineOptions);
        stageBundleFactory = createStageBundleFactory(jobBundleFactory, createPythonExecutionEnvironment(-1));
    }
    progressHandler = getProgressHandler(flinkMetricContainer);
}
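The createStageBundleFactory helper is not shown in this excerpt. A plausible shape, assuming it merely builds an ExecutableStage for the given environment and delegates to JobBundleFactory.forStage; the createExecutableStage helper is a guess, not confirmed by the excerpt.

// Assumed sketch of the helper invoked above; createExecutableStage(...) is hypothetical.
private StageBundleFactory createStageBundleFactory(
        JobBundleFactory jobBundleFactory, RunnerApi.Environment environment) throws Exception {
    // Wrap the Python transform in an ExecutableStage bound to the given
    // environment, then ask the shared factory for a per-stage factory.
    return jobBundleFactory.forStage(createExecutableStage(environment));
}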