Use of org.apache.beam.runners.fnexecution.control.StageBundleFactory in project flink by apache.
The class PythonTestUtils, method createMockJobBundleFactory:
public static JobBundleFactory createMockJobBundleFactory() {
    JobBundleFactory jobBundleFactorySpy = spy(JobBundleFactory.class);
    StageBundleFactory stageBundleFactorySpy = spy(StageBundleFactory.class);
    when(jobBundleFactorySpy.forStage(any())).thenReturn(stageBundleFactorySpy);

    ProcessBundleDescriptors.ExecutableProcessBundleDescriptor processBundleDescriptor =
        spy(ProcessBundleDescriptors.ExecutableProcessBundleDescriptor.class);
    when(stageBundleFactorySpy.getProcessBundleDescriptor()).thenReturn(processBundleDescriptor);

    RemoteBundle remoteBundleSpy = spy(RemoteBundle.class);
    try {
        when(stageBundleFactorySpy.getBundle(
                any(OutputReceiverFactory.class),
                any(TimerReceiverFactory.class),
                any(StateRequestHandler.class),
                any(BundleProgressHandler.class)))
            .thenReturn(remoteBundleSpy);
    } catch (Exception e) {
        // getBundle declares a checked exception; stubbing it on a spy cannot actually throw.
    }

    Map<String, FnDataReceiver> inputReceivers = new HashMap<>();
    FnDataReceiver<WindowedValue<?>> windowedValueReceiverSpy = spy(FnDataReceiver.class);
    inputReceivers.put("input", windowedValueReceiverSpy);
    when(remoteBundleSpy.getInputReceivers()).thenReturn(inputReceivers);
    return jobBundleFactorySpy;
}
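A minimal usage sketch, not part of the Flink sources, of how a test might drive this mock: every stubbed call matches any argument, so a bundle can be obtained and fed without a live Python environment. Static Mockito imports (mock, verify, any) and an enclosing test method declaring throws Exception are assumed.
JobBundleFactory factory = PythonTestUtils.createMockJobBundleFactory();
// Any stage argument matches the stubbed forStage call.
StageBundleFactory stageFactory = factory.forStage(null);
RemoteBundle bundle = stageFactory.getBundle(
    mock(OutputReceiverFactory.class),
    mock(TimerReceiverFactory.class),
    mock(StateRequestHandler.class),
    mock(BundleProgressHandler.class));
// The stubbed bundle exposes exactly one receiver, registered under the key "input".
FnDataReceiver receiver = bundle.getInputReceivers().get("input");
receiver.accept(WindowedValue.valueInGlobalWindow(42));
// Because the receiver is a Mockito spy, the interaction can be verified afterwards.
verify(receiver).accept(any());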
Use of org.apache.beam.runners.fnexecution.control.StageBundleFactory in project beam by apache.
The class BeamFnMapTaskExecutorFactory, method createOperationTransformForExecutableStageNode:
private Function<Node, Node> createOperationTransformForExecutableStageNode(
        final Network<Node, Edge> network,
        final String stageName,
        final DataflowExecutionContext<?> executionContext,
        final JobBundleFactory jobBundleFactory) {
    return new TypeSafeNodeFunction<ExecutableStageNode>(ExecutableStageNode.class) {
        @Override
        public Node typedApply(ExecutableStageNode input) {
            StageBundleFactory stageBundleFactory = jobBundleFactory.forStage(input.getExecutableStage());
            Iterable<OutputReceiverNode> outputReceiverNodes =
                Iterables.filter(network.successors(input), OutputReceiverNode.class);
            Map<String, OutputReceiver> outputReceiverMap = new HashMap<>();
            outputReceiverNodes.forEach(
                outputReceiverNode -> outputReceiverMap.put(
                    outputReceiverNode.getPcollectionId(), outputReceiverNode.getOutputReceiver()));

            ImmutableMap.Builder<String, DataflowOperationContext> ptransformIdToOperationContextBuilder =
                ImmutableMap.builder();
            for (Map.Entry<String, NameContext> entry :
                    input.getPTransformIdToPartialNameContextMap().entrySet()) {
                NameContext fullNameContext =
                    NameContext.create(
                        stageName,
                        entry.getValue().originalName(),
                        entry.getValue().systemName(),
                        entry.getValue().userName());
                DataflowOperationContext operationContext =
                    executionContext.createOperationContext(fullNameContext);
                ptransformIdToOperationContextBuilder.put(entry.getKey(), operationContext);
            }
            ImmutableMap<String, DataflowOperationContext> ptransformIdToOperationContexts =
                ptransformIdToOperationContextBuilder.build();
            ImmutableMap<String, SideInputReader> ptransformIdToSideInputReaders =
                buildPTransformIdToSideInputReadersMap(
                    executionContext, input, ptransformIdToOperationContexts);
            Map<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>>
                ptransformIdToSideInputIdToPCollectionView = buildSideInputIdToPCollectionView(input);

            return OperationNode.create(
                new ProcessRemoteBundleOperation(
                    input.getExecutableStage(),
                    executionContext.createOperationContext(
                        NameContext.create(stageName, stageName, stageName, stageName)),
                    stageBundleFactory,
                    outputReceiverMap,
                    ptransformIdToSideInputReaders,
                    ptransformIdToSideInputIdToPCollectionView));
        }
    };
}
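For context, a hedged sketch of how the returned transform might be applied: the Dataflow worker's graph utilities rewrite the network in place, replacing each ExecutableStageNode with the OperationNode built above. Treat the Networks.replaceDirectedNetworkNodes entry point as an assumption based on how sibling transforms in this factory are applied.
// Illustrative only: apply the transform to every node of the mutable graph.
Networks.replaceDirectedNetworkNodes(
    network,
    createOperationTransformForExecutableStageNode(
        network, stageName, executionContext, jobBundleFactory));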
Use of org.apache.beam.runners.fnexecution.control.StageBundleFactory in project beam by apache.
The class ExecutableStageDoFnOperatorTest, method outputsAreTaggedCorrectly:
@Test
public void outputsAreTaggedCorrectly() throws Exception {
    WindowedValue.ValueOnlyWindowedValueCoder<Integer> coder =
        WindowedValue.getValueOnlyCoder(VarIntCoder.of());
    TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
    TupleTag<Integer> additionalOutput1 = new TupleTag<>("output-1");
    TupleTag<Integer> additionalOutput2 = new TupleTag<>("output-2");
    ImmutableMap<TupleTag<?>, OutputTag<?>> tagsToOutputTags =
        ImmutableMap.<TupleTag<?>, OutputTag<?>>builder()
            .put(additionalOutput1, new OutputTag<WindowedValue<String>>(additionalOutput1.getId()) {})
            .put(additionalOutput2, new OutputTag<WindowedValue<String>>(additionalOutput2.getId()) {})
            .build();
    ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders =
        ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder()
            .put(mainOutput, (Coder) coder)
            .put(additionalOutput1, coder)
            .put(additionalOutput2, coder)
            .build();
    ImmutableMap<TupleTag<?>, Integer> tagsToIds =
        ImmutableMap.<TupleTag<?>, Integer>builder()
            .put(mainOutput, 0)
            .put(additionalOutput1, 1)
            .put(additionalOutput2, 2)
            .build();
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory =
        new DoFnOperator.MultiOutputOutputManagerFactory(
            mainOutput,
            tagsToOutputTags,
            tagsToCoders,
            tagsToIds,
            new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
    WindowedValue<Integer> zero = WindowedValue.valueInGlobalWindow(0);
    WindowedValue<Integer> three = WindowedValue.valueInGlobalWindow(3);
    WindowedValue<Integer> four = WindowedValue.valueInGlobalWindow(4);
    WindowedValue<Integer> five = WindowedValue.valueInGlobalWindow(5);

    // We use a real StageBundleFactory here in order to exercise the output receiver factory.
    StageBundleFactory stageBundleFactory =
        new StageBundleFactory() {
            private boolean onceEmitted;

            @Override
            public RemoteBundle getBundle(
                    OutputReceiverFactory receiverFactory,
                    TimerReceiverFactory timerReceiverFactory,
                    StateRequestHandler stateRequestHandler,
                    BundleProgressHandler progressHandler,
                    BundleFinalizationHandler finalizationHandler,
                    BundleCheckpointHandler checkpointHandler) {
                return new RemoteBundle() {
                    @Override
                    public String getId() {
                        return "bundle-id";
                    }

                    @Override
                    public Map<String, FnDataReceiver> getInputReceivers() {
                        return ImmutableMap.of("input", input -> {
                            /* ignore input */
                        });
                    }

                    @Override
                    public Map<KV<String, String>, FnDataReceiver<Timer>> getTimerReceivers() {
                        return Collections.emptyMap();
                    }

                    @Override
                    public void requestProgress() {
                        throw new UnsupportedOperationException();
                    }

                    @Override
                    public void split(double fractionOfRemainder) {
                        throw new UnsupportedOperationException();
                    }

                    @Override
                    public void close() throws Exception {
                        if (onceEmitted) {
                            return;
                        }
                        // Emit all values to the runner when the bundle is closed.
                        receiverFactory.create(mainOutput.getId()).accept(three);
                        receiverFactory.create(additionalOutput1.getId()).accept(four);
                        receiverFactory.create(additionalOutput2.getId()).accept(five);
                        onceEmitted = true;
                    }
                };
            }

            @Override
            public ProcessBundleDescriptors.ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
                return processBundleDescriptor;
            }

            @Override
            public InstructionRequestHandler getInstructionRequestHandler() {
                return null;
            }

            @Override
            public void close() {}
        };

    // Wire the stage bundle factory into our mocked stage context.
    when(stageContext.getStageBundleFactory(any())).thenReturn(stageBundleFactory);

    ExecutableStageDoFnOperator<Integer, Integer> operator =
        getOperator(mainOutput, ImmutableList.of(additionalOutput1, additionalOutput2), outputManagerFactory);
    OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> testHarness =
        new OneInputStreamOperatorTestHarness<>(operator);

    long watermark = testHarness.getCurrentWatermark() + 1;
    testHarness.open();
    testHarness.processElement(new StreamRecord<>(zero));
    testHarness.processWatermark(watermark);
    watermark++;
    testHarness.processWatermark(watermark);
    assertEquals(watermark, testHarness.getCurrentWatermark());
    // The watermark is held until the bundle completes, so nothing has been emitted yet.
    assertEquals(0, testHarness.getOutput().size());

    // Closing the harness triggers finishBundle, which flushes the buffered outputs.
    testHarness.close();
    assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(three));
    assertThat(
        testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput1)),
        contains(new StreamRecord<>(four)));
    assertThat(
        testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput2)),
        contains(new StreamRecord<>(five)));
}
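The main-output assertion relies on a small helper from the test class. A hypothetical sketch of what stripStreamRecordFromWindowedValue could look like (the real implementation lives in the Beam test sources and may differ):
private static List<WindowedValue<Integer>> stripStreamRecordFromWindowedValue(Iterable<Object> output) {
    List<WindowedValue<Integer>> result = new ArrayList<>();
    for (Object record : output) {
        // Keep only the WindowedValue payloads of the harness's StreamRecords.
        if (record instanceof StreamRecord
                && ((StreamRecord) record).getValue() instanceof WindowedValue) {
            result.add((WindowedValue<Integer>) ((StreamRecord) record).getValue());
        }
    }
    return result;
}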
Use of org.apache.beam.runners.fnexecution.control.StageBundleFactory in project beam by apache.
The class FlinkExecutableStageFunctionTest, method outputsAreTaggedCorrectly:
@Test
public void outputsAreTaggedCorrectly() throws Exception {
    WindowedValue<Integer> three = WindowedValue.valueInGlobalWindow(3);
    WindowedValue<Integer> four = WindowedValue.valueInGlobalWindow(4);
    WindowedValue<Integer> five = WindowedValue.valueInGlobalWindow(5);
    Map<String, Integer> outputTagMap = ImmutableMap.of("one", 1, "two", 2, "three", 3);

    // We use a real StageBundleFactory here in order to exercise the output receiver factory.
    StageBundleFactory stageBundleFactory =
        new StageBundleFactory() {
            private boolean once;

            @Override
            public RemoteBundle getBundle(
                    OutputReceiverFactory receiverFactory,
                    TimerReceiverFactory timerReceiverFactory,
                    StateRequestHandler stateRequestHandler,
                    BundleProgressHandler progressHandler,
                    BundleFinalizationHandler finalizationHandler,
                    BundleCheckpointHandler checkpointHandler) {
                return new RemoteBundle() {
                    @Override
                    public String getId() {
                        return "bundle-id";
                    }

                    @Override
                    public Map<String, FnDataReceiver> getInputReceivers() {
                        return ImmutableMap.of("input", input -> {
                            /* ignore input */
                        });
                    }

                    @Override
                    public Map<KV<String, String>, FnDataReceiver<Timer>> getTimerReceivers() {
                        return Collections.emptyMap();
                    }

                    @Override
                    public void requestProgress() {
                        throw new UnsupportedOperationException();
                    }

                    @Override
                    public void split(double fractionOfRemainder) {
                        throw new UnsupportedOperationException();
                    }

                    @Override
                    public void close() throws Exception {
                        if (once) {
                            return;
                        }
                        // Emit all values to the runner when the bundle is closed.
                        receiverFactory.create("one").accept(three);
                        receiverFactory.create("two").accept(four);
                        receiverFactory.create("three").accept(five);
                        once = true;
                    }
                };
            }

            @Override
            public ProcessBundleDescriptors.ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
                return processBundleDescriptor;
            }

            @Override
            public InstructionRequestHandler getInstructionRequestHandler() {
                return null;
            }

            @Override
            public void close() throws Exception {}
        };

    // Wire the stage bundle factory into our mocked stage context.
    when(stageContext.getStageBundleFactory(any())).thenReturn(stageBundleFactory);

    FlinkExecutableStageFunction<Integer> function = getFunction(outputTagMap);
    function.open(new Configuration());
    if (isStateful) {
        function.reduce(Collections.emptyList(), collector);
    } else {
        function.mapPartition(Collections.emptyList(), collector);
    }
    // Ensure that the tagged values sent to the collector have the correct union tags
    // as specified in the output map.
    verify(collector).collect(new RawUnionValue(1, three));
    verify(collector).collect(new RawUnionValue(2, four));
    verify(collector).collect(new RawUnionValue(3, five));
    verifyNoMoreInteractions(collector);
}
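For intuition, a hedged sketch of the receiver-factory side that the function under test implements internally: each collection id is looked up in the output map and every emitted element is wrapped in a RawUnionValue carrying the matching union tag. This is illustrative only; the real mapping lives inside FlinkExecutableStageFunction.
OutputReceiverFactory receiverFactory =
    new OutputReceiverFactory() {
        @Override
        public <OutputT> FnDataReceiver<OutputT> create(String collectionId) {
            int unionTag = outputTagMap.get(collectionId);
            // Tag each element so the downstream collector can demultiplex it.
            return element -> collector.collect(new RawUnionValue(unionTag, element));
        }
    };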
Use of org.apache.beam.runners.fnexecution.control.StageBundleFactory in project flink by apache.
The class BeamPythonFunctionRunner, method open:
// ------------------------------------------------------------------------
@Override
public void open(PythonConfig config) throws Exception {
    this.bundleStarted = false;
    this.resultBuffer = new LinkedBlockingQueue<>();
    this.reusableResultTuple = new Tuple2<>();

    // The creation of the stageBundleFactory depends on an initialized environment manager.
    environmentManager.open();

    PortablePipelineOptions portableOptions = PipelineOptionsFactory.as(PortablePipelineOptions.class);
    if (jobOptions.containsKey(PythonOptions.STATE_CACHE_SIZE.key())) {
        portableOptions
            .as(ExperimentalOptions.class)
            .setExperiments(
                Collections.singletonList(
                    ExperimentalOptions.STATE_CACHE_SIZE
                        + "="
                        + jobOptions.get(PythonOptions.STATE_CACHE_SIZE.key())));
    }
    Struct pipelineOptions = PipelineOptionsTranslation.toProto(portableOptions);

    if (memoryManager != null && config.isUsingManagedMemory()) {
        Preconditions.checkArgument(
            managedMemoryFraction > 0 && managedMemoryFraction <= 1.0,
            "The configured managed memory fraction for the Python worker process must be "
                + "within (0, 1], was: %s. It may be because the consumer type \"Python\" was "
                + "missing or set to 0 for the config option "
                + "\"taskmanager.memory.managed.consumer-weights\".",
            managedMemoryFraction);
        final LongFunctionWithException<PythonSharedResources, Exception> initializer =
            (size) ->
                new PythonSharedResources(
                    createJobBundleFactory(pipelineOptions), createPythonExecutionEnvironment(size));
        sharedResources =
            memoryManager.getSharedMemoryResourceForManagedMemory(
                MANAGED_MEMORY_RESOURCE_ID, initializer, managedMemoryFraction);
        LOG.info("Obtained shared Python process of size {} bytes", sharedResources.getSize());
        sharedResources.getResourceHandle().addPythonEnvironmentManager(environmentManager);

        JobBundleFactory jobBundleFactory = sharedResources.getResourceHandle().getJobBundleFactory();
        RunnerApi.Environment environment = sharedResources.getResourceHandle().getEnvironment();
        stageBundleFactory = createStageBundleFactory(jobBundleFactory, environment);
    } else {
        // There is no way to access the MemoryManager for batch jobs of the old planner,
        // so fall back to spawning a dedicated Python process for each Python operator.
        jobBundleFactory = createJobBundleFactory(pipelineOptions);
        stageBundleFactory = createStageBundleFactory(jobBundleFactory, createPythonExecutionEnvironment(-1));
    }
    progressHandler = getProgressHandler(flinkMetricContainer);
}
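A hedged sketch of the matching teardown, not the actual close() of BeamPythonFunctionRunner: it simply releases, in reverse order, the resources acquired in open(), assuming the factories, the shared resource handle, and the environment manager are closeable as their interfaces suggest.
@Override
public void close() throws Exception {
    try {
        if (stageBundleFactory != null) {
            stageBundleFactory.close();
        }
        if (jobBundleFactory != null) {
            jobBundleFactory.close();
        }
    } finally {
        if (sharedResources != null) {
            // Releasing the handle lets the MemoryManager reclaim the shared Python process.
            sharedResources.close();
        } else {
            // Non-shared mode: the environment manager owns the spawned Python process.
            environmentManager.close();
        }
    }
}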