Use of org.apache.beam.runners.fnexecution.control.BundleProgressHandler in project beam by apache.
Class ExecutableStageDoFnOperatorTest, method outputsAreTaggedCorrectly:
@Test
public void outputsAreTaggedCorrectly() throws Exception {
WindowedValue.ValueOnlyWindowedValueCoder<Integer> coder = WindowedValue.getValueOnlyCoder(VarIntCoder.of());
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
TupleTag<Integer> additionalOutput1 = new TupleTag<>("output-1");
TupleTag<Integer> additionalOutput2 = new TupleTag<>("output-2");
ImmutableMap<TupleTag<?>, OutputTag<?>> tagsToOutputTags =
    ImmutableMap.<TupleTag<?>, OutputTag<?>>builder()
        .put(additionalOutput1, new OutputTag<WindowedValue<String>>(additionalOutput1.getId()) {})
        .put(additionalOutput2, new OutputTag<WindowedValue<String>>(additionalOutput2.getId()) {})
        .build();
ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders =
    ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder()
        .put(mainOutput, (Coder) coder)
        .put(additionalOutput1, coder)
        .put(additionalOutput2, coder)
        .build();
ImmutableMap<TupleTag<?>, Integer> tagsToIds =
    ImmutableMap.<TupleTag<?>, Integer>builder()
        .put(mainOutput, 0)
        .put(additionalOutput1, 1)
        .put(additionalOutput2, 2)
        .build();
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory =
    new DoFnOperator.MultiOutputOutputManagerFactory(
        mainOutput,
        tagsToOutputTags,
        tagsToCoders,
        tagsToIds,
        new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
WindowedValue<Integer> zero = WindowedValue.valueInGlobalWindow(0);
WindowedValue<Integer> three = WindowedValue.valueInGlobalWindow(3);
WindowedValue<Integer> four = WindowedValue.valueInGlobalWindow(4);
WindowedValue<Integer> five = WindowedValue.valueInGlobalWindow(5);
// We use a real StageBundleFactory here in order to exercise the output receiver factory.
StageBundleFactory stageBundleFactory = new StageBundleFactory() {
private boolean onceEmitted;
@Override
public RemoteBundle getBundle(
    OutputReceiverFactory receiverFactory,
    TimerReceiverFactory timerReceiverFactory,
    StateRequestHandler stateRequestHandler,
    BundleProgressHandler progressHandler,
    BundleFinalizationHandler finalizationHandler,
    BundleCheckpointHandler checkpointHandler) {
return new RemoteBundle() {
@Override
public String getId() {
return "bundle-id";
}
@Override
public Map<String, FnDataReceiver> getInputReceivers() {
return ImmutableMap.of("input", input -> {
/* Ignore input*/
});
}
@Override
public Map<KV<String, String>, FnDataReceiver<Timer>> getTimerReceivers() {
return Collections.emptyMap();
}
@Override
public void requestProgress() {
throw new UnsupportedOperationException();
}
@Override
public void split(double fractionOfRemainder) {
throw new UnsupportedOperationException();
}
@Override
public void close() throws Exception {
if (onceEmitted) {
return;
}
// Emit all values to the runner when the bundle is closed.
receiverFactory.create(mainOutput.getId()).accept(three);
receiverFactory.create(additionalOutput1.getId()).accept(four);
receiverFactory.create(additionalOutput2.getId()).accept(five);
onceEmitted = true;
}
};
}
@Override
public ProcessBundleDescriptors.ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
return processBundleDescriptor;
}
@Override
public InstructionRequestHandler getInstructionRequestHandler() {
return null;
}
@Override
public void close() {
}
};
// Wire the stage bundle factory into our context.
when(stageContext.getStageBundleFactory(any())).thenReturn(stageBundleFactory);
ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, ImmutableList.of(additionalOutput1, additionalOutput2), outputManagerFactory);
OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> testHarness = new OneInputStreamOperatorTestHarness<>(operator);
long watermark = testHarness.getCurrentWatermark() + 1;
testHarness.open();
testHarness.processElement(new StreamRecord<>(zero));
testHarness.processWatermark(watermark);
watermark++;
testHarness.processWatermark(watermark);
assertEquals(watermark, testHarness.getCurrentWatermark());
// watermark hold until bundle complete
assertEquals(0, testHarness.getOutput().size());
// triggers finish bundle
testHarness.close();
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(three));
assertThat(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput1)), contains(new StreamRecord<>(four)));
assertThat(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput2)), contains(new StreamRecord<>(five)));
}
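The fake StageBundleFactory above deliberately ignores the BundleProgressHandler handed to getBundle, since this test only verifies output tagging. A test that also wanted to assert on reported metrics could capture them with a small recording handler; the sketch below is not part of the Beam test, and the class and field names are illustrative.
import java.util.ArrayList;
import java.util.List;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse;
import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo;
import org.apache.beam.runners.fnexecution.control.BundleProgressHandler;

/** Sketch: collects every MonitoringInfo reported for a bundle so a test can assert on it. */
class RecordingProgressHandler implements BundleProgressHandler {

  final List<MonitoringInfo> monitoringInfos = new ArrayList<>();

  @Override
  public void onProgress(ProcessBundleProgressResponse progress) {
    // Intermediate progress reports may arrive any number of times while the bundle runs.
    monitoringInfos.addAll(progress.getMonitoringInfosList());
  }

  @Override
  public void onCompleted(ProcessBundleResponse response) {
    // The completion report arrives once, when the SDK harness finishes the bundle.
    monitoringInfos.addAll(response.getMonitoringInfosList());
  }
}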
Use of org.apache.beam.runners.fnexecution.control.BundleProgressHandler in project beam by apache.
Class ExecutableStageDoFnOperator, method open:
@Override
public void open() throws Exception {
executableStage = ExecutableStage.fromPayload(payload);
hasSdfProcessFn = hasSDF(executableStage);
initializeUserState(executableStage, getKeyedStateBackend(), pipelineOptions);
// TODO: Wire this into the distributed cache and make it pluggable.
// TODO: Do we really want this layer of indirection when accessing the stage bundle factory?
// It's a little strange because this operator is responsible for the lifetime of the stage
// bundle "factory" (manager?) but not the job or Flink bundle factories. How do we make
// ownership of the higher level "factories" explicit? Do we care?
stageContext = contextFactory.get(jobInfo);
stageBundleFactory = stageContext.getStageBundleFactory(executableStage);
stateRequestHandler = getStateRequestHandler(executableStage);
progressHandler = new BundleProgressHandler() {
@Override
public void onProgress(ProcessBundleProgressResponse progress) {
if (flinkMetricContainer != null) {
flinkMetricContainer.updateMetrics(stepName, progress.getMonitoringInfosList());
}
}
@Override
public void onCompleted(ProcessBundleResponse response) {
if (flinkMetricContainer != null) {
flinkMetricContainer.updateMetrics(stepName, response.getMonitoringInfosList());
}
}
};
finalizationHandler = BundleFinalizationHandlers.inMemoryFinalizer(stageBundleFactory.getInstructionRequestHandler());
checkpointHandler = getBundleCheckpointHandler(hasSdfProcessFn);
minEventTimeTimerTimestampInCurrentBundle = Long.MAX_VALUE;
minEventTimeTimerTimestampInLastBundle = Long.MAX_VALUE;
super.setPreBundleCallback(this::preBundleStartCallback);
super.setBundleFinishedCallback(this::finishBundleCallback);
// This will call {@code createWrappingDoFnRunner} which needs the above dependencies.
super.open();
}
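The collaborators assembled in open() are consumed when the operator requests a bundle from the stage bundle factory, pushes elements into the bundle's input receiver, and closes the bundle. The helper below is a condensed, illustrative sketch of that flow under the getBundle signature shown in the tests on this page; the class and method names are ours, and the real operator additionally manages watermark holds, timers, and its output manager.
import java.util.Map;
import org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler;
import org.apache.beam.runners.fnexecution.control.BundleFinalizationHandler;
import org.apache.beam.runners.fnexecution.control.BundleProgressHandler;
import org.apache.beam.runners.fnexecution.control.OutputReceiverFactory;
import org.apache.beam.runners.fnexecution.control.RemoteBundle;
import org.apache.beam.runners.fnexecution.control.StageBundleFactory;
import org.apache.beam.runners.fnexecution.control.TimerReceiverFactory;
import org.apache.beam.runners.fnexecution.state.StateRequestHandler;
import org.apache.beam.sdk.fn.data.FnDataReceiver;

/** Illustrative helper, not Beam code: shows where the handlers built in open() are consumed. */
class BundleDriver {

  /** Sends the given elements through one bundle of the stage and waits for it to finish. */
  @SuppressWarnings("unchecked")
  static <T> void runBundle(
      StageBundleFactory stageBundleFactory,
      OutputReceiverFactory outputReceiverFactory,
      TimerReceiverFactory timerReceiverFactory,
      StateRequestHandler stateRequestHandler,
      BundleProgressHandler progressHandler,
      BundleFinalizationHandler finalizationHandler,
      BundleCheckpointHandler checkpointHandler,
      Iterable<T> elements)
      throws Exception {
    try (RemoteBundle bundle =
        stageBundleFactory.getBundle(
            outputReceiverFactory,
            timerReceiverFactory,
            stateRequestHandler,
            progressHandler,
            finalizationHandler,
            checkpointHandler)) {
      // The stages in these examples have a single main input; feed every element into it.
      Map<String, FnDataReceiver> receivers = bundle.getInputReceivers();
      FnDataReceiver<T> mainInput = (FnDataReceiver<T>) receivers.values().iterator().next();
      for (T element : elements) {
        mainInput.accept(element);
      }
    }
    // Closing the bundle blocks until the SDK harness is done; the final monitoring
    // infos then reach the runner through progressHandler.onCompleted(...).
  }
}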
Use of org.apache.beam.runners.fnexecution.control.BundleProgressHandler in project beam by apache.
Class FlinkExecutableStageFunctionTest, method outputsAreTaggedCorrectly:
@Test
public void outputsAreTaggedCorrectly() throws Exception {
WindowedValue<Integer> three = WindowedValue.valueInGlobalWindow(3);
WindowedValue<Integer> four = WindowedValue.valueInGlobalWindow(4);
WindowedValue<Integer> five = WindowedValue.valueInGlobalWindow(5);
Map<String, Integer> outputTagMap = ImmutableMap.of("one", 1, "two", 2, "three", 3);
// We use a real StageBundleFactory here in order to exercise the output receiver factory.
StageBundleFactory stageBundleFactory = new StageBundleFactory() {
private boolean once;
@Override
public RemoteBundle getBundle(
    OutputReceiverFactory receiverFactory,
    TimerReceiverFactory timerReceiverFactory,
    StateRequestHandler stateRequestHandler,
    BundleProgressHandler progressHandler,
    BundleFinalizationHandler finalizationHandler,
    BundleCheckpointHandler checkpointHandler) {
return new RemoteBundle() {
@Override
public String getId() {
return "bundle-id";
}
@Override
public Map<String, FnDataReceiver> getInputReceivers() {
return ImmutableMap.of("input", input -> {
/* Ignore input*/
});
}
@Override
public Map<KV<String, String>, FnDataReceiver<Timer>> getTimerReceivers() {
return Collections.emptyMap();
}
@Override
public void requestProgress() {
throw new UnsupportedOperationException();
}
@Override
public void split(double fractionOfRemainder) {
throw new UnsupportedOperationException();
}
@Override
public void close() throws Exception {
if (once) {
return;
}
// Emit all values to the runner when the bundle is closed.
receiverFactory.create("one").accept(three);
receiverFactory.create("two").accept(four);
receiverFactory.create("three").accept(five);
once = true;
}
};
}
@Override
public ProcessBundleDescriptors.ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
return processBundleDescriptor;
}
@Override
public InstructionRequestHandler getInstructionRequestHandler() {
return null;
}
@Override
public void close() throws Exception {
}
};
// Wire the stage bundle factory into our context.
when(stageContext.getStageBundleFactory(any())).thenReturn(stageBundleFactory);
FlinkExecutableStageFunction<Integer> function = getFunction(outputTagMap);
function.open(new Configuration());
if (isStateful) {
function.reduce(Collections.emptyList(), collector);
} else {
function.mapPartition(Collections.emptyList(), collector);
}
// Ensure that the tagged values sent to the collector have the correct union tags as specified
// in the output map.
verify(collector).collect(new RawUnionValue(1, three));
verify(collector).collect(new RawUnionValue(2, four));
verify(collector).collect(new RawUnionValue(3, five));
verifyNoMoreInteractions(collector);
}
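These verifications rely on the function's output receiver factory mapping each collection id to the union tag given in outputTagMap and wrapping elements as RawUnionValue before handing them to the Flink collector. A simplified sketch of such a factory follows; it is illustrative, and the real FlinkExecutableStageFunction constructs its receiver factory internally.
import java.util.Map;
import org.apache.beam.runners.fnexecution.control.OutputReceiverFactory;
import org.apache.beam.sdk.fn.data.FnDataReceiver;
import org.apache.beam.sdk.transforms.join.RawUnionValue;
import org.apache.flink.util.Collector;

/** Sketch: routes SDK harness outputs into Flink's collector as tagged union values. */
class UnionTagReceiverFactory implements OutputReceiverFactory {

  private final Map<String, Integer> outputTagMap;
  private final Collector<RawUnionValue> collector;

  UnionTagReceiverFactory(Map<String, Integer> outputTagMap, Collector<RawUnionValue> collector) {
    this.outputTagMap = outputTagMap;
    this.collector = collector;
  }

  @Override
  public <OutputT> FnDataReceiver<OutputT> create(String collectionId) {
    // Resolve the union tag once per receiver; every element of this collection carries it.
    int unionTag = outputTagMap.get(collectionId);
    return element -> collector.collect(new RawUnionValue(unionTag, element));
  }
}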
Use of org.apache.beam.runners.fnexecution.control.BundleProgressHandler in project beam by apache.
Class FlinkExecutableStageFunction, method open:
@Override
public void open(Configuration parameters) {
FlinkPipelineOptions options = pipelineOptions.get().as(FlinkPipelineOptions.class);
// Register standard file systems.
FileSystems.setDefaultPipelineOptions(options);
executableStage = ExecutableStage.fromPayload(stagePayload);
runtimeContext = getRuntimeContext();
metricContainer = new FlinkMetricContainer(runtimeContext);
// TODO: Wire this into the distributed cache and make it pluggable.
stageContext = contextFactory.get(jobInfo);
stageBundleFactory = stageContext.getStageBundleFactory(executableStage);
// NOTE: It's safe to reuse the state handler between partitions because each partition uses the
// same backing runtime context and broadcast variables. We use checkState below to catch errors
// in backward-incompatible Flink changes.
stateRequestHandler = getStateRequestHandler(executableStage, stageBundleFactory.getProcessBundleDescriptor(), runtimeContext);
progressHandler = new BundleProgressHandler() {
@Override
public void onProgress(ProcessBundleProgressResponse progress) {
metricContainer.updateMetrics(stepName, progress.getMonitoringInfosList());
}
@Override
public void onCompleted(ProcessBundleResponse response) {
metricContainer.updateMetrics(stepName, response.getMonitoringInfosList());
}
};
// TODO(BEAM-11021): Support bundle finalization in portable batch.
finalizationHandler =
    bundleId -> {
      throw new UnsupportedOperationException(
          "Portable Flink runner doesn't support bundle finalization in batch mode. For more"
              + " details, please refer to https://issues.apache.org/jira/browse/BEAM-11021.");
    };
bundleCheckpointHandler = getBundleCheckpointHandler(executableStage);
}
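The handler built here forwards metrics unconditionally. A runner or test with no metrics sink can pass a no-op handler instead; the BundleProgressHandler interface appears to provide a static ignored() factory for this in the Beam versions these snippets target, and a minimal equivalent is sketched below (the class name is illustrative, not Beam code).
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse;
import org.apache.beam.runners.fnexecution.control.BundleProgressHandler;

/** Sketch of a progress handler for runners or tests that do not consume metrics. */
class NoOpProgressHandlers {

  static BundleProgressHandler noOp() {
    return new BundleProgressHandler() {
      @Override
      public void onProgress(ProcessBundleProgressResponse progress) {
        // Intentionally drop intermediate progress reports.
      }

      @Override
      public void onCompleted(ProcessBundleResponse response) {
        // Intentionally drop the final report.
      }
    };
  }
}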
Use of org.apache.beam.runners.fnexecution.control.BundleProgressHandler in project beam by apache.
Class SparkExecutableStageFunction, method getBundleProgressHandler:
private BundleProgressHandler getBundleProgressHandler() {
String stageName = stagePayload.getInput();
MetricsContainerImpl container = metricsAccumulator.value().getContainer(stageName);
return new BundleProgressHandler() {
@Override
public void onProgress(ProcessBundleProgressResponse progress) {
container.update(progress.getMonitoringInfosList());
}
@Override
public void onCompleted(ProcessBundleResponse response) {
container.update(response.getMonitoringInfosList());
}
};
}
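This is the same pattern as the Flink handlers above: every batch of monitoring infos, intermediate or final, is merged into a metrics container. Factored into a standalone class for clarity, under an illustrative name that is not part of Beam:
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse;
import org.apache.beam.runners.core.metrics.MetricsContainerImpl;
import org.apache.beam.runners.fnexecution.control.BundleProgressHandler;

/** Sketch: forwards per-bundle monitoring infos into a single step's metrics container. */
class MetricsContainerProgressHandler implements BundleProgressHandler {

  private final MetricsContainerImpl container;

  MetricsContainerProgressHandler(MetricsContainerImpl container) {
    this.container = container;
  }

  @Override
  public void onProgress(ProcessBundleProgressResponse progress) {
    // Merge intermediate metric updates as they stream in from the SDK harness.
    container.update(progress.getMonitoringInfosList());
  }

  @Override
  public void onCompleted(ProcessBundleResponse response) {
    // Merge the final metric values reported when the bundle completes.
    container.update(response.getMonitoringInfosList());
  }
}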