use of org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse in project beam by apache.
the class ExecutableStageDoFnOperator method open.
/**
 * Sets up the stage-level runtime dependencies of this operator and then opens the parent
 * operator.
 *
 * <p>All fields assigned here are populated before {@code super.open()} runs, because that call
 * triggers {@code createWrappingDoFnRunner}, which relies on them.
 *
 * @throws Exception if the setup or the parent {@code open()} fails
 */
@Override
public void open() throws Exception {
  executableStage = ExecutableStage.fromPayload(payload);
  hasSdfProcessFn = hasSDF(executableStage);
  initializeUserState(executableStage, getKeyedStateBackend(), pipelineOptions);

  // TODO: Wire this into the distributed cache and make it pluggable.
  // TODO: Is this extra indirection for reaching the stage bundle factory really wanted?
  // This operator owns the lifetime of the stage bundle "factory" (manager?), yet it does not
  // own the job or Flink bundle factories. How should ownership of those higher-level
  // "factories" be made explicit, and does it matter?
  stageContext = contextFactory.get(jobInfo);
  stageBundleFactory = stageContext.getStageBundleFactory(executableStage);
  stateRequestHandler = getStateRequestHandler(executableStage);

  // Forward monitoring infos from both intermediate progress reports and the final bundle
  // response to the Flink metric container, when one is available.
  progressHandler =
      new BundleProgressHandler() {
        @Override
        public void onProgress(ProcessBundleProgressResponse progressResponse) {
          if (flinkMetricContainer == null) {
            return;
          }
          flinkMetricContainer.updateMetrics(stepName, progressResponse.getMonitoringInfosList());
        }

        @Override
        public void onCompleted(ProcessBundleResponse bundleResponse) {
          if (flinkMetricContainer == null) {
            return;
          }
          flinkMetricContainer.updateMetrics(stepName, bundleResponse.getMonitoringInfosList());
        }
      };

  finalizationHandler =
      BundleFinalizationHandlers.inMemoryFinalizer(
          stageBundleFactory.getInstructionRequestHandler());
  checkpointHandler = getBundleCheckpointHandler(hasSdfProcessFn);

  // Both event-time timer trackers start at Long.MAX_VALUE.
  minEventTimeTimerTimestampInCurrentBundle = Long.MAX_VALUE;
  minEventTimeTimerTimestampInLastBundle = Long.MAX_VALUE;

  super.setPreBundleCallback(this::preBundleStartCallback);
  super.setBundleFinishedCallback(this::finishBundleCallback);

  // Must come last: this invokes {@code createWrappingDoFnRunner}, which needs everything above.
  super.open();
}
use of org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse in project beam by apache.
the class RegisterAndProcessBundleOperationTest method testGetProcessBundleProgressFetchesProgressResponseWhenBundleIdCached.
/**
 * Checks that {@code getProcessBundleProgress()} returns the progress response carried by the
 * instruction response of the (mocked) {@link InstructionRequestHandler} once the bundle
 * instruction id has been generated and cached.
 *
 * @throws Exception if resolving the progress future fails
 */
@Test
public void testGetProcessBundleProgressFetchesProgressResponseWhenBundleIdCached()
    throws Exception {
  InstructionRequestHandler mockInstructionRequestHandler = mock(InstructionRequestHandler.class);
  RegisterAndProcessBundleOperation operation =
      new RegisterAndProcessBundleOperation(
          IdGenerators.decrementingLongs(),
          mockInstructionRequestHandler,
          mockBeamFnStateDelegator,
          REGISTER_REQUEST,
          ImmutableMap.of(),
          ImmutableMap.of(),
          ImmutableMap.of(),
          ImmutableTable.of(),
          ImmutableMap.of(),
          mockContext);

  // This generates and caches the bundle id.
  operation.getProcessBundleInstructionId();

  ProcessBundleProgressResponse expectedResult =
      ProcessBundleProgressResponse.newBuilder().build();
  InstructionResponse instructionResponse =
      InstructionResponse.newBuilder().setProcessBundleProgress(expectedResult).build();
  // Parameterized instead of a raw CompletableFuture so the stubbing below is type-checked.
  CompletableFuture<InstructionResponse> resultFuture =
      CompletableFuture.completedFuture(instructionResponse);
  when(mockInstructionRequestHandler.handle(any())).thenReturn(resultFuture);

  final ProcessBundleProgressResponse result =
      MoreFutures.get(operation.getProcessBundleProgress());

  // The very same response instance handed to the mock must come back.
  assertSame("Return value from mockInstructionRequestHandler", expectedResult, result);
}
use of org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse in project beam by apache.
the class FlinkExecutableStageFunction method open.
/**
 * Initializes the per-function runtime state: file systems, the executable stage, the metric
 * container, the stage bundle factory, and the state, progress, finalization and checkpoint
 * handlers.
 *
 * @param parameters the Flink configuration passed to this function; not read by this method
 */
@Override
public void open(Configuration parameters) {
  FlinkPipelineOptions flinkOptions = pipelineOptions.get().as(FlinkPipelineOptions.class);
  // Make the standard file systems available under these options.
  FileSystems.setDefaultPipelineOptions(flinkOptions);

  executableStage = ExecutableStage.fromPayload(stagePayload);
  runtimeContext = getRuntimeContext();
  metricContainer = new FlinkMetricContainer(runtimeContext);

  // TODO: Wire this into the distributed cache and make it pluggable.
  stageContext = contextFactory.get(jobInfo);
  stageBundleFactory = stageContext.getStageBundleFactory(executableStage);

  // NOTE: Sharing one state handler across partitions is safe since every partition is backed
  // by the same runtime context and broadcast variables. checkState guards are meant to catch
  // backward-incompatible Flink changes; they are not in this method (presumably inside the
  // state request handler setup; verify).
  stateRequestHandler =
      getStateRequestHandler(
          executableStage, stageBundleFactory.getProcessBundleDescriptor(), runtimeContext);

  // Both progress reports and the final bundle response feed the Flink metric container.
  progressHandler =
      new BundleProgressHandler() {
        @Override
        public void onProgress(ProcessBundleProgressResponse progressResponse) {
          metricContainer.updateMetrics(stepName, progressResponse.getMonitoringInfosList());
        }

        @Override
        public void onCompleted(ProcessBundleResponse bundleResponse) {
          metricContainer.updateMetrics(stepName, bundleResponse.getMonitoringInfosList());
        }
      };

  // TODO(BEAM-11021): Support bundle finalization in portable batch.
  finalizationHandler =
      requestedBundleId -> {
        throw new UnsupportedOperationException("Portable Flink runner doesn't support bundle finalization in batch mode. For more details, please refer to https://issues.apache.org/jira/browse/BEAM-11021.");
      };

  bundleCheckpointHandler = getBundleCheckpointHandler(executableStage);
}
use of org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse in project beam by apache.
the class RemoteExecutionTest method testMetrics.
/**
 * Runs a fused stage containing {@code MetricsDoFn} through a remote bundle and checks the
 * monitoring infos delivered to the {@link BundleProgressHandler}.
 *
 * <p>{@code onProgress} expects the start-bundle and process-element user metrics to be present
 * and the finish-bundle ones to be absent. {@code onCompleted} additionally expects the
 * finish-bundle user metrics, the per-PCollection element counts, and the execution-time
 * counters for the metrics transform.
 *
 * <p>Progress is requested from a separate single-thread executor once the DoFn signals
 * {@code AFTER_PROCESS}. The executor is always shut down, even when processing the bundle
 * throws.
 *
 * @throws Exception if launching the harness, encoding the input or processing the bundle fails
 */
@Test
@SuppressWarnings("FutureReturnValueIgnored")
public void testMetrics() throws Exception {
  launchSdkHarness(PipelineOptionsFactory.create());
  MetricsDoFn metricsDoFn = new MetricsDoFn();
  Pipeline p = Pipeline.create();
  PCollection<String> input =
      p.apply("impulse", Impulse.create())
          .apply("create", ParDo.of(metricsDoFn))
          .setCoder(StringUtf8Coder.of());
  SingleOutput<String, String> pardo =
      ParDo.of(
          new DoFn<String, String>() {
            @ProcessElement
            public void process(ProcessContext ctxt) {
              // Output the element twice to keep unique numbers in asserts, 6 output elements.
              ctxt.output(ctxt.element());
              ctxt.output(ctxt.element());
            }
          });
  input.apply("processA", pardo).setCoder(StringUtf8Coder.of());
  input.apply("processB", pardo).setCoder(StringUtf8Coder.of());

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineProto);
  // Any fused stage is accepted here; the predicate does not filter on side inputs.
  Optional<ExecutableStage> optionalStage =
      Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> true);
  checkState(optionalStage.isPresent(), "Expected at least one fused stage.");
  ExecutableStage stage = optionalStage.get();

  ExecutableProcessBundleDescriptor descriptor =
      ProcessBundleDescriptors.fromExecutableStage(
          "test_stage",
          stage,
          dataServer.getApiServiceDescriptor(),
          stateServer.getApiServiceDescriptor());
  BundleProcessor processor =
      controlClient.getProcessor(
          descriptor.getProcessBundleDescriptor(),
          descriptor.getRemoteInputDestinations(),
          stateDelegator);

  // Collect every remote output into its own synchronized list.
  Map<String, Coder> remoteOutputCoders = descriptor.getRemoteOutputCoders();
  Map<String, RemoteOutputReceiver<?>> outputReceivers = new HashMap<>();
  for (Entry<String, Coder> remoteOutputCoder : remoteOutputCoders.entrySet()) {
    List<WindowedValue<?>> outputContents = Collections.synchronizedList(new ArrayList<>());
    outputReceivers.put(
        remoteOutputCoder.getKey(),
        RemoteOutputReceiver.of(
            (Coder<WindowedValue<?>>) remoteOutputCoder.getValue(), outputContents::add));
  }

  final String testPTransformId = "create-ParMultiDo-Metrics-";
  BundleProgressHandler progressHandler =
      new BundleProgressHandler() {
        @Override
        public void onProgress(ProcessBundleProgressResponse response) {
          // Let the DoFn proceed now that a progress report has arrived.
          MetricsDoFn.ALLOW_COMPLETION.get(metricsDoFn.uuid).countDown();

          List<Matcher<MonitoringInfo>> matchers = new ArrayList<>();

          // We expect all user counters except for the ones in @FinishBundle
          // Since non-user metrics are registered at bundle creation time, they will still report
          // values most of which will be 0.
          SimpleMonitoringInfoBuilder builder = new SimpleMonitoringInfoBuilder();
          builder
              .setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64)
              .setLabel(
                  MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName())
              .setLabel(
                  MonitoringInfoConstants.Labels.NAME, MetricsDoFn.PROCESS_USER_COUNTER_NAME);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          builder.setInt64SumValue(1);
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          builder = new SimpleMonitoringInfoBuilder();
          builder
              .setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64)
              .setLabel(
                  MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName())
              .setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.START_USER_COUNTER_NAME);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          builder.setInt64SumValue(10);
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          builder = new SimpleMonitoringInfoBuilder();
          builder
              .setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64)
              .setLabel(
                  MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName())
              .setLabel(
                  MonitoringInfoConstants.Labels.NAME, MetricsDoFn.FINISH_USER_COUNTER_NAME);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          matchers.add(not(MonitoringInfoMatchers.matchSetFields(builder.build())));

          // User Distributions.
          // Start from a fresh builder instead of mutating the counter builder above, so this
          // matcher is built the same way as all the others.
          builder = new SimpleMonitoringInfoBuilder();
          builder
              .setUrn(MonitoringInfoConstants.Urns.USER_DISTRIBUTION_INT64)
              .setLabel(
                  MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName())
              .setLabel(
                  MonitoringInfoConstants.Labels.NAME, MetricsDoFn.PROCESS_USER_DISTRIBUTION_NAME);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          builder.setInt64DistributionValue(DistributionData.create(1, 1, 1, 1));
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          builder = new SimpleMonitoringInfoBuilder();
          builder
              .setUrn(MonitoringInfoConstants.Urns.USER_DISTRIBUTION_INT64)
              .setLabel(
                  MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName())
              .setLabel(
                  MonitoringInfoConstants.Labels.NAME, MetricsDoFn.START_USER_DISTRIBUTION_NAME);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          builder.setInt64DistributionValue(DistributionData.create(10, 1, 10, 10));
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          builder = new SimpleMonitoringInfoBuilder();
          builder
              .setUrn(MonitoringInfoConstants.Urns.USER_DISTRIBUTION_INT64)
              .setLabel(
                  MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName())
              .setLabel(
                  MonitoringInfoConstants.Labels.NAME, MetricsDoFn.FINISH_USER_DISTRIBUTION_NAME);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          matchers.add(not(MonitoringInfoMatchers.matchSetFields(builder.build())));

          assertThat(
              response.getMonitoringInfosList(),
              Matchers.hasItems(matchers.toArray(new Matcher[0])));
        }

        @Override
        public void onCompleted(ProcessBundleResponse response) {
          List<Matcher<MonitoringInfo>> matchers = new ArrayList<>();

          // User Counters.
          SimpleMonitoringInfoBuilder builder = new SimpleMonitoringInfoBuilder();
          builder
              .setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64)
              .setLabel(
                  MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName())
              .setLabel(
                  MonitoringInfoConstants.Labels.NAME, MetricsDoFn.PROCESS_USER_COUNTER_NAME);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          builder.setInt64SumValue(1);
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          builder = new SimpleMonitoringInfoBuilder();
          builder
              .setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64)
              .setLabel(
                  MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName())
              .setLabel(MonitoringInfoConstants.Labels.NAME, MetricsDoFn.START_USER_COUNTER_NAME);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          builder.setInt64SumValue(10);
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          builder = new SimpleMonitoringInfoBuilder();
          builder
              .setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64)
              .setLabel(
                  MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName())
              .setLabel(
                  MonitoringInfoConstants.Labels.NAME, MetricsDoFn.FINISH_USER_COUNTER_NAME);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          builder.setInt64SumValue(100);
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          // User Distributions.
          // Start from a fresh builder instead of mutating the counter builder above, so this
          // matcher is built the same way as all the others.
          builder = new SimpleMonitoringInfoBuilder();
          builder
              .setUrn(MonitoringInfoConstants.Urns.USER_DISTRIBUTION_INT64)
              .setLabel(
                  MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName())
              .setLabel(
                  MonitoringInfoConstants.Labels.NAME, MetricsDoFn.PROCESS_USER_DISTRIBUTION_NAME);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          builder.setInt64DistributionValue(DistributionData.create(1, 1, 1, 1));
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          builder = new SimpleMonitoringInfoBuilder();
          builder
              .setUrn(MonitoringInfoConstants.Urns.USER_DISTRIBUTION_INT64)
              .setLabel(
                  MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName())
              .setLabel(
                  MonitoringInfoConstants.Labels.NAME, MetricsDoFn.START_USER_DISTRIBUTION_NAME);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          builder.setInt64DistributionValue(DistributionData.create(10, 1, 10, 10));
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          builder = new SimpleMonitoringInfoBuilder();
          builder
              .setUrn(MonitoringInfoConstants.Urns.USER_DISTRIBUTION_INT64)
              .setLabel(
                  MonitoringInfoConstants.Labels.NAMESPACE, RemoteExecutionTest.class.getName())
              .setLabel(
                  MonitoringInfoConstants.Labels.NAME, MetricsDoFn.FINISH_USER_DISTRIBUTION_NAME);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          builder.setInt64DistributionValue(DistributionData.create(100, 1, 100, 100));
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          // The element counter should be counted only once per PCollection.
          builder = new SimpleMonitoringInfoBuilder();
          builder.setUrn(MonitoringInfoConstants.Urns.ELEMENT_COUNT);
          builder.setLabel(MonitoringInfoConstants.Labels.PCOLLECTION, "impulse.out");
          builder.setInt64SumValue(1);
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          builder = new SimpleMonitoringInfoBuilder();
          builder.setUrn(MonitoringInfoConstants.Urns.ELEMENT_COUNT);
          builder.setLabel(
              MonitoringInfoConstants.Labels.PCOLLECTION, "create/ParMultiDo(Metrics).output");
          builder.setInt64SumValue(3);
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          // Verify that the element count is not double counted if two PCollections consume it.
          builder = new SimpleMonitoringInfoBuilder();
          builder.setUrn(MonitoringInfoConstants.Urns.ELEMENT_COUNT);
          builder.setLabel(
              MonitoringInfoConstants.Labels.PCOLLECTION, "processA/ParMultiDo(Anonymous).output");
          builder.setInt64SumValue(6);
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          builder = new SimpleMonitoringInfoBuilder();
          builder.setUrn(MonitoringInfoConstants.Urns.ELEMENT_COUNT);
          builder.setLabel(
              MonitoringInfoConstants.Labels.PCOLLECTION, "processB/ParMultiDo(Anonymous).output");
          builder.setInt64SumValue(6);
          matchers.add(MonitoringInfoMatchers.matchSetFields(builder.build()));

          // Check for execution time metrics for the testPTransformId: start bundle.
          builder = new SimpleMonitoringInfoBuilder();
          builder.setUrn(MonitoringInfoConstants.Urns.START_BUNDLE_MSECS);
          builder.setType(TypeUrns.SUM_INT64_TYPE);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          matchers.add(
              allOf(
                  MonitoringInfoMatchers.matchSetFields(builder.build()),
                  MonitoringInfoMatchers.counterValueGreaterThanOrEqualTo(1)));

          // Check for execution time metrics for the testPTransformId: process bundle.
          builder = new SimpleMonitoringInfoBuilder();
          builder.setUrn(Urns.PROCESS_BUNDLE_MSECS);
          builder.setType(TypeUrns.SUM_INT64_TYPE);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          matchers.add(
              allOf(
                  MonitoringInfoMatchers.matchSetFields(builder.build()),
                  MonitoringInfoMatchers.counterValueGreaterThanOrEqualTo(2)));

          // Check for execution time metrics for the testPTransformId: finish bundle.
          builder = new SimpleMonitoringInfoBuilder();
          builder.setUrn(Urns.FINISH_BUNDLE_MSECS);
          builder.setType(TypeUrns.SUM_INT64_TYPE);
          builder.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, testPTransformId);
          matchers.add(
              allOf(
                  MonitoringInfoMatchers.matchSetFields(builder.build()),
                  MonitoringInfoMatchers.counterValueGreaterThanOrEqualTo(3)));

          assertThat(
              response.getMonitoringInfosList(),
              Matchers.hasItems(matchers.toArray(new Matcher[0])));
        }
      };

  ExecutorService executor = Executors.newSingleThreadExecutor();
  try {
    try (RemoteBundle bundle =
        processor.newBundle(outputReceivers, StateRequestHandler.unsupported(), progressHandler)) {
      Iterables.getOnlyElement(bundle.getInputReceivers().values())
          .accept(
              valueInGlobalWindow(CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "X")));

      // Request progress from another thread once the DoFn has reached AFTER_PROCESS.
      executor.submit(
          () -> {
            checkState(
                MetricsDoFn.AFTER_PROCESS.get(metricsDoFn.uuid).await(60, TimeUnit.SECONDS),
                "Runner waited too long for DoFn to get to AFTER_PROCESS.");
            bundle.requestProgress();
            return (Void) null;
          });
    }
  } finally {
    // Always release the executor thread, even if bundle processing or an assertion failed.
    executor.shutdown();
  }
}
use of org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse in project beam by apache.
the class SparkExecutableStageFunction method getBundleProgressHandler.
/**
 * Creates a progress handler that feeds monitoring infos into a metrics container.
 *
 * <p>The container is looked up once, keyed by the stage payload's input, and is then updated
 * with the monitoring infos of every progress report and of the final bundle response.
 *
 * @return a handler that updates the looked-up metrics container
 */
private BundleProgressHandler getBundleProgressHandler() {
  final MetricsContainerImpl stageMetrics =
      metricsAccumulator.value().getContainer(stagePayload.getInput());

  return new BundleProgressHandler() {
    @Override
    public void onProgress(ProcessBundleProgressResponse progressResponse) {
      stageMetrics.update(progressResponse.getMonitoringInfosList());
    }

    @Override
    public void onCompleted(ProcessBundleResponse bundleResponse) {
      stageMetrics.update(bundleResponse.getMonitoringInfosList());
    }
  };
}
Aggregations