use of org.apache.beam.sdk.transforms.DoFn.BundleFinalizer in project beam by apache.
the class DoFnInvokersTest method testBundleFinalizer.
/**
 * Checks that a {@code BundleFinalizer} parameter on a {@code @ProcessElement} method receives the
 * finalizer returned by the argument provider: the DoFn registers a callback on it and the test
 * verifies that the mocked finalizer saw that exact registration.
 */
@Test
public void testBundleFinalizer() {
  // DoFn whose process method does nothing except register a null callback that expires at
  // epoch second 42.
  class BundleFinalizerDoFn extends DoFn<String, String> {
    @ProcessElement
    public void processElement(BundleFinalizer finalizer) {
      finalizer.afterBundleCommit(Instant.ofEpochSecond(42L), null);
    }
  }

  BundleFinalizer finalizerMock = mock(BundleFinalizer.class);
  when(mockArgumentProvider.bundleFinalizer()).thenReturn(finalizerMock);

  DoFnInvokers.invokerFor(new BundleFinalizerDoFn()).invokeProcessElement(mockArgumentProvider);

  Instant expectedExpiry = Instant.ofEpochSecond(42L);
  verify(finalizerMock).afterBundleCommit(eq(expectedExpiry), eq(null));
}
use of org.apache.beam.sdk.transforms.DoFn.BundleFinalizer in project beam by apache.
the class ProcessBundleHandlerTest method testBundleFinalizationIsPropagated.
/**
 * Verifies that a callback registered through the context's {@code BundleFinalizer} from a
 * start-bundle function is (a) flagged on the process-bundle response as requiring finalization
 * and (b) passed to the {@code FinalizeBundleHandler} under the instruction id.
 */
@Test
public void testBundleFinalizationIsPropagated() throws Exception {
  // Descriptor registered under "1L" with a single transform whose spec carries DATA_INPUT_URN.
  RunnerApi.FunctionSpec dataInputSpec =
      RunnerApi.FunctionSpec.newBuilder().setUrn(DATA_INPUT_URN).build();
  RunnerApi.PTransform dataInputTransform =
      RunnerApi.PTransform.newBuilder().setSpec(dataInputSpec).build();
  BeamFnApi.ProcessBundleDescriptor descriptor =
      BeamFnApi.ProcessBundleDescriptor.newBuilder()
          .putTransforms("2L", dataInputTransform)
          .build();
  Map<String, BeamFnApi.ProcessBundleDescriptor> descriptorsById =
      ImmutableMap.of("1L", descriptor);

  FinalizeBundleHandler finalizeHandlerMock = mock(FinalizeBundleHandler.class);
  BundleFinalizer.Callback callbackMock = mock(BundleFinalizer.Callback.class);

  // Runner factory for DATA_INPUT_URN: builds no runner (returns null) and only adds a
  // start-bundle function that registers callbackMock with an expiry of epoch milli 42.
  PTransformRunnerFactory<Object> registeringFactory =
      (context) -> {
        BundleFinalizer finalizer = context.getBundleFinalizer();
        context.addStartBundleFunction(
            () -> finalizer.afterBundleCommit(Instant.ofEpochMilli(42L), callbackMock));
        return null;
      };

  ProcessBundleHandler handler =
      new ProcessBundleHandler(
          PipelineOptionsFactory.create(),
          Collections.emptySet(),
          descriptorsById::get,
          beamFnDataClient,
          null /* beamFnStateGrpcClientCache */,
          finalizeHandlerMock,
          new ShortIdMap(),
          ImmutableMap.of(DATA_INPUT_URN, registeringFactory),
          Caches.noop(),
          new BundleProcessorCache());

  BeamFnApi.InstructionRequest request =
      BeamFnApi.InstructionRequest.newBuilder()
          .setInstructionId("2L")
          .setProcessBundle(
              BeamFnApi.ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L"))
          .build();
  BeamFnApi.InstructionResponse.Builder response = handler.processBundle(request);

  assertTrue(response.getProcessBundle().getRequiresFinalization());

  // Exactly one registration is expected, carrying the same expiry and callback instance.
  verify(finalizeHandlerMock)
      .registerCallbacks(
          eq("2L"),
          argThat(
              (Collection<CallbackRegistration> registrations) -> {
                CallbackRegistration only = Iterables.getOnlyElement(registrations);
                assertEquals(Instant.ofEpochMilli(42L), only.getExpiryTime());
                assertSame(callbackMock, only.getCallback());
                return true;
              }));
}
use of org.apache.beam.sdk.transforms.DoFn.BundleFinalizer in project beam by apache.
the class ReadChangeStreamPartitionDoFnTest method setUp.
/**
 * Builds the DoFn under test from mocked factories, creates the partition fixture and the
 * per-invocation mocks, stubs the factories so they return mocked DAOs, mappers and actions, and
 * finally calls {@code doFn.setup()}.
 */
@Before
public void setUp() {
  // Collaborators passed to the DoFn constructor.
  DaoFactory daoFactory = mock(DaoFactory.class);
  MapperFactory mapperFactory = mock(MapperFactory.class);
  ActionFactory actionFactory = mock(ActionFactory.class);
  ChangeStreamMetrics metrics = mock(ChangeStreamMetrics.class);

  // Objects the factories are stubbed to return further below.
  PartitionMetadataDao metadataDao = mock(PartitionMetadataDao.class);
  ChangeStreamDao streamDao = mock(ChangeStreamDao.class);
  ChangeStreamRecordMapper recordMapper = mock(ChangeStreamRecordMapper.class);
  PartitionMetadataMapper metadataMapper = mock(PartitionMetadataMapper.class);
  dataChangeRecordAction = mock(DataChangeRecordAction.class);
  heartbeatRecordAction = mock(HeartbeatRecordAction.class);
  childPartitionsRecordAction = mock(ChildPartitionsRecordAction.class);
  queryChangeStreamAction = mock(QueryChangeStreamAction.class);

  // The DoFn is constructed before any factory stubbing is installed.
  doFn = new ReadChangeStreamPartitionDoFn(daoFactory, mapperFactory, actionFactory, metrics);

  partition =
      PartitionMetadata.newBuilder()
          .setPartitionToken(PARTITION_TOKEN)
          .setParentTokens(Sets.newHashSet("parentToken"))
          .setStartTimestamp(PARTITION_START_TIMESTAMP)
          .setEndTimestamp(PARTITION_END_TIMESTAMP)
          .setHeartbeatMillis(PARTITION_HEARTBEAT_MILLIS)
          .setState(SCHEDULED)
          .setWatermark(PARTITION_START_TIMESTAMP)
          .setScheduledAt(Timestamp.now())
          .build();

  // Mocks stored in fields for the individual tests.
  restriction = mock(OffsetRange.class);
  restrictionTracker = mock(RestrictionTracker.class);
  outputReceiver = mock(OutputReceiver.class);
  watermarkEstimator = mock(ManualWatermarkEstimator.class);
  bundleFinalizer = mock(BundleFinalizer.class);
  when(restrictionTracker.currentRestriction()).thenReturn(restriction);

  // DAO factory stubbing.
  when(daoFactory.getPartitionMetadataDao()).thenReturn(metadataDao);
  when(daoFactory.getChangeStreamDao()).thenReturn(streamDao);

  // Mapper factory stubbing.
  when(mapperFactory.changeStreamRecordMapper()).thenReturn(recordMapper);
  when(mapperFactory.partitionMetadataMapper()).thenReturn(metadataMapper);

  // Action factory stubbing; the query action is keyed on all the mocks created above.
  when(actionFactory.dataChangeRecordAction()).thenReturn(dataChangeRecordAction);
  when(actionFactory.heartbeatRecordAction(metrics)).thenReturn(heartbeatRecordAction);
  when(actionFactory.childPartitionsRecordAction(metadataDao, metrics))
      .thenReturn(childPartitionsRecordAction);
  when(actionFactory.queryChangeStreamAction(
          streamDao,
          metadataDao,
          recordMapper,
          metadataMapper,
          dataChangeRecordAction,
          heartbeatRecordAction,
          childPartitionsRecordAction))
      .thenReturn(queryChangeStreamAction);

  doFn.setup();
}
use of org.apache.beam.sdk.transforms.DoFn.BundleFinalizer in project beam by apache.
the class ProcessBundleHandler method createBundleProcessor.
/**
 * Builds a {@code BundleProcessor} for the descriptor that {@code fnApiRegistry} returns for
 * {@code bundleId}.
 *
 * <p>The method creates the registries and function lists shared by every transform, selects a
 * state-call handler, creates the processor, then walks the descriptor's root transforms (data
 * input, data output, Java source and read URNs) and creates runners and consumers for each one
 * recursively. {@code bundleProcessor.finish()} is called before the processor is returned.
 *
 * @param bundleId key looked up in {@code fnApiRegistry}; also embedded in the timer error message
 * @param processBundleRequest request handed to the fail-all state handler when the descriptor
 *     has no state ApiServiceDescriptor
 * @return the processor after {@code finish()} has been called on it
 * @throws IOException declared by this method; not thrown directly by any statement in this body
 */
private BundleProcessor createBundleProcessor(String bundleId, BeamFnApi.ProcessBundleRequest processBundleRequest) throws IOException {
  BeamFnApi.ProcessBundleDescriptor bundleDescriptor = fnApiRegistry.apply(bundleId);

  // Registries and function lists shared by all transforms of this bundle.
  MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
  ExecutionStateTracker stateTracker =
      new ExecutionStateTracker(ExecutionStateSampler.instance());
  PCollectionConsumerRegistry pCollectionConsumerRegistry =
      new PCollectionConsumerRegistry(metricsContainerRegistry, stateTracker);
  HashSet<String> processedPTransformIds = new HashSet<>();
  PTransformFunctionRegistry startFunctionRegistry =
      new PTransformFunctionRegistry(
          metricsContainerRegistry, stateTracker, ExecutionStateTracker.START_STATE_NAME);
  PTransformFunctionRegistry finishFunctionRegistry =
      new PTransformFunctionRegistry(
          metricsContainerRegistry, stateTracker, ExecutionStateTracker.FINISH_STATE_NAME);
  List<ThrowingRunnable> resetFunctions = new ArrayList<>();
  List<ThrowingRunnable> tearDownFunctions = new ArrayList<>();
  List<ProgressRequestCallback> progressRequestCallbacks = new ArrayList<>();

  // Build a multimap of PCollection ids to the ids of the PTransforms which consume them.
  SetMultimap<String, String> pCollectionIdsToConsumingPTransforms = HashMultimap.create();
  bundleDescriptor
      .getTransformsMap()
      .forEach(
          (transformId, transform) ->
              transform
                  .getInputsMap()
                  .values()
                  .forEach(
                      pCollectionId ->
                          pCollectionIdsToConsumingPTransforms.put(pCollectionId, transformId)));

  // Instantiate a State API call handler depending on whether a State ApiServiceDescriptor was
  // specified.
  HandleStateCallsForBundle beamFnStateClient =
      bundleDescriptor.hasStateApiServiceDescriptor()
          ? new BlockTillStateCallsFinish(
              beamFnStateGrpcClientCache.forApiServiceDescriptor(
                  bundleDescriptor.getStateApiServiceDescriptor()))
          : new FailAllStateCallsForBundle(processBundleRequest);

  BundleSplitListener.InMemory splitListener = BundleSplitListener.InMemory.create();

  // The finalizer appends to this collection, which is also handed to the BundleProcessor.
  Collection<CallbackRegistration> bundleFinalizationCallbackRegistrations = new ArrayList<>();
  BundleFinalizer bundleFinalizer =
      new BundleFinalizer() {
        @Override
        public void afterBundleCommit(Instant callbackExpiry, Callback callback) {
          bundleFinalizationCallbackRegistrations.add(
              CallbackRegistration.create(callbackExpiry, callback));
        }
      };

  BundleProcessor bundleProcessor =
      BundleProcessor.create(
          processWideCache,
          bundleDescriptor,
          startFunctionRegistry,
          finishFunctionRegistry,
          resetFunctions,
          tearDownFunctions,
          progressRequestCallbacks,
          splitListener,
          pCollectionConsumerRegistry,
          metricsContainerRegistry,
          stateTracker,
          beamFnStateClient,
          bundleFinalizationCallbackRegistrations,
          runnerCapabilities);

  // Create runners and consumers, starting from each root transform of the descriptor.
  for (Map.Entry<String, RunnerApi.PTransform> entry :
      bundleDescriptor.getTransformsMap().entrySet()) {
    String pTransformId = entry.getKey();
    RunnerApi.PTransform pTransform = entry.getValue();
    String urn = pTransform.getSpec().getUrn();

    // TODO: Remove source as a root and have it be triggered by the Runner.
    boolean isRoot =
        DATA_INPUT_URN.equals(urn)
            || DATA_OUTPUT_URN.equals(urn)
            || JAVA_SOURCE_URN.equals(urn)
            || PTransformTranslation.READ_TRANSFORM_URN.equals(urn);
    if (isRoot) {
      createRunnerAndConsumersForPTransformRecursively(
          beamFnStateClient,
          beamFnDataClient,
          pTransformId,
          pTransform,
          bundleProcessor::getInstructionId,
          bundleProcessor::getCacheTokens,
          bundleProcessor::getBundleCache,
          bundleDescriptor,
          pCollectionIdsToConsumingPTransforms,
          pCollectionConsumerRegistry,
          processedPTransformIds,
          startFunctionRegistry,
          finishFunctionRegistry,
          resetFunctions::add,
          tearDownFunctions::add,
          (apiServiceDescriptor, dataEndpoint) -> {
            // Record each inbound ApiServiceDescriptor once; always record the endpoint.
            if (!bundleProcessor
                .getInboundEndpointApiServiceDescriptors()
                .contains(apiServiceDescriptor)) {
              bundleProcessor.getInboundEndpointApiServiceDescriptors().add(apiServiceDescriptor);
            }
            bundleProcessor.getInboundDataEndpoints().add(dataEndpoint);
          },
          (timerEndpoint) -> {
            // Timers can only be registered when the descriptor names a timer service.
            if (!bundleDescriptor.hasTimerApiServiceDescriptor()) {
              throw new IllegalStateException(
                  String.format(
                      "Timers are unsupported because the "
                          + "ProcessBundleRequest %s does not provide a timer ApiServiceDescriptor.",
                      bundleId));
            }
            bundleProcessor.getTimerEndpoints().add(timerEndpoint);
          },
          progressRequestCallbacks::add,
          splitListener,
          bundleFinalizer,
          bundleProcessor.getChannelRoots(),
          bundleProcessor.getOutboundAggregators(),
          bundleProcessor.getRunnerCapabilities());
    }
  }

  bundleProcessor.finish();
  return bundleProcessor;
}
use of org.apache.beam.sdk.transforms.DoFn.BundleFinalizer in project beam by apache.
the class DoFnOperator method open.
/**
 * Prepares the operator for processing: obtains the DoFn and invokes its setup, assembles the
 * chain of {@code DoFnRunner} wrappers, initialises bundle bookkeeping, schedules the periodic
 * finish-bundle check, selects the pushback runner and creates the bundle-finalization state.
 */
@Override
public void open() throws Exception {
  // WindowDoFnOperator needs state and timers to obtain its DoFn, so this has to wait until
  // StateInternals and TimerInternals are ready. This is called after initializeState().
  this.doFn = getDoFn();
  FlinkPipelineOptions pipelineOptions = serializedOptions.get().as(FlinkPipelineOptions.class);
  doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, pipelineOptions);

  StepContext stepContext = new FlinkStepContext();
  doFnRunner =
      DoFnRunners.simpleRunner(
          pipelineOptions,
          doFn,
          sideInputReader,
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          stepContext,
          getInputCoder(),
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  if (requiresStableInput) {
    // Put this in front of the root FnRunner before any additional wrappers.
    bufferingDoFnRunner =
        BufferingDoFnRunner.create(
            doFnRunner,
            "stable-input-buffer",
            windowedInputCoder,
            windowingStrategy.getWindowFn().windowCoder(),
            getOperatorStateBackend(),
            getKeyedStateBackend(),
            pipelineOptions.getNumConcurrentCheckpoints(),
            serializedOptions);
    doFnRunner = bufferingDoFnRunner;
  }

  doFnRunner = createWrappingDoFnRunner(doFnRunner, stepContext);
  earlyBindStateIfNeeded();

  if (!pipelineOptions.getDisableMetrics()) {
    flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);

    // Checkpoint statistics are only created when a namespace has been configured.
    String namespace = pipelineOptions.getReportCheckpointDuration();
    if (namespace != null) {
      MetricName checkpointMetric = MetricName.named(namespace, "checkpoint_duration");
      checkpointStats =
          new CheckpointStats(
              () ->
                  flinkMetricContainer
                      .getMetricsContainer(stepName)
                      .getDistribution(checkpointMetric));
    }
  }

  elementCount = 0L;
  lastFinishBundleTime = getProcessingTimeService().getCurrentProcessingTime();

  // Schedule a timer that checks the finish-bundle timeout at half the maximum bundle time,
  // with a floor of 1.
  long checkPeriod = Math.max(maxBundleTimeMills / 2, 1);
  checkFinishBundleTimer =
      getProcessingTimeService()
          .scheduleAtFixedRate(
              timestamp -> checkInvokeFinishBundleByTime(), checkPeriod, checkPeriod);

  boolean isSplittableProcessFn = doFn instanceof SplittableParDoViaKeyedWorkItems.ProcessFn;
  if (!isSplittableProcessFn) {
    pushbackDoFnRunner =
        SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
  } else {
    pushbackDoFnRunner =
        new ProcessFnRunner<>((DoFnRunner) doFnRunner, sideInputs, sideInputHandler);
  }

  bundleFinalizer = new InMemoryBundleFinalizer();
  pendingFinalizations = new LinkedHashMap<>();
}
Aggregations