use of org.apache.beam.fn.harness.data.PTransformFunctionRegistry in project beam by apache.
the class ProcessBundleHandlerTest method testBundleProcessorReset.
@Test
public void testBundleProcessorReset() throws Exception {
PTransformFunctionRegistry startFunctionRegistry = mock(PTransformFunctionRegistry.class);
PTransformFunctionRegistry finishFunctionRegistry = mock(PTransformFunctionRegistry.class);
BundleSplitListener.InMemory splitListener = mock(BundleSplitListener.InMemory.class);
Collection<CallbackRegistration> bundleFinalizationCallbacks = mock(Collection.class);
PCollectionConsumerRegistry pCollectionConsumerRegistry = mock(PCollectionConsumerRegistry.class);
MetricsContainerStepMap metricsContainerRegistry = mock(MetricsContainerStepMap.class);
ExecutionStateTracker stateTracker = mock(ExecutionStateTracker.class);
ProcessBundleHandler.HandleStateCallsForBundle beamFnStateClient = mock(ProcessBundleHandler.HandleStateCallsForBundle.class);
ThrowingRunnable resetFunction = mock(ThrowingRunnable.class);
Cache<Object, Object> processWideCache = Caches.eternal();
BundleProcessor bundleProcessor = BundleProcessor.create(processWideCache, ProcessBundleDescriptor.getDefaultInstance(), startFunctionRegistry, finishFunctionRegistry, Collections.singletonList(resetFunction), new ArrayList<>(), new ArrayList<>(), splitListener, pCollectionConsumerRegistry, metricsContainerRegistry, stateTracker, beamFnStateClient, bundleFinalizationCallbacks, new HashSet<>());
bundleProcessor.finish();
CacheToken cacheToken = CacheToken.newBuilder().setSideInput(CacheToken.SideInput.newBuilder().setTransformId("transformId")).build();
bundleProcessor.setupForProcessBundleRequest(processBundleRequestFor("instructionId", "descriptorId", cacheToken));
assertEquals("instructionId", bundleProcessor.getInstructionId());
assertThat(bundleProcessor.getCacheTokens(), containsInAnyOrder(cacheToken));
Cache<Object, Object> bundleCache = bundleProcessor.getBundleCache();
bundleCache.put("A", "B");
assertEquals("B", bundleCache.peek("A"));
bundleProcessor.reset();
assertNull(bundleProcessor.getInstructionId());
assertNull(bundleProcessor.getCacheTokens());
assertNull(bundleCache.peek("A"));
verify(startFunctionRegistry, times(1)).reset();
verify(finishFunctionRegistry, times(1)).reset();
verify(splitListener, times(1)).clear();
verify(pCollectionConsumerRegistry, times(1)).reset();
verify(metricsContainerRegistry, times(1)).reset();
verify(stateTracker, times(1)).reset();
verify(bundleFinalizationCallbacks, times(1)).clear();
verify(resetFunction, times(1)).run();
// Ensure that the next setup produces the expected state.
bundleProcessor.setupForProcessBundleRequest(processBundleRequestFor("instructionId2", "descriptorId2"));
assertNotSame(bundleCache, bundleProcessor.getBundleCache());
assertEquals("instructionId2", bundleProcessor.getInstructionId());
assertThat(bundleProcessor.getCacheTokens(), is(emptyIterable()));
}
use of org.apache.beam.fn.harness.data.PTransformFunctionRegistry in project beam by apache.
the class ProcessBundleHandler method createBundleProcessor.
private BundleProcessor createBundleProcessor(String bundleId, BeamFnApi.ProcessBundleRequest processBundleRequest) throws IOException {
BeamFnApi.ProcessBundleDescriptor bundleDescriptor = fnApiRegistry.apply(bundleId);
SetMultimap<String, String> pCollectionIdsToConsumingPTransforms = HashMultimap.create();
MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
ExecutionStateTracker stateTracker = new ExecutionStateTracker(ExecutionStateSampler.instance());
PCollectionConsumerRegistry pCollectionConsumerRegistry = new PCollectionConsumerRegistry(metricsContainerRegistry, stateTracker);
HashSet<String> processedPTransformIds = new HashSet<>();
PTransformFunctionRegistry startFunctionRegistry = new PTransformFunctionRegistry(metricsContainerRegistry, stateTracker, ExecutionStateTracker.START_STATE_NAME);
PTransformFunctionRegistry finishFunctionRegistry = new PTransformFunctionRegistry(metricsContainerRegistry, stateTracker, ExecutionStateTracker.FINISH_STATE_NAME);
List<ThrowingRunnable> resetFunctions = new ArrayList<>();
List<ThrowingRunnable> tearDownFunctions = new ArrayList<>();
List<ProgressRequestCallback> progressRequestCallbacks = new ArrayList<>();
// Build a multimap of PCollection ids to PTransform ids which consume said PCollections
for (Map.Entry<String, RunnerApi.PTransform> entry : bundleDescriptor.getTransformsMap().entrySet()) {
for (String pCollectionId : entry.getValue().getInputsMap().values()) {
pCollectionIdsToConsumingPTransforms.put(pCollectionId, entry.getKey());
}
}
// Instantiate a State API call handler depending on whether a State ApiServiceDescriptor was
// specified.
HandleStateCallsForBundle beamFnStateClient;
if (bundleDescriptor.hasStateApiServiceDescriptor()) {
BeamFnStateClient underlyingClient = beamFnStateGrpcClientCache.forApiServiceDescriptor(bundleDescriptor.getStateApiServiceDescriptor());
beamFnStateClient = new BlockTillStateCallsFinish(underlyingClient);
} else {
beamFnStateClient = new FailAllStateCallsForBundle(processBundleRequest);
}
BundleSplitListener.InMemory splitListener = BundleSplitListener.InMemory.create();
Collection<CallbackRegistration> bundleFinalizationCallbackRegistrations = new ArrayList<>();
BundleFinalizer bundleFinalizer = new BundleFinalizer() {
@Override
public void afterBundleCommit(Instant callbackExpiry, Callback callback) {
bundleFinalizationCallbackRegistrations.add(CallbackRegistration.create(callbackExpiry, callback));
}
};
BundleProcessor bundleProcessor = BundleProcessor.create(processWideCache, bundleDescriptor, startFunctionRegistry, finishFunctionRegistry, resetFunctions, tearDownFunctions, progressRequestCallbacks, splitListener, pCollectionConsumerRegistry, metricsContainerRegistry, stateTracker, beamFnStateClient, bundleFinalizationCallbackRegistrations, runnerCapabilities);
// Create a BeamFnStateClient
for (Map.Entry<String, RunnerApi.PTransform> entry : bundleDescriptor.getTransformsMap().entrySet()) {
// TODO: Remove source as a root and have it be triggered by the Runner.
if (!DATA_INPUT_URN.equals(entry.getValue().getSpec().getUrn()) && !DATA_OUTPUT_URN.equals(entry.getValue().getSpec().getUrn()) && !JAVA_SOURCE_URN.equals(entry.getValue().getSpec().getUrn()) && !PTransformTranslation.READ_TRANSFORM_URN.equals(entry.getValue().getSpec().getUrn())) {
continue;
}
createRunnerAndConsumersForPTransformRecursively(beamFnStateClient, beamFnDataClient, entry.getKey(), entry.getValue(), bundleProcessor::getInstructionId, bundleProcessor::getCacheTokens, bundleProcessor::getBundleCache, bundleDescriptor, pCollectionIdsToConsumingPTransforms, pCollectionConsumerRegistry, processedPTransformIds, startFunctionRegistry, finishFunctionRegistry, resetFunctions::add, tearDownFunctions::add, (apiServiceDescriptor, dataEndpoint) -> {
if (!bundleProcessor.getInboundEndpointApiServiceDescriptors().contains(apiServiceDescriptor)) {
bundleProcessor.getInboundEndpointApiServiceDescriptors().add(apiServiceDescriptor);
}
bundleProcessor.getInboundDataEndpoints().add(dataEndpoint);
}, (timerEndpoint) -> {
if (!bundleDescriptor.hasTimerApiServiceDescriptor()) {
throw new IllegalStateException(String.format("Timers are unsupported because the " + "ProcessBundleRequest %s does not provide a timer ApiServiceDescriptor.", bundleId));
}
bundleProcessor.getTimerEndpoints().add(timerEndpoint);
}, progressRequestCallbacks::add, splitListener, bundleFinalizer, bundleProcessor.getChannelRoots(), bundleProcessor.getOutboundAggregators(), bundleProcessor.getRunnerCapabilities());
}
bundleProcessor.finish();
return bundleProcessor;
}
use of org.apache.beam.fn.harness.data.PTransformFunctionRegistry in project beam by apache.
the class ProcessBundleHandler method processBundle.
/**
* Processes a bundle, running the start(), process(), and finish() functions. This function is
* required to be reentrant.
*/
public BeamFnApi.InstructionResponse.Builder processBundle(BeamFnApi.InstructionRequest request) throws Exception {
BeamFnApi.ProcessBundleResponse.Builder response = BeamFnApi.ProcessBundleResponse.newBuilder();
BundleProcessor bundleProcessor = bundleProcessorCache.get(request, () -> {
try {
return createBundleProcessor(request.getProcessBundle().getProcessBundleDescriptorId(), request.getProcessBundle());
} catch (IOException e) {
throw new RuntimeException(e);
}
});
try {
PTransformFunctionRegistry startFunctionRegistry = bundleProcessor.getStartFunctionRegistry();
PTransformFunctionRegistry finishFunctionRegistry = bundleProcessor.getFinishFunctionRegistry();
ExecutionStateTracker stateTracker = bundleProcessor.getStateTracker();
try (HandleStateCallsForBundle beamFnStateClient = bundleProcessor.getBeamFnStateClient()) {
try (Closeable closeTracker = stateTracker.activate()) {
// Already in reverse topological order so we don't need to do anything.
for (ThrowingRunnable startFunction : startFunctionRegistry.getFunctions()) {
LOG.debug("Starting function {}", startFunction);
startFunction.run();
}
if (request.getProcessBundle().hasElements()) {
boolean inputFinished = bundleProcessor.getInboundObserver().multiplexElements(request.getProcessBundle().getElements());
if (!inputFinished) {
throw new RuntimeException("Elements embedded in ProcessBundleRequest do not contain stream terminators for " + "all data and timer inputs. Unterminated endpoints: " + bundleProcessor.getInboundObserver().getUnfinishedEndpoints());
}
} else if (!bundleProcessor.getInboundEndpointApiServiceDescriptors().isEmpty()) {
BeamFnDataInboundObserver2 observer = bundleProcessor.getInboundObserver();
beamFnDataClient.registerReceiver(request.getInstructionId(), bundleProcessor.getInboundEndpointApiServiceDescriptors(), observer);
observer.awaitCompletion();
beamFnDataClient.unregisterReceiver(request.getInstructionId(), bundleProcessor.getInboundEndpointApiServiceDescriptors());
}
// Need to reverse this since we want to call finish in topological order.
for (ThrowingRunnable finishFunction : Lists.reverse(finishFunctionRegistry.getFunctions())) {
LOG.debug("Finishing function {}", finishFunction);
finishFunction.run();
}
}
// If bundleProcessor has not flushed any elements, embed them in response.
embedOutboundElementsIfApplicable(response, bundleProcessor);
// Add all checkpointed residuals to the response.
response.addAllResidualRoots(bundleProcessor.getSplitListener().getResidualRoots());
// Add all metrics to the response.
Map<String, ByteString> monitoringData = monitoringData(bundleProcessor);
if (runnerAcceptsShortIds) {
response.putAllMonitoringData(monitoringData);
} else {
for (Map.Entry<String, ByteString> metric : monitoringData.entrySet()) {
response.addMonitoringInfos(shortIds.get(metric.getKey()).toBuilder().setPayload(metric.getValue()));
}
}
if (!bundleProcessor.getBundleFinalizationCallbackRegistrations().isEmpty()) {
finalizeBundleHandler.registerCallbacks(bundleProcessor.getInstructionId(), ImmutableList.copyOf(bundleProcessor.getBundleFinalizationCallbackRegistrations()));
response.setRequiresFinalization(true);
}
}
// Mark the bundle processor as re-usable.
bundleProcessorCache.release(request.getProcessBundle().getProcessBundleDescriptorId(), bundleProcessor);
return BeamFnApi.InstructionResponse.newBuilder().setProcessBundle(response);
} catch (Exception e) {
// Make sure we clean-up from the active set of bundle processors.
bundleProcessorCache.discard(bundleProcessor);
throw e;
}
}
Aggregations