Search in sources :

Example 1 with PTransformFunctionRegistry

use of org.apache.beam.fn.harness.data.PTransformFunctionRegistry in project beam by apache.

the class ProcessBundleHandlerTest method testBundleProcessorReset.

@Test
public void testBundleProcessorReset() throws Exception {
    PTransformFunctionRegistry startFunctionRegistry = mock(PTransformFunctionRegistry.class);
    PTransformFunctionRegistry finishFunctionRegistry = mock(PTransformFunctionRegistry.class);
    BundleSplitListener.InMemory splitListener = mock(BundleSplitListener.InMemory.class);
    Collection<CallbackRegistration> bundleFinalizationCallbacks = mock(Collection.class);
    PCollectionConsumerRegistry pCollectionConsumerRegistry = mock(PCollectionConsumerRegistry.class);
    MetricsContainerStepMap metricsContainerRegistry = mock(MetricsContainerStepMap.class);
    ExecutionStateTracker stateTracker = mock(ExecutionStateTracker.class);
    ProcessBundleHandler.HandleStateCallsForBundle beamFnStateClient = mock(ProcessBundleHandler.HandleStateCallsForBundle.class);
    ThrowingRunnable resetFunction = mock(ThrowingRunnable.class);
    Cache<Object, Object> processWideCache = Caches.eternal();
    BundleProcessor bundleProcessor = BundleProcessor.create(processWideCache, ProcessBundleDescriptor.getDefaultInstance(), startFunctionRegistry, finishFunctionRegistry, Collections.singletonList(resetFunction), new ArrayList<>(), new ArrayList<>(), splitListener, pCollectionConsumerRegistry, metricsContainerRegistry, stateTracker, beamFnStateClient, bundleFinalizationCallbacks, new HashSet<>());
    bundleProcessor.finish();
    CacheToken cacheToken = CacheToken.newBuilder().setSideInput(CacheToken.SideInput.newBuilder().setTransformId("transformId")).build();
    bundleProcessor.setupForProcessBundleRequest(processBundleRequestFor("instructionId", "descriptorId", cacheToken));
    assertEquals("instructionId", bundleProcessor.getInstructionId());
    assertThat(bundleProcessor.getCacheTokens(), containsInAnyOrder(cacheToken));
    Cache<Object, Object> bundleCache = bundleProcessor.getBundleCache();
    bundleCache.put("A", "B");
    assertEquals("B", bundleCache.peek("A"));
    bundleProcessor.reset();
    assertNull(bundleProcessor.getInstructionId());
    assertNull(bundleProcessor.getCacheTokens());
    assertNull(bundleCache.peek("A"));
    verify(startFunctionRegistry, times(1)).reset();
    verify(finishFunctionRegistry, times(1)).reset();
    verify(splitListener, times(1)).clear();
    verify(pCollectionConsumerRegistry, times(1)).reset();
    verify(metricsContainerRegistry, times(1)).reset();
    verify(stateTracker, times(1)).reset();
    verify(bundleFinalizationCallbacks, times(1)).clear();
    verify(resetFunction, times(1)).run();
    // Ensure that the next setup produces the expected state.
    bundleProcessor.setupForProcessBundleRequest(processBundleRequestFor("instructionId2", "descriptorId2"));
    assertNotSame(bundleCache, bundleProcessor.getBundleCache());
    assertEquals("instructionId2", bundleProcessor.getInstructionId());
    assertThat(bundleProcessor.getCacheTokens(), is(emptyIterable()));
}
Also used : MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) PTransformFunctionRegistry(org.apache.beam.fn.harness.data.PTransformFunctionRegistry) CacheToken(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleRequest.CacheToken) CallbackRegistration(org.apache.beam.fn.harness.control.FinalizeBundleHandler.CallbackRegistration) PCollectionConsumerRegistry(org.apache.beam.fn.harness.data.PCollectionConsumerRegistry) ThrowingRunnable(org.apache.beam.sdk.function.ThrowingRunnable) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) BundleProcessor(org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessor) Test(org.junit.Test)

Example 2 with PTransformFunctionRegistry

use of org.apache.beam.fn.harness.data.PTransformFunctionRegistry in project beam by apache.

the class ProcessBundleHandler method createBundleProcessor.

private BundleProcessor createBundleProcessor(String bundleId, BeamFnApi.ProcessBundleRequest processBundleRequest) throws IOException {
    BeamFnApi.ProcessBundleDescriptor bundleDescriptor = fnApiRegistry.apply(bundleId);
    SetMultimap<String, String> pCollectionIdsToConsumingPTransforms = HashMultimap.create();
    MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
    ExecutionStateTracker stateTracker = new ExecutionStateTracker(ExecutionStateSampler.instance());
    PCollectionConsumerRegistry pCollectionConsumerRegistry = new PCollectionConsumerRegistry(metricsContainerRegistry, stateTracker);
    HashSet<String> processedPTransformIds = new HashSet<>();
    PTransformFunctionRegistry startFunctionRegistry = new PTransformFunctionRegistry(metricsContainerRegistry, stateTracker, ExecutionStateTracker.START_STATE_NAME);
    PTransformFunctionRegistry finishFunctionRegistry = new PTransformFunctionRegistry(metricsContainerRegistry, stateTracker, ExecutionStateTracker.FINISH_STATE_NAME);
    List<ThrowingRunnable> resetFunctions = new ArrayList<>();
    List<ThrowingRunnable> tearDownFunctions = new ArrayList<>();
    List<ProgressRequestCallback> progressRequestCallbacks = new ArrayList<>();
    // Build a multimap of PCollection ids to PTransform ids which consume said PCollections
    for (Map.Entry<String, RunnerApi.PTransform> entry : bundleDescriptor.getTransformsMap().entrySet()) {
        for (String pCollectionId : entry.getValue().getInputsMap().values()) {
            pCollectionIdsToConsumingPTransforms.put(pCollectionId, entry.getKey());
        }
    }
    // Instantiate a State API call handler depending on whether a State ApiServiceDescriptor was
    // specified.
    HandleStateCallsForBundle beamFnStateClient;
    if (bundleDescriptor.hasStateApiServiceDescriptor()) {
        BeamFnStateClient underlyingClient = beamFnStateGrpcClientCache.forApiServiceDescriptor(bundleDescriptor.getStateApiServiceDescriptor());
        beamFnStateClient = new BlockTillStateCallsFinish(underlyingClient);
    } else {
        beamFnStateClient = new FailAllStateCallsForBundle(processBundleRequest);
    }
    BundleSplitListener.InMemory splitListener = BundleSplitListener.InMemory.create();
    Collection<CallbackRegistration> bundleFinalizationCallbackRegistrations = new ArrayList<>();
    BundleFinalizer bundleFinalizer = new BundleFinalizer() {

        @Override
        public void afterBundleCommit(Instant callbackExpiry, Callback callback) {
            bundleFinalizationCallbackRegistrations.add(CallbackRegistration.create(callbackExpiry, callback));
        }
    };
    BundleProcessor bundleProcessor = BundleProcessor.create(processWideCache, bundleDescriptor, startFunctionRegistry, finishFunctionRegistry, resetFunctions, tearDownFunctions, progressRequestCallbacks, splitListener, pCollectionConsumerRegistry, metricsContainerRegistry, stateTracker, beamFnStateClient, bundleFinalizationCallbackRegistrations, runnerCapabilities);
    // Create a BeamFnStateClient
    for (Map.Entry<String, RunnerApi.PTransform> entry : bundleDescriptor.getTransformsMap().entrySet()) {
        // TODO: Remove source as a root and have it be triggered by the Runner.
        if (!DATA_INPUT_URN.equals(entry.getValue().getSpec().getUrn()) && !DATA_OUTPUT_URN.equals(entry.getValue().getSpec().getUrn()) && !JAVA_SOURCE_URN.equals(entry.getValue().getSpec().getUrn()) && !PTransformTranslation.READ_TRANSFORM_URN.equals(entry.getValue().getSpec().getUrn())) {
            continue;
        }
        createRunnerAndConsumersForPTransformRecursively(beamFnStateClient, beamFnDataClient, entry.getKey(), entry.getValue(), bundleProcessor::getInstructionId, bundleProcessor::getCacheTokens, bundleProcessor::getBundleCache, bundleDescriptor, pCollectionIdsToConsumingPTransforms, pCollectionConsumerRegistry, processedPTransformIds, startFunctionRegistry, finishFunctionRegistry, resetFunctions::add, tearDownFunctions::add, (apiServiceDescriptor, dataEndpoint) -> {
            if (!bundleProcessor.getInboundEndpointApiServiceDescriptors().contains(apiServiceDescriptor)) {
                bundleProcessor.getInboundEndpointApiServiceDescriptors().add(apiServiceDescriptor);
            }
            bundleProcessor.getInboundDataEndpoints().add(dataEndpoint);
        }, (timerEndpoint) -> {
            if (!bundleDescriptor.hasTimerApiServiceDescriptor()) {
                throw new IllegalStateException(String.format("Timers are unsupported because the " + "ProcessBundleRequest %s does not provide a timer ApiServiceDescriptor.", bundleId));
            }
            bundleProcessor.getTimerEndpoints().add(timerEndpoint);
        }, progressRequestCallbacks::add, splitListener, bundleFinalizer, bundleProcessor.getChannelRoots(), bundleProcessor.getOutboundAggregators(), bundleProcessor.getRunnerCapabilities());
    }
    bundleProcessor.finish();
    return bundleProcessor;
}
Also used : MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) BeamFnStateClient(org.apache.beam.fn.harness.state.BeamFnStateClient) ProgressRequestCallback(org.apache.beam.fn.harness.PTransformRunnerFactory.ProgressRequestCallback) ArrayList(java.util.ArrayList) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PCollectionConsumerRegistry(org.apache.beam.fn.harness.data.PCollectionConsumerRegistry) ThrowingRunnable(org.apache.beam.sdk.function.ThrowingRunnable) CallbackRegistration(org.apache.beam.fn.harness.control.FinalizeBundleHandler.CallbackRegistration) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) HashSet(java.util.HashSet) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) PTransformFunctionRegistry(org.apache.beam.fn.harness.data.PTransformFunctionRegistry) Instant(org.joda.time.Instant) BundleFinalizer(org.apache.beam.sdk.transforms.DoFn.BundleFinalizer) ProgressRequestCallback(org.apache.beam.fn.harness.PTransformRunnerFactory.ProgressRequestCallback) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) Map(java.util.Map) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) LinkedHashMap(java.util.LinkedHashMap) WeakHashMap(java.util.WeakHashMap)

Example 3 with PTransformFunctionRegistry

use of org.apache.beam.fn.harness.data.PTransformFunctionRegistry in project beam by apache.

the class ProcessBundleHandler method processBundle.

/**
 * Processes a bundle, running the start(), process(), and finish() functions. This function is
 * required to be reentrant.
 */
public BeamFnApi.InstructionResponse.Builder processBundle(BeamFnApi.InstructionRequest request) throws Exception {
    BeamFnApi.ProcessBundleResponse.Builder response = BeamFnApi.ProcessBundleResponse.newBuilder();
    BundleProcessor bundleProcessor = bundleProcessorCache.get(request, () -> {
        try {
            return createBundleProcessor(request.getProcessBundle().getProcessBundleDescriptorId(), request.getProcessBundle());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    });
    try {
        PTransformFunctionRegistry startFunctionRegistry = bundleProcessor.getStartFunctionRegistry();
        PTransformFunctionRegistry finishFunctionRegistry = bundleProcessor.getFinishFunctionRegistry();
        ExecutionStateTracker stateTracker = bundleProcessor.getStateTracker();
        try (HandleStateCallsForBundle beamFnStateClient = bundleProcessor.getBeamFnStateClient()) {
            try (Closeable closeTracker = stateTracker.activate()) {
                // Already in reverse topological order so we don't need to do anything.
                for (ThrowingRunnable startFunction : startFunctionRegistry.getFunctions()) {
                    LOG.debug("Starting function {}", startFunction);
                    startFunction.run();
                }
                if (request.getProcessBundle().hasElements()) {
                    boolean inputFinished = bundleProcessor.getInboundObserver().multiplexElements(request.getProcessBundle().getElements());
                    if (!inputFinished) {
                        throw new RuntimeException("Elements embedded in ProcessBundleRequest do not contain stream terminators for " + "all data and timer inputs. Unterminated endpoints: " + bundleProcessor.getInboundObserver().getUnfinishedEndpoints());
                    }
                } else if (!bundleProcessor.getInboundEndpointApiServiceDescriptors().isEmpty()) {
                    BeamFnDataInboundObserver2 observer = bundleProcessor.getInboundObserver();
                    beamFnDataClient.registerReceiver(request.getInstructionId(), bundleProcessor.getInboundEndpointApiServiceDescriptors(), observer);
                    observer.awaitCompletion();
                    beamFnDataClient.unregisterReceiver(request.getInstructionId(), bundleProcessor.getInboundEndpointApiServiceDescriptors());
                }
                // Need to reverse this since we want to call finish in topological order.
                for (ThrowingRunnable finishFunction : Lists.reverse(finishFunctionRegistry.getFunctions())) {
                    LOG.debug("Finishing function {}", finishFunction);
                    finishFunction.run();
                }
            }
            // If bundleProcessor has not flushed any elements, embed them in response.
            embedOutboundElementsIfApplicable(response, bundleProcessor);
            // Add all checkpointed residuals to the response.
            response.addAllResidualRoots(bundleProcessor.getSplitListener().getResidualRoots());
            // Add all metrics to the response.
            Map<String, ByteString> monitoringData = monitoringData(bundleProcessor);
            if (runnerAcceptsShortIds) {
                response.putAllMonitoringData(monitoringData);
            } else {
                for (Map.Entry<String, ByteString> metric : monitoringData.entrySet()) {
                    response.addMonitoringInfos(shortIds.get(metric.getKey()).toBuilder().setPayload(metric.getValue()));
                }
            }
            if (!bundleProcessor.getBundleFinalizationCallbackRegistrations().isEmpty()) {
                finalizeBundleHandler.registerCallbacks(bundleProcessor.getInstructionId(), ImmutableList.copyOf(bundleProcessor.getBundleFinalizationCallbackRegistrations()));
                response.setRequiresFinalization(true);
            }
        }
        // Mark the bundle processor as re-usable.
        bundleProcessorCache.release(request.getProcessBundle().getProcessBundleDescriptorId(), bundleProcessor);
        return BeamFnApi.InstructionResponse.newBuilder().setProcessBundle(response);
    } catch (Exception e) {
        // Make sure we clean-up from the active set of bundle processors.
        bundleProcessorCache.discard(bundleProcessor);
        throw e;
    }
}
Also used : PTransformFunctionRegistry(org.apache.beam.fn.harness.data.PTransformFunctionRegistry) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Closeable(java.io.Closeable) IOException(java.io.IOException) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) BeamFnDataInboundObserver2(org.apache.beam.sdk.fn.data.BeamFnDataInboundObserver2) IOException(java.io.IOException) ThrowingRunnable(org.apache.beam.sdk.function.ThrowingRunnable) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) Map(java.util.Map) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) LinkedHashMap(java.util.LinkedHashMap) WeakHashMap(java.util.WeakHashMap) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse)

Aggregations

PTransformFunctionRegistry (org.apache.beam.fn.harness.data.PTransformFunctionRegistry)3 ExecutionStateTracker (org.apache.beam.runners.core.metrics.ExecutionStateTracker)3 MetricsContainerStepMap (org.apache.beam.runners.core.metrics.MetricsContainerStepMap)3 ThrowingRunnable (org.apache.beam.sdk.function.ThrowingRunnable)3 LinkedHashMap (java.util.LinkedHashMap)2 Map (java.util.Map)2 WeakHashMap (java.util.WeakHashMap)2 CallbackRegistration (org.apache.beam.fn.harness.control.FinalizeBundleHandler.CallbackRegistration)2 PCollectionConsumerRegistry (org.apache.beam.fn.harness.data.PCollectionConsumerRegistry)2 ShortIdMap (org.apache.beam.runners.core.metrics.ShortIdMap)2 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)2 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)2 Closeable (java.io.Closeable)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 ProgressRequestCallback (org.apache.beam.fn.harness.PTransformRunnerFactory.ProgressRequestCallback)1 BundleProcessor (org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessor)1 BeamFnStateClient (org.apache.beam.fn.harness.state.BeamFnStateClient)1 BeamFnApi (org.apache.beam.model.fnexecution.v1.BeamFnApi)1