Search in sources :

Example 1 with PCollectionConsumerRegistry

use of org.apache.beam.fn.harness.data.PCollectionConsumerRegistry in project beam by apache.

the class ProcessBundleHandlerTest method testBundleProcessorReset.

@Test
public void testBundleProcessorReset() throws Exception {
    PTransformFunctionRegistry startFunctionRegistry = mock(PTransformFunctionRegistry.class);
    PTransformFunctionRegistry finishFunctionRegistry = mock(PTransformFunctionRegistry.class);
    BundleSplitListener.InMemory splitListener = mock(BundleSplitListener.InMemory.class);
    Collection<CallbackRegistration> bundleFinalizationCallbacks = mock(Collection.class);
    PCollectionConsumerRegistry pCollectionConsumerRegistry = mock(PCollectionConsumerRegistry.class);
    MetricsContainerStepMap metricsContainerRegistry = mock(MetricsContainerStepMap.class);
    ExecutionStateTracker stateTracker = mock(ExecutionStateTracker.class);
    ProcessBundleHandler.HandleStateCallsForBundle beamFnStateClient = mock(ProcessBundleHandler.HandleStateCallsForBundle.class);
    ThrowingRunnable resetFunction = mock(ThrowingRunnable.class);
    Cache<Object, Object> processWideCache = Caches.eternal();
    BundleProcessor bundleProcessor = BundleProcessor.create(processWideCache, ProcessBundleDescriptor.getDefaultInstance(), startFunctionRegistry, finishFunctionRegistry, Collections.singletonList(resetFunction), new ArrayList<>(), new ArrayList<>(), splitListener, pCollectionConsumerRegistry, metricsContainerRegistry, stateTracker, beamFnStateClient, bundleFinalizationCallbacks, new HashSet<>());
    bundleProcessor.finish();
    CacheToken cacheToken = CacheToken.newBuilder().setSideInput(CacheToken.SideInput.newBuilder().setTransformId("transformId")).build();
    bundleProcessor.setupForProcessBundleRequest(processBundleRequestFor("instructionId", "descriptorId", cacheToken));
    assertEquals("instructionId", bundleProcessor.getInstructionId());
    assertThat(bundleProcessor.getCacheTokens(), containsInAnyOrder(cacheToken));
    Cache<Object, Object> bundleCache = bundleProcessor.getBundleCache();
    bundleCache.put("A", "B");
    assertEquals("B", bundleCache.peek("A"));
    bundleProcessor.reset();
    assertNull(bundleProcessor.getInstructionId());
    assertNull(bundleProcessor.getCacheTokens());
    assertNull(bundleCache.peek("A"));
    verify(startFunctionRegistry, times(1)).reset();
    verify(finishFunctionRegistry, times(1)).reset();
    verify(splitListener, times(1)).clear();
    verify(pCollectionConsumerRegistry, times(1)).reset();
    verify(metricsContainerRegistry, times(1)).reset();
    verify(stateTracker, times(1)).reset();
    verify(bundleFinalizationCallbacks, times(1)).clear();
    verify(resetFunction, times(1)).run();
    // Ensure that the next setup produces the expected state.
    bundleProcessor.setupForProcessBundleRequest(processBundleRequestFor("instructionId2", "descriptorId2"));
    assertNotSame(bundleCache, bundleProcessor.getBundleCache());
    assertEquals("instructionId2", bundleProcessor.getInstructionId());
    assertThat(bundleProcessor.getCacheTokens(), is(emptyIterable()));
}
Also used : MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) PTransformFunctionRegistry(org.apache.beam.fn.harness.data.PTransformFunctionRegistry) CacheToken(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleRequest.CacheToken) CallbackRegistration(org.apache.beam.fn.harness.control.FinalizeBundleHandler.CallbackRegistration) PCollectionConsumerRegistry(org.apache.beam.fn.harness.data.PCollectionConsumerRegistry) ThrowingRunnable(org.apache.beam.sdk.function.ThrowingRunnable) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) BundleProcessor(org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessor) Test(org.junit.Test)

Example 2 with PCollectionConsumerRegistry

use of org.apache.beam.fn.harness.data.PCollectionConsumerRegistry in project beam by apache.

the class ProcessBundleHandler method createBundleProcessor.

private BundleProcessor createBundleProcessor(String bundleId, BeamFnApi.ProcessBundleRequest processBundleRequest) throws IOException {
    BeamFnApi.ProcessBundleDescriptor bundleDescriptor = fnApiRegistry.apply(bundleId);
    SetMultimap<String, String> pCollectionIdsToConsumingPTransforms = HashMultimap.create();
    MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
    ExecutionStateTracker stateTracker = new ExecutionStateTracker(ExecutionStateSampler.instance());
    PCollectionConsumerRegistry pCollectionConsumerRegistry = new PCollectionConsumerRegistry(metricsContainerRegistry, stateTracker);
    HashSet<String> processedPTransformIds = new HashSet<>();
    PTransformFunctionRegistry startFunctionRegistry = new PTransformFunctionRegistry(metricsContainerRegistry, stateTracker, ExecutionStateTracker.START_STATE_NAME);
    PTransformFunctionRegistry finishFunctionRegistry = new PTransformFunctionRegistry(metricsContainerRegistry, stateTracker, ExecutionStateTracker.FINISH_STATE_NAME);
    List<ThrowingRunnable> resetFunctions = new ArrayList<>();
    List<ThrowingRunnable> tearDownFunctions = new ArrayList<>();
    List<ProgressRequestCallback> progressRequestCallbacks = new ArrayList<>();
    // Build a multimap of PCollection ids to PTransform ids which consume said PCollections
    for (Map.Entry<String, RunnerApi.PTransform> entry : bundleDescriptor.getTransformsMap().entrySet()) {
        for (String pCollectionId : entry.getValue().getInputsMap().values()) {
            pCollectionIdsToConsumingPTransforms.put(pCollectionId, entry.getKey());
        }
    }
    // Instantiate a State API call handler depending on whether a State ApiServiceDescriptor was
    // specified.
    HandleStateCallsForBundle beamFnStateClient;
    if (bundleDescriptor.hasStateApiServiceDescriptor()) {
        BeamFnStateClient underlyingClient = beamFnStateGrpcClientCache.forApiServiceDescriptor(bundleDescriptor.getStateApiServiceDescriptor());
        beamFnStateClient = new BlockTillStateCallsFinish(underlyingClient);
    } else {
        beamFnStateClient = new FailAllStateCallsForBundle(processBundleRequest);
    }
    BundleSplitListener.InMemory splitListener = BundleSplitListener.InMemory.create();
    Collection<CallbackRegistration> bundleFinalizationCallbackRegistrations = new ArrayList<>();
    BundleFinalizer bundleFinalizer = new BundleFinalizer() {

        @Override
        public void afterBundleCommit(Instant callbackExpiry, Callback callback) {
            bundleFinalizationCallbackRegistrations.add(CallbackRegistration.create(callbackExpiry, callback));
        }
    };
    BundleProcessor bundleProcessor = BundleProcessor.create(processWideCache, bundleDescriptor, startFunctionRegistry, finishFunctionRegistry, resetFunctions, tearDownFunctions, progressRequestCallbacks, splitListener, pCollectionConsumerRegistry, metricsContainerRegistry, stateTracker, beamFnStateClient, bundleFinalizationCallbackRegistrations, runnerCapabilities);
    // Create a BeamFnStateClient
    for (Map.Entry<String, RunnerApi.PTransform> entry : bundleDescriptor.getTransformsMap().entrySet()) {
        // TODO: Remove source as a root and have it be triggered by the Runner.
        if (!DATA_INPUT_URN.equals(entry.getValue().getSpec().getUrn()) && !DATA_OUTPUT_URN.equals(entry.getValue().getSpec().getUrn()) && !JAVA_SOURCE_URN.equals(entry.getValue().getSpec().getUrn()) && !PTransformTranslation.READ_TRANSFORM_URN.equals(entry.getValue().getSpec().getUrn())) {
            continue;
        }
        createRunnerAndConsumersForPTransformRecursively(beamFnStateClient, beamFnDataClient, entry.getKey(), entry.getValue(), bundleProcessor::getInstructionId, bundleProcessor::getCacheTokens, bundleProcessor::getBundleCache, bundleDescriptor, pCollectionIdsToConsumingPTransforms, pCollectionConsumerRegistry, processedPTransformIds, startFunctionRegistry, finishFunctionRegistry, resetFunctions::add, tearDownFunctions::add, (apiServiceDescriptor, dataEndpoint) -> {
            if (!bundleProcessor.getInboundEndpointApiServiceDescriptors().contains(apiServiceDescriptor)) {
                bundleProcessor.getInboundEndpointApiServiceDescriptors().add(apiServiceDescriptor);
            }
            bundleProcessor.getInboundDataEndpoints().add(dataEndpoint);
        }, (timerEndpoint) -> {
            if (!bundleDescriptor.hasTimerApiServiceDescriptor()) {
                throw new IllegalStateException(String.format("Timers are unsupported because the " + "ProcessBundleRequest %s does not provide a timer ApiServiceDescriptor.", bundleId));
            }
            bundleProcessor.getTimerEndpoints().add(timerEndpoint);
        }, progressRequestCallbacks::add, splitListener, bundleFinalizer, bundleProcessor.getChannelRoots(), bundleProcessor.getOutboundAggregators(), bundleProcessor.getRunnerCapabilities());
    }
    bundleProcessor.finish();
    return bundleProcessor;
}
Also used : MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) BeamFnStateClient(org.apache.beam.fn.harness.state.BeamFnStateClient) ProgressRequestCallback(org.apache.beam.fn.harness.PTransformRunnerFactory.ProgressRequestCallback) ArrayList(java.util.ArrayList) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PCollectionConsumerRegistry(org.apache.beam.fn.harness.data.PCollectionConsumerRegistry) ThrowingRunnable(org.apache.beam.sdk.function.ThrowingRunnable) CallbackRegistration(org.apache.beam.fn.harness.control.FinalizeBundleHandler.CallbackRegistration) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) HashSet(java.util.HashSet) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) PTransformFunctionRegistry(org.apache.beam.fn.harness.data.PTransformFunctionRegistry) Instant(org.joda.time.Instant) BundleFinalizer(org.apache.beam.sdk.transforms.DoFn.BundleFinalizer) ProgressRequestCallback(org.apache.beam.fn.harness.PTransformRunnerFactory.ProgressRequestCallback) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) Map(java.util.Map) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) LinkedHashMap(java.util.LinkedHashMap) WeakHashMap(java.util.WeakHashMap)

Aggregations

CallbackRegistration (org.apache.beam.fn.harness.control.FinalizeBundleHandler.CallbackRegistration)2 PCollectionConsumerRegistry (org.apache.beam.fn.harness.data.PCollectionConsumerRegistry)2 PTransformFunctionRegistry (org.apache.beam.fn.harness.data.PTransformFunctionRegistry)2 ExecutionStateTracker (org.apache.beam.runners.core.metrics.ExecutionStateTracker)2 MetricsContainerStepMap (org.apache.beam.runners.core.metrics.MetricsContainerStepMap)2 ThrowingRunnable (org.apache.beam.sdk.function.ThrowingRunnable)2 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 WeakHashMap (java.util.WeakHashMap)1 ProgressRequestCallback (org.apache.beam.fn.harness.PTransformRunnerFactory.ProgressRequestCallback)1 BundleProcessor (org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessor)1 BeamFnStateClient (org.apache.beam.fn.harness.state.BeamFnStateClient)1 BeamFnApi (org.apache.beam.model.fnexecution.v1.BeamFnApi)1 ProcessBundleDescriptor (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor)1 CacheToken (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleRequest.CacheToken)1 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)1 ShortIdMap (org.apache.beam.runners.core.metrics.ShortIdMap)1 BundleFinalizer (org.apache.beam.sdk.transforms.DoFn.BundleFinalizer)1