Search in sources :

Example 71 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class FlattenRunnerTest method testFlattenWithDuplicateInputCollectionProducesMultipleOutputs.

/**
 * Builds a Flatten whose two inputs ("inputA" and "inputAAgain") both read the same PCollection
 * and checks that each element sent to that PCollection arrives at the single output twice,
 * once per registered input.
 */
@Test
public void testFlattenWithDuplicateInputCollectionProducesMultipleOutputs() throws Exception {
    String transformId = "pTransformId";
    String outputTag = "101";

    // Flatten transform: two differently named inputs that point at the same PCollection id.
    RunnerApi.FunctionSpec flattenSpec =
        RunnerApi.FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN)
            .build();
    RunnerApi.PTransform flattenTransform =
        RunnerApi.PTransform.newBuilder()
            .setSpec(flattenSpec)
            .putInputs("inputA", "inputATarget")
            .putInputs("inputAAgain", "inputATarget")
            .putOutputs(outputTag, "mainOutputTarget")
            .build();

    // The shared input PCollection and the coder it refers to by id.
    RunnerApi.Coder stringCoder = CoderTranslation.toProto(StringUtf8Coder.of()).getCoder();
    RunnerApi.PCollection sharedInput =
        RunnerApi.PCollection.newBuilder()
            .setUniqueName("inputATarget")
            .setCoderId("coder-id")
            .build();

    PTransformRunnerFactoryTestContext context =
        PTransformRunnerFactoryTestContext.builder(transformId, flattenTransform)
            .processBundleInstructionId("57")
            .pCollections(Collections.singletonMap("inputATarget", sharedInput))
            .coders(Collections.singletonMap("coder-id", stringCoder))
            .build();

    // Capture everything that reaches the output PCollection.
    List<WindowedValue<String>> received = new ArrayList<>();
    FnDataReceiver<WindowedValue<String>> collector = received::add;
    context.addPCollectionConsumer("mainOutputTarget", (FnDataReceiver) collector, StringUtf8Coder.of());

    new FlattenRunner.Factory<>().createRunnerForPTransform(context);
    received.clear();

    // Both Flatten inputs must have registered a consumer on the shared PCollection.
    assertThat(context.getPCollectionConsumers().keySet(), containsInAnyOrder("inputATarget", "mainOutputTarget"));
    assertThat(context.getPCollectionConsumers().get("inputATarget"), hasSize(2));

    FnDataReceiver<WindowedValue<?>> input = context.getPCollectionConsumer("inputATarget");
    for (String element : new String[] {"A1", "A2"}) {
        input.accept(WindowedValue.valueInGlobalWindow(element));
    }

    // Every element is expected twice: once for each input wired to the shared PCollection.
    assertThat(
        received,
        containsInAnyOrder(
            valueInGlobalWindow("A1"),
            valueInGlobalWindow("A1"),
            valueInGlobalWindow("A2"),
            valueInGlobalWindow("A2")));
}
Also used : FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) ArrayList(java.util.ArrayList) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Test(org.junit.Test)

Example 72 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class ProcessBundleHandler method createBundleProcessor.

/**
 * Assembles a {@link BundleProcessor} for the {@code ProcessBundleDescriptor} that
 * {@code fnApiRegistry} returns for {@code bundleId}.
 *
 * <p>The method creates the metrics/state-tracking registries, picks a state-call handler based
 * on whether the descriptor carries a State ApiServiceDescriptor, creates the processor, and then
 * wires up runners and consumers for the transforms whose URN is one of the data-input,
 * data-output, Java-source or Read URNs.
 *
 * @param bundleId key passed to {@code fnApiRegistry} to obtain the descriptor; also included in
 *     the error message raised when timers are requested without a timer ApiServiceDescriptor
 * @param processBundleRequest only used here to construct the {@code FailAllStateCallsForBundle}
 *     handler when the descriptor has no State ApiServiceDescriptor
 * @return the populated processor, after {@code finish()} has been invoked on it
 * @throws IOException declared by this method; NOTE(review): the throwing call is not
 *     identifiable from this excerpt -- presumably the recursive runner creation; confirm
 */
private BundleProcessor createBundleProcessor(String bundleId, BeamFnApi.ProcessBundleRequest processBundleRequest) throws IOException {
    BeamFnApi.ProcessBundleDescriptor bundleDescriptor = fnApiRegistry.apply(bundleId);
    SetMultimap<String, String> pCollectionIdsToConsumingPTransforms = HashMultimap.create();
    // The same metrics container registry and state tracker are shared by the consumer registry
    // and by the start/finish function registries created below.
    MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
    ExecutionStateTracker stateTracker = new ExecutionStateTracker(ExecutionStateSampler.instance());
    PCollectionConsumerRegistry pCollectionConsumerRegistry = new PCollectionConsumerRegistry(metricsContainerRegistry, stateTracker);
    // Passed to the recursive runner creation below; presumably records which transforms were
    // already handled so each is only set up once -- confirm in the callee.
    HashSet<String> processedPTransformIds = new HashSet<>();
    PTransformFunctionRegistry startFunctionRegistry = new PTransformFunctionRegistry(metricsContainerRegistry, stateTracker, ExecutionStateTracker.START_STATE_NAME);
    PTransformFunctionRegistry finishFunctionRegistry = new PTransformFunctionRegistry(metricsContainerRegistry, stateTracker, ExecutionStateTracker.FINISH_STATE_NAME);
    // These lists are handed to the BundleProcessor and are appended to through the
    // resetFunctions::add / tearDownFunctions::add / progressRequestCallbacks::add references
    // given to the runner creation call further down.
    List<ThrowingRunnable> resetFunctions = new ArrayList<>();
    List<ThrowingRunnable> tearDownFunctions = new ArrayList<>();
    List<ProgressRequestCallback> progressRequestCallbacks = new ArrayList<>();
    // Build a multimap of PCollection ids to PTransform ids which consume said PCollections
    for (Map.Entry<String, RunnerApi.PTransform> entry : bundleDescriptor.getTransformsMap().entrySet()) {
        for (String pCollectionId : entry.getValue().getInputsMap().values()) {
            pCollectionIdsToConsumingPTransforms.put(pCollectionId, entry.getKey());
        }
    }
    // Instantiate a State API call handler depending on whether a State ApiServiceDescriptor was
    // specified.
    HandleStateCallsForBundle beamFnStateClient;
    if (bundleDescriptor.hasStateApiServiceDescriptor()) {
        BeamFnStateClient underlyingClient = beamFnStateGrpcClientCache.forApiServiceDescriptor(bundleDescriptor.getStateApiServiceDescriptor());
        beamFnStateClient = new BlockTillStateCallsFinish(underlyingClient);
    } else {
        beamFnStateClient = new FailAllStateCallsForBundle(processBundleRequest);
    }
    BundleSplitListener.InMemory splitListener = BundleSplitListener.InMemory.create();
    Collection<CallbackRegistration> bundleFinalizationCallbackRegistrations = new ArrayList<>();
    // Finalizer that only records each callback (with its expiry) in the collection above; the
    // same collection is handed to the BundleProcessor.
    BundleFinalizer bundleFinalizer = new BundleFinalizer() {

        @Override
        public void afterBundleCommit(Instant callbackExpiry, Callback callback) {
            bundleFinalizationCallbackRegistrations.add(CallbackRegistration.create(callbackExpiry, callback));
        }
    };
    // The processor is created before the runners so that the lambdas and method references
    // passed to the runner creation below can refer to it.
    BundleProcessor bundleProcessor = BundleProcessor.create(processWideCache, bundleDescriptor, startFunctionRegistry, finishFunctionRegistry, resetFunctions, tearDownFunctions, progressRequestCallbacks, splitListener, pCollectionConsumerRegistry, metricsContainerRegistry, stateTracker, beamFnStateClient, bundleFinalizationCallbackRegistrations, runnerCapabilities);
    // Create runners and consumers, starting only from transforms whose URN is one of
    // DATA_INPUT_URN, DATA_OUTPUT_URN, JAVA_SOURCE_URN or READ_TRANSFORM_URN; every other
    // transform is skipped by this loop (presumably reached via the recursive call -- confirm).
    for (Map.Entry<String, RunnerApi.PTransform> entry : bundleDescriptor.getTransformsMap().entrySet()) {
        // TODO: Remove source as a root and have it be triggered by the Runner.
        if (!DATA_INPUT_URN.equals(entry.getValue().getSpec().getUrn()) && !DATA_OUTPUT_URN.equals(entry.getValue().getSpec().getUrn()) && !JAVA_SOURCE_URN.equals(entry.getValue().getSpec().getUrn()) && !PTransformTranslation.READ_TRANSFORM_URN.equals(entry.getValue().getSpec().getUrn())) {
            continue;
        }
        // First lambda: registers an inbound data endpoint, adding its ApiServiceDescriptor only
        // if it is not already present. Second lambda: registers a timer endpoint, failing with
        // IllegalStateException when the descriptor has no timer ApiServiceDescriptor.
        createRunnerAndConsumersForPTransformRecursively(beamFnStateClient, beamFnDataClient, entry.getKey(), entry.getValue(), bundleProcessor::getInstructionId, bundleProcessor::getCacheTokens, bundleProcessor::getBundleCache, bundleDescriptor, pCollectionIdsToConsumingPTransforms, pCollectionConsumerRegistry, processedPTransformIds, startFunctionRegistry, finishFunctionRegistry, resetFunctions::add, tearDownFunctions::add, (apiServiceDescriptor, dataEndpoint) -> {
            if (!bundleProcessor.getInboundEndpointApiServiceDescriptors().contains(apiServiceDescriptor)) {
                bundleProcessor.getInboundEndpointApiServiceDescriptors().add(apiServiceDescriptor);
            }
            bundleProcessor.getInboundDataEndpoints().add(dataEndpoint);
        }, (timerEndpoint) -> {
            if (!bundleDescriptor.hasTimerApiServiceDescriptor()) {
                throw new IllegalStateException(String.format("Timers are unsupported because the " + "ProcessBundleRequest %s does not provide a timer ApiServiceDescriptor.", bundleId));
            }
            bundleProcessor.getTimerEndpoints().add(timerEndpoint);
        }, progressRequestCallbacks::add, splitListener, bundleFinalizer, bundleProcessor.getChannelRoots(), bundleProcessor.getOutboundAggregators(), bundleProcessor.getRunnerCapabilities());
    }
    // NOTE(review): finish() is not visible in this excerpt; presumably it finalizes the
    // processor's setup once all runners are registered -- confirm.
    bundleProcessor.finish();
    return bundleProcessor;
}
Also used : MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) BeamFnStateClient(org.apache.beam.fn.harness.state.BeamFnStateClient) ProgressRequestCallback(org.apache.beam.fn.harness.PTransformRunnerFactory.ProgressRequestCallback) ArrayList(java.util.ArrayList) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PCollectionConsumerRegistry(org.apache.beam.fn.harness.data.PCollectionConsumerRegistry) ThrowingRunnable(org.apache.beam.sdk.function.ThrowingRunnable) CallbackRegistration(org.apache.beam.fn.harness.control.FinalizeBundleHandler.CallbackRegistration) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) HashSet(java.util.HashSet) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) PTransformFunctionRegistry(org.apache.beam.fn.harness.data.PTransformFunctionRegistry) Instant(org.joda.time.Instant) BundleFinalizer(org.apache.beam.sdk.transforms.DoFn.BundleFinalizer) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) LinkedHashMap(java.util.LinkedHashMap) WeakHashMap(java.util.WeakHashMap)

Example 73 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class PubSubWritePayloadTranslationTest method testTranslateSinkWithTopicOverridden.

/**
 * Translates a {@code PubsubSink} whose topic is supplied by a provider obtained from
 * {@code pipeline.newProvider(TOPIC)} and checks that the resulting payload reports the
 * provider's property name as the runtime-overridden topic, leaves the static topic empty, and
 * carries the timestamp and id attributes.
 */
@Test
public void testTranslateSinkWithTopicOverridden() throws Exception {
    ValueProvider<TopicPath> topicProvider = pipeline.newProvider(TOPIC);
    PubsubSink sink =
        new PubsubSink(
            new PubsubUnboundedSink(
                null, topicProvider, TIMESTAMP_ATTRIBUTE, ID_ATTRIBUTE, 0, 0, 0, Duration.ZERO, null));

    // Apply the sink to a trivial input so an AppliedPTransform can be built for the translator.
    PCollection<byte[]> input = pipeline.apply(Create.of(new byte[0]));
    PDone output = input.apply(sink);
    AppliedPTransform<?, ?, PubsubSink> application =
        AppliedPTransform.of(
            "sink",
            PValues.expandInput(input),
            PValues.expandOutput(output),
            sink,
            ResourceHints.create(),
            pipeline);

    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

    RunnerApi.FunctionSpec translated = sinkTranslator.translate(application, sdkComponents);
    assertEquals(PTransformTranslation.PUBSUB_WRITE, translated.getUrn());

    PubSubWritePayload writePayload = PubSubWritePayload.parseFrom(translated.getPayload());
    // The override field must hold the provider's property name; the static topic stays empty.
    assertEquals(((NestedValueProvider) topicProvider).propertyName(), writePayload.getTopicRuntimeOverridden());
    assertTrue(writePayload.getTopic().isEmpty());
    assertEquals(TIMESTAMP_ATTRIBUTE, writePayload.getTimestampAttribute());
    assertEquals(ID_ATTRIBUTE, writePayload.getIdAttribute());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) TopicPath(org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.TopicPath) PDone(org.apache.beam.sdk.values.PDone) PubsubSink(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSink.PubsubSink) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) PubSubWritePayload(org.apache.beam.model.pipeline.v1.RunnerApi.PubSubWritePayload) Test(org.junit.Test)

Example 74 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class PubSubWritePayloadTranslationTest method testTranslateSinkWithTopic.

/**
 * Translates a {@code PubsubSink} configured with a static topic and checks that the resulting
 * payload carries the topic's full path, leaves the runtime-overridden topic empty, and carries
 * the timestamp and id attributes.
 */
@Test
public void testTranslateSinkWithTopic() throws Exception {
    PubsubSink sink =
        new PubsubSink(
            new PubsubUnboundedSink(
                null,
                StaticValueProvider.of(TOPIC),
                TIMESTAMP_ATTRIBUTE,
                ID_ATTRIBUTE,
                0,
                0,
                0,
                Duration.ZERO,
                null));

    // Apply the sink to a trivial input so an AppliedPTransform can be built for the translator.
    PCollection<byte[]> input = pipeline.apply(Create.of(new byte[0]));
    PDone output = input.apply(sink);
    AppliedPTransform<?, ?, PubsubSink> application =
        AppliedPTransform.of(
            "sink",
            PValues.expandInput(input),
            PValues.expandOutput(output),
            sink,
            ResourceHints.create(),
            pipeline);

    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

    RunnerApi.FunctionSpec translated = sinkTranslator.translate(application, sdkComponents);
    assertEquals(PTransformTranslation.PUBSUB_WRITE, translated.getUrn());

    PubSubWritePayload writePayload = PubSubWritePayload.parseFrom(translated.getPayload());
    // With a static topic the full path is set and the runtime override stays empty.
    assertEquals(TOPIC.getFullPath(), writePayload.getTopic());
    assertTrue(writePayload.getTopicRuntimeOverridden().isEmpty());
    assertEquals(TIMESTAMP_ATTRIBUTE, writePayload.getTimestampAttribute());
    assertEquals(ID_ATTRIBUTE, writePayload.getIdAttribute());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PDone(org.apache.beam.sdk.values.PDone) PubsubSink(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSink.PubsubSink) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) PubSubWritePayload(org.apache.beam.model.pipeline.v1.RunnerApi.PubSubWritePayload) Test(org.junit.Test)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)45 Test (org.junit.Test)45 Pipeline (org.apache.beam.sdk.Pipeline)25 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)24 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)22 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)22 Map (java.util.Map)21 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)21 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)21 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)17 ArrayList (java.util.ArrayList)16 HashMap (java.util.HashMap)14 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)13 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)13 PCollection (org.apache.beam.sdk.values.PCollection)12 Coder (org.apache.beam.sdk.coders.Coder)11 KV (org.apache.beam.sdk.values.KV)11 Collection (java.util.Collection)10 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)10 IOException (java.io.IOException)9