Search in sources :

Example 31 with Coder

use of org.apache.beam.model.pipeline.v1.RunnerApi.Coder in project beam by apache.

the class AssignWindowsRunnerTest method multipleInputWindowsAsMapFnSucceeds.

/**
 * Feeds a single element that sits in two input windows through the assign-windows map function
 * and expects two outputs, each carrying both windows returned by the custom {@code WindowFn}.
 */
@Test
public void multipleInputWindowsAsMapFnSucceeds() throws Exception {
    // Only assignWindows is implemented; every other WindowFn hook throws.
    WindowFn<Object, BoundedWindow> assigner = new WindowFn<Object, BoundedWindow>() {

        @Override
        public Collection<BoundedWindow> assignWindows(AssignContext assignContext) {
            // The input window is read but its value is deliberately not used.
            assignContext.window();
            IntervalWindow interval = new IntervalWindow(new Instant(-500), Duration.standardMinutes(3));
            return ImmutableSet.of(GlobalWindow.INSTANCE, interval);
        }

        @Override
        public void mergeWindows(MergeContext mergeContext) {
            throw new UnsupportedOperationException();
        }

        @Override
        public WindowMappingFn<BoundedWindow> getDefaultWindowMappingFn() {
            throw new UnsupportedOperationException();
        }

        @Override
        public boolean isCompatible(WindowFn<?, ?> otherFn) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Coder<BoundedWindow> windowCoder() {
            throw new UnsupportedOperationException();
        }
    };

    // The environment is registered before the window fn is translated with these components.
    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

    RunnerApi.PCollection inputPCollection = RunnerApi.PCollection.newBuilder()
            .setUniqueName("input")
            .setCoderId("coder-id")
            .build();
    RunnerApi.Coder varIntCoderProto = CoderTranslation.toProto(VarIntCoder.of()).getCoder();

    // Assemble the ASSIGN_WINDOWS transform piece by piece.
    WindowIntoPayload windowIntoPayload = WindowIntoPayload.newBuilder()
            .setWindowFn(WindowingStrategyTranslation.toProto(assigner, sdkComponents))
            .build();
    FunctionSpec.Builder assignWindowsSpec = FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
            .setPayload(windowIntoPayload.toByteString());
    PTransform assignWindowsTransform = PTransform.newBuilder()
            .putInputs("in", "input")
            .putOutputs("out", "output")
            .setSpec(assignWindowsSpec)
            .build();

    PTransformRunnerFactoryTestContext testContext =
            PTransformRunnerFactoryTestContext.builder("ptransform", assignWindowsTransform)
                    .pCollections(Collections.singletonMap("input", inputPCollection))
                    .coders(Collections.singletonMap("coder-id", varIntCoderProto))
                    .build();

    // Capture everything the runner emits on the "output" PCollection.
    Collection<WindowedValue<?>> emitted = new ArrayList<>();
    testContext.addPCollectionConsumer("output", emitted::add, VarIntCoder.of());
    MapFnRunners.forWindowedValueMapFnFactory(new AssignWindowsMapFnFactory<>())
            .createRunnerForPTransform(testContext);

    // One element that belongs to two input windows.
    IntervalWindow firstInputWindow = new IntervalWindow(new Instant(-22L), Duration.standardMinutes(5L));
    IntervalWindow secondInputWindow = new IntervalWindow(new Instant(-120000L), Duration.standardMinutes(3L));
    WindowedValue<Integer> element = WindowedValue.of(
            2,
            new Instant(-10L),
            ImmutableList.of(firstInputWindow, secondInputWindow),
            PaneInfo.ON_TIME_AND_ONLY_FIRING);
    testContext.getPCollectionConsumer("input").accept(element);

    // Two equal outputs are expected, each holding the full set of assigned windows.
    ImmutableSet<BoundedWindow> assignedWindows = ImmutableSet.of(
            GlobalWindow.INSTANCE,
            new IntervalWindow(new Instant(-500), Duration.standardMinutes(3)));
    assertThat(
            emitted,
            containsInAnyOrder(
                    WindowedValue.of(2, new Instant(-10L), assignedWindows, PaneInfo.ON_TIME_AND_ONLY_FIRING),
                    WindowedValue.of(2, new Instant(-10L), assignedWindows, PaneInfo.ON_TIME_AND_ONLY_FIRING)));
}
Also used : WindowFn(org.apache.beam.sdk.transforms.windowing.WindowFn) PartitioningWindowFn(org.apache.beam.sdk.transforms.windowing.PartitioningWindowFn) Instant(org.joda.time.Instant) ArrayList(java.util.ArrayList) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) WindowedValue(org.apache.beam.sdk.util.WindowedValue) AssignWindowsMapFnFactory(org.apache.beam.fn.harness.AssignWindowsRunner.AssignWindowsMapFnFactory) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 32 with Coder

use of org.apache.beam.model.pipeline.v1.RunnerApi.Coder in project beam by apache.

the class FlattenRunnerTest method testFlattenWithDuplicateInputCollectionProducesMultipleOutputs.

/**
 * Builds a Flatten whose two inputs both read from the same PCollection and checks that every
 * element pushed into that PCollection reaches the main output once per input, i.e. twice.
 */
@Test
public void testFlattenWithDuplicateInputCollectionProducesMultipleOutputs() throws Exception {
    String transformId = "pTransformId";
    String outputTag = "101";

    RunnerApi.FunctionSpec flattenSpec = RunnerApi.FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN)
            .build();
    // Both input tags point at the same PCollection id.
    RunnerApi.PTransform flattenTransform = RunnerApi.PTransform.newBuilder()
            .setSpec(flattenSpec)
            .putInputs("inputA", "inputATarget")
            .putInputs("inputAAgain", "inputATarget")
            .putOutputs(outputTag, "mainOutputTarget")
            .build();
    RunnerApi.PCollection inputPCollection = RunnerApi.PCollection.newBuilder()
            .setUniqueName("inputATarget")
            .setCoderId("coder-id")
            .build();
    RunnerApi.Coder stringCoderProto = CoderTranslation.toProto(StringUtf8Coder.of()).getCoder();

    PTransformRunnerFactoryTestContext testContext =
            PTransformRunnerFactoryTestContext.builder(transformId, flattenTransform)
                    .processBundleInstructionId("57")
                    .pCollections(Collections.singletonMap("inputATarget", inputPCollection))
                    .coders(Collections.singletonMap("coder-id", stringCoderProto))
                    .build();

    // Collect whatever arrives on the main output.
    List<WindowedValue<String>> received = new ArrayList<>();
    FnDataReceiver<WindowedValue<String>> collector = received::add;
    testContext.addPCollectionConsumer("mainOutputTarget", (FnDataReceiver) collector, StringUtf8Coder.of());

    new FlattenRunner.Factory<>().createRunnerForPTransform(testContext);
    received.clear();

    // The shared input PCollection must have one consumer per Flatten input.
    assertThat(
            testContext.getPCollectionConsumers().keySet(),
            containsInAnyOrder("inputATarget", "mainOutputTarget"));
    assertThat(testContext.getPCollectionConsumers().get("inputATarget"), hasSize(2));

    FnDataReceiver<WindowedValue<?>> inputReceiver = testContext.getPCollectionConsumer("inputATarget");
    inputReceiver.accept(WindowedValue.valueInGlobalWindow("A1"));
    inputReceiver.accept(WindowedValue.valueInGlobalWindow("A2"));

    // Each element is expected twice on the output.
    assertThat(
            received,
            containsInAnyOrder(
                    valueInGlobalWindow("A1"),
                    valueInGlobalWindow("A1"),
                    valueInGlobalWindow("A2"),
                    valueInGlobalWindow("A2")));
}
Also used : FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) ArrayList(java.util.ArrayList) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Test(org.junit.Test)

Example 33 with Coder

use of org.apache.beam.model.pipeline.v1.RunnerApi.Coder in project beam by apache.

the class PTransformRunnerFactoryTestContext method addOutgoingDataEndpoint.

/**
 * Registers an output location for this transform on the outbound aggregator stored for
 * {@code apiServiceDescriptor}, records a matching data endpoint under the same descriptor, and
 * returns the receiver handed out by the aggregator.
 *
 * @param apiServiceDescriptor key used to look up the aggregator and to group recorded endpoints
 * @param coder coder passed to both the aggregator registration and the recorded endpoint
 * @return the receiver obtained from the aggregator
 */
@Override
public <T> FnDataReceiver<T> addOutgoingDataEndpoint(ApiServiceDescriptor apiServiceDescriptor, Coder<T> coder) {
    // NOTE(review): assumes an aggregator is already present for this descriptor — confirm with callers.
    BeamFnDataOutboundAggregator outboundAggregator = getOutboundAggregators().get(apiServiceDescriptor);
    FnDataReceiver<T> outboundReceiver =
            outboundAggregator.registerOutputDataLocation(getPTransformId(), coder);

    // Create the per-descriptor endpoint list on first use, then append this endpoint.
    getOutgoingDataEndpoints()
            .computeIfAbsent(apiServiceDescriptor, (ignoredKey) -> new ArrayList<>())
            .add(DataEndpoint.create(getPTransformId(), coder, outboundReceiver));
    return outboundReceiver;
}
Also used : BeamFnDataOutboundAggregator(org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator) BeamFnDataOutboundAggregator(org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BeamFnStateClient(org.apache.beam.fn.harness.state.BeamFnStateClient) BundleFinalizer(org.apache.beam.sdk.transforms.DoFn.BundleFinalizer) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Map(java.util.Map) DelayedBundleApplication(org.apache.beam.model.fnexecution.v1.BeamFnApi.DelayedBundleApplication) BeamFnDataClient(org.apache.beam.fn.harness.data.BeamFnDataClient) BundleApplication(org.apache.beam.model.fnexecution.v1.BeamFnApi.BundleApplication) NoSuchElementException(java.util.NoSuchElementException) MetricsApi(org.apache.beam.model.pipeline.v1.MetricsApi) ApiServiceDescriptor(org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) CloseableFnDataReceiver(org.apache.beam.sdk.fn.data.CloseableFnDataReceiver) TimerEndpoint(org.apache.beam.sdk.fn.data.TimerEndpoint) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) Set(java.util.Set) BundleSplitListener(org.apache.beam.fn.harness.control.BundleSplitListener) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ProgressRequestCallback(org.apache.beam.fn.harness.PTransformRunnerFactory.ProgressRequestCallback) DataEndpoint(org.apache.beam.sdk.fn.data.DataEndpoint) List(java.util.List) StateRequest(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateRequest) 
Timer(org.apache.beam.runners.core.construction.Timer) Instant(org.joda.time.Instant) AutoValue(com.google.auto.value.AutoValue) Collections(java.util.Collections) StateResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateResponse) ThrowingRunnable(org.apache.beam.sdk.function.ThrowingRunnable) ArrayList(java.util.ArrayList)

Example 34 with Coder

use of org.apache.beam.model.pipeline.v1.RunnerApi.Coder in project beam by apache.

the class BeamFnDataGrpcClientTest method testForInboundConsumerThatThrows.

/**
 * Registers an inbound consumer that throws on every element, sends two messages from the
 * server, and checks that awaiting the observer surfaces the thrown exception, that the consumer
 * ran exactly once, and that nothing was sent back to the server.
 */
@Test
public void testForInboundConsumerThatThrows() throws Exception {
    CountDownLatch waitForClientToConnect = new CountDownLatch(1);
    AtomicInteger consumerInvoked = new AtomicInteger();
    Collection<BeamFnApi.Elements> inboundServerValues = new ConcurrentLinkedQueue<>();
    AtomicReference<StreamObserver<BeamFnApi.Elements>> outboundServerObserver = new AtomicReference<>();
    CallStreamObserver<BeamFnApi.Elements> inboundServerObserver = TestStreams.withOnNext(inboundServerValues::add).build();
    // A random suffix keeps the in-process server name unique per run.
    Endpoints.ApiServiceDescriptor apiServiceDescriptor = Endpoints.ApiServiceDescriptor.newBuilder().setUrl(this.getClass().getName() + "-" + UUID.randomUUID()).build();
    Server server = InProcessServerBuilder.forName(apiServiceDescriptor.getUrl()).addService(new BeamFnDataGrpc.BeamFnDataImplBase() {

        @Override
        public StreamObserver<BeamFnApi.Elements> data(StreamObserver<BeamFnApi.Elements> outboundObserver) {
            // Expose the server-to-client stream to the test and signal that the client connected.
            outboundServerObserver.set(outboundObserver);
            waitForClientToConnect.countDown();
            return inboundServerObserver;
        }
    }).build();
    server.start();
    RuntimeException exceptionToThrow = new RuntimeException("TestFailure");
    try {
        ManagedChannel channel = InProcessChannelBuilder.forName(apiServiceDescriptor.getUrl()).build();
        try {
            BeamFnDataGrpcClient clientFactory = new BeamFnDataGrpcClient(PipelineOptionsFactory.create(), (Endpoints.ApiServiceDescriptor descriptor) -> channel, OutboundObserverFactory.trivial());
            BeamFnDataInboundObserver2 observer = BeamFnDataInboundObserver2.forConsumers(Arrays.asList(DataEndpoint.create(TRANSFORM_ID_A, CODER, t -> {
                consumerInvoked.incrementAndGet();
                throw exceptionToThrow;
            })), Collections.emptyList());
            clientFactory.registerReceiver(INSTRUCTION_ID_A, Arrays.asList(apiServiceDescriptor), observer);
            waitForClientToConnect.await();
            // The first message should cause a failure; afterwards all other messages are dropped.
            outboundServerObserver.get().onNext(ELEMENTS_A_1);
            outboundServerObserver.get().onNext(ELEMENTS_A_2);
            try {
                observer.awaitCompletion();
                fail("Expected channel to fail");
            } catch (Exception e) {
                assertEquals(exceptionToThrow, e);
            }
            // The server should not have received any values
            assertThat(inboundServerValues, empty());
            // The consumer should have only been invoked once
            assertEquals(1, consumerInvoked.get());
        } finally {
            // Release the client channel; previously it was left open after the test.
            channel.shutdownNow();
        }
    } finally {
        server.shutdownNow();
    }
}
Also used : CallStreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.CallStreamObserver) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) Server(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) AtomicReference(java.util.concurrent.atomic.AtomicReference) CountDownLatch(java.util.concurrent.CountDownLatch) BeamFnDataInboundObserver2(org.apache.beam.sdk.fn.data.BeamFnDataInboundObserver2) Endpoints(org.apache.beam.model.pipeline.v1.Endpoints) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ManagedChannel(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.ManagedChannel) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Test(org.junit.Test)

Example 35 with Coder

use of org.apache.beam.model.pipeline.v1.RunnerApi.Coder in project beam by apache.

the class BeamFnDataGrpcClientTest method testForOutboundConsumer.

/**
 * Sends three windowed strings through an outbound aggregator configured with
 * {@code data_buffer_size_limit=20} and checks that the server receives exactly
 * {@code ELEMENTS_A_1} followed by {@code ELEMENTS_A_2}.
 */
@Test
public void testForOutboundConsumer() throws Exception {
    // The latch is released once the server has seen two Elements messages.
    CountDownLatch waitForInboundServerValuesCompletion = new CountDownLatch(2);
    Collection<BeamFnApi.Elements> inboundServerValues = new ConcurrentLinkedQueue<>();
    CallStreamObserver<BeamFnApi.Elements> inboundServerObserver = TestStreams.withOnNext((BeamFnApi.Elements t) -> {
        inboundServerValues.add(t);
        waitForInboundServerValuesCompletion.countDown();
    }).build();
    // A random suffix keeps the in-process server name unique per run.
    Endpoints.ApiServiceDescriptor apiServiceDescriptor = Endpoints.ApiServiceDescriptor.newBuilder().setUrl(this.getClass().getName() + "-" + UUID.randomUUID()).build();
    Server server = InProcessServerBuilder.forName(apiServiceDescriptor.getUrl()).addService(new BeamFnDataGrpc.BeamFnDataImplBase() {

        @Override
        public StreamObserver<BeamFnApi.Elements> data(StreamObserver<BeamFnApi.Elements> outboundObserver) {
            return inboundServerObserver;
        }
    }).build();
    server.start();
    try {
        ManagedChannel channel = InProcessChannelBuilder.forName(apiServiceDescriptor.getUrl()).build();
        try {
            BeamFnDataGrpcClient clientFactory = new BeamFnDataGrpcClient(PipelineOptionsFactory.fromArgs(new String[] { "--experiments=data_buffer_size_limit=20" }).create(), (Endpoints.ApiServiceDescriptor descriptor) -> channel, OutboundObserverFactory.trivial());
            BeamFnDataOutboundAggregator aggregator = clientFactory.createOutboundAggregator(apiServiceDescriptor, () -> INSTRUCTION_ID_A, false);
            FnDataReceiver<WindowedValue<String>> fnDataReceiver = aggregator.registerOutputDataLocation(TRANSFORM_ID_A, CODER);
            fnDataReceiver.accept(valueInGlobalWindow("ABC"));
            fnDataReceiver.accept(valueInGlobalWindow("DEF"));
            fnDataReceiver.accept(valueInGlobalWindow("GHI"));
            aggregator.sendOrCollectBufferedDataAndFinishOutboundStreams();
            waitForInboundServerValuesCompletion.await();
            assertThat(inboundServerValues, contains(ELEMENTS_A_1, ELEMENTS_A_2));
        } finally {
            // Release the client channel; previously it was left open after the test.
            channel.shutdownNow();
        }
    } finally {
        server.shutdownNow();
    }
}
Also used : CallStreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.CallStreamObserver) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) BeamFnDataOutboundAggregator(org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator) Server(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) CountDownLatch(java.util.concurrent.CountDownLatch) Endpoints(org.apache.beam.model.pipeline.v1.Endpoints) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ManagedChannel(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.ManagedChannel) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Test(org.junit.Test)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)48 Coder (org.apache.beam.sdk.coders.Coder)33 WindowedValue (org.apache.beam.sdk.util.WindowedValue)32 KvCoder (org.apache.beam.sdk.coders.KvCoder)30 Test (org.junit.Test)30 Map (java.util.Map)23 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)23 HashMap (java.util.HashMap)21 KV (org.apache.beam.sdk.values.KV)20 ArrayList (java.util.ArrayList)19 IOException (java.io.IOException)18 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)17 List (java.util.List)16 ExecutableStage (org.apache.beam.runners.core.construction.graph.ExecutableStage)16 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)15 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)15 Collection (java.util.Collection)13 Pipeline (org.apache.beam.sdk.Pipeline)13 FusedPipeline (org.apache.beam.runners.core.construction.graph.FusedPipeline)12 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)11