Search in sources :

Example 71 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class DoFnOperatorTest method testSideInputs.

/**
 * Runs a {@link DoFnOperator} that declares two side inputs inside a two-input test harness and
 * verifies that the operator's output consists of the two main-input elements, in the order
 * they were fed.
 *
 * @param keyed if {@code true}, the operator receives a {@code StringUtf8Coder} as key coder and
 *     is driven by a keyed harness; if {@code false}, the key coder is {@code null} and the
 *     plain harness is used
 * @throws Exception if the harness or the operator fails
 */
public void testSideInputs(boolean keyed) throws Exception {
    // Only the keyed variant hands a key coder to the operator.
    Coder<String> keyCoder = keyed ? StringUtf8Coder.of() : null;
    TupleTag<String> mainOutput = new TupleTag<>("main-output");
    WindowedValue.ValueOnlyWindowedValueCoder<String> inputCoder = WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
    // Union tags 1 and 2 identify view1 and view2 on the side-input stream.
    ImmutableMap<Integer, PCollectionView<?>> viewsByTag = ImmutableMap.<Integer, PCollectionView<?>>builder()
        .put(1, view1)
        .put(2, view2)
        .build();

    DoFnOperator<String, String, String> operator = new DoFnOperator<>(
        new IdentityDoFn<String>(),
        "stepName",
        inputCoder,
        mainOutput,
        Collections.<TupleTag<?>>emptyList(),
        new DoFnOperator.DefaultOutputManagerFactory<String>(),
        WindowingStrategy.globalDefault(),
        viewsByTag, /* side-input mapping */
        ImmutableList.<PCollectionView<?>>of(view1, view2), /* side inputs */
        PipelineOptionsFactory.as(FlinkPipelineOptions.class),
        keyCoder);

    TwoInputStreamOperatorTestHarness<WindowedValue<String>, RawUnionValue, String> harness = new TwoInputStreamOperatorTestHarness<>(operator);
    if (keyed) {
        // The second input is treated as broadcast, so it only needs a dummy key selector.
        harness = new KeyedTwoInputStreamOperatorTestHarness<>(
            operator, new StringKeySelector(), new DummyKeySelector(), BasicTypeInfo.STRING_TYPE_INFO);
    }
    harness.open();

    Instant atZero = new Instant(0);
    Instant atThousand = new Instant(1000);
    IntervalWindow firstWindow = new IntervalWindow(atZero, new Instant(100));
    IntervalWindow secondWindow = new IntervalWindow(atZero, new Instant(500));

    // Side-input data for both views, stamped at 0, arrives on the second input first.
    harness.processElement2(new StreamRecord<>(
        new RawUnionValue(1, valuesInWindow(ImmutableList.of("hello", "ciao"), atZero, firstWindow))));
    harness.processElement2(new StreamRecord<>(
        new RawUnionValue(2, valuesInWindow(ImmutableList.of("foo", "bar"), atZero, secondWindow))));

    // Two regular elements follow on the first (main) input.
    WindowedValue<String> helloElement = valueInWindow("Hello", atZero, firstWindow);
    WindowedValue<String> worldElement = valueInWindow("World", atThousand, firstWindow);
    harness.processElement1(new StreamRecord<>(helloElement));
    harness.processElement1(new StreamRecord<>(worldElement));

    // The same side-input data is sent again, this time stamped at 1000.
    harness.processElement2(new StreamRecord<>(
        new RawUnionValue(1, valuesInWindow(ImmutableList.of("hello", "ciao"), atThousand, firstWindow))));
    harness.processElement2(new StreamRecord<>(
        new RawUnionValue(2, valuesInWindow(ImmutableList.of("foo", "bar"), atThousand, secondWindow))));

    assertThat(
        this.<String>stripStreamRecordFromWindowedValue(harness.getOutput()),
        contains(helloElement, worldElement));
    harness.close();
}
Also used : TwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.TwoInputStreamOperatorTestHarness) KeyedTwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) DoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator) PCollectionView(org.apache.beam.sdk.values.PCollectionView) WindowedValue(org.apache.beam.sdk.util.WindowedValue) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow)

Example 72 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class ProcessBundleHandlerTest method testCreatingAndProcessingDoFn.

/**
 * Create a DoFn that has 3 inputs (inputATarget1, inputATarget2, inputBTarget) and 2 outputs
 * (mainOutput, additionalOutput). Validate that inputs are fed to the {@link DoFn} and that
 * outputs are directed to the correct consumers.
 *
 * <p>Each of the three registered input consumers is driven with one element so that every
 * input target is actually exercised, not just the first one.
 */
@Test
public void testCreatingAndProcessingDoFn() throws Exception {
    Map<String, Message> fnApiRegistry = ImmutableMap.of(STRING_CODER_SPEC_ID, STRING_CODER_SPEC);
    String primitiveTransformId = "100L";
    long mainOutputId = 101L;
    long additionalOutputId = 102L;

    // Serialize the DoFn under test into the function spec handed to the handler.
    DoFnInfo<?, ?> doFnInfo = DoFnInfo.forFn(
        new TestDoFn(),
        WindowingStrategy.globalDefault(),
        ImmutableList.of(),
        StringUtf8Coder.of(),
        mainOutputId,
        ImmutableMap.of(mainOutputId, TestDoFn.mainOutput, additionalOutputId, TestDoFn.additionalOutput));
    BeamFnApi.FunctionSpec functionSpec = BeamFnApi.FunctionSpec.newBuilder()
        .setId("1L")
        .setUrn(JAVA_DO_FN_URN)
        .setData(Any.pack(BytesValue.newBuilder()
            .setValue(ByteString.copyFrom(SerializableUtils.serializeToByteArray(doFnInfo)))
            .build()))
        .build();

    // Three distinct upstream targets: two feeding "inputA" and one feeding "inputB".
    BeamFnApi.Target inputATarget1 = BeamFnApi.Target.newBuilder()
        .setPrimitiveTransformReference("1000L")
        .setName("inputATarget1")
        .build();
    BeamFnApi.Target inputATarget2 = BeamFnApi.Target.newBuilder()
        .setPrimitiveTransformReference("1001L")
        .setName("inputATarget2")
        .build();
    BeamFnApi.Target inputBTarget = BeamFnApi.Target.newBuilder()
        .setPrimitiveTransformReference("1002L")
        .setName("inputBTarget")
        .build();
    BeamFnApi.PrimitiveTransform primitiveTransform = BeamFnApi.PrimitiveTransform.newBuilder()
        .setId(primitiveTransformId)
        .setFunctionSpec(functionSpec)
        .putInputs("inputA", BeamFnApi.Target.List.newBuilder()
            .addTarget(inputATarget1)
            .addTarget(inputATarget2)
            .build())
        .putInputs("inputB", BeamFnApi.Target.List.newBuilder()
            .addTarget(inputBTarget)
            .build())
        .putOutputs(Long.toString(mainOutputId), BeamFnApi.PCollection.newBuilder()
            .setCoderReference(STRING_CODER_SPEC_ID)
            .build())
        .putOutputs(Long.toString(additionalOutputId), BeamFnApi.PCollection.newBuilder()
            .setCoderReference(STRING_CODER_SPEC_ID)
            .build())
        .build();

    // Downstream consumers simply record whatever the DoFn emits on each output.
    List<WindowedValue<String>> mainOutputValues = new ArrayList<>();
    List<WindowedValue<String>> additionalOutputValues = new ArrayList<>();
    BeamFnApi.Target mainOutputTarget = BeamFnApi.Target.newBuilder()
        .setPrimitiveTransformReference(primitiveTransformId)
        .setName(Long.toString(mainOutputId))
        .build();
    BeamFnApi.Target additionalOutputTarget = BeamFnApi.Target.newBuilder()
        .setPrimitiveTransformReference(primitiveTransformId)
        .setName(Long.toString(additionalOutputId))
        .build();
    Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> existingConsumers =
        ImmutableMultimap.of(mainOutputTarget, mainOutputValues::add, additionalOutputTarget, additionalOutputValues::add);
    Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> newConsumers = HashMultimap.create();
    List<ThrowingRunnable> startFunctions = new ArrayList<>();
    List<ThrowingRunnable> finishFunctions = new ArrayList<>();

    ProcessBundleHandler handler = new ProcessBundleHandler(PipelineOptionsFactory.create(), fnApiRegistry::get, beamFnDataClient);
    handler.createConsumersForPrimitiveTransform(
        primitiveTransform,
        Suppliers.ofInstance("57L")::get,
        existingConsumers::get,
        newConsumers::put,
        startFunctions::add,
        finishFunctions::add);

    Iterables.getOnlyElement(startFunctions).run();
    mainOutputValues.clear();

    // Expected value first, so a failure message reports expected/actual the right way round.
    assertEquals(ImmutableSet.of(inputATarget1, inputATarget2, inputBTarget), newConsumers.keySet());

    // Feed one element through each registered input consumer.
    Iterables.getOnlyElement(newConsumers.get(inputATarget1)).accept(valueInGlobalWindow("A1"));
    Iterables.getOnlyElement(newConsumers.get(inputATarget2)).accept(valueInGlobalWindow("A2"));
    Iterables.getOnlyElement(newConsumers.get(inputBTarget)).accept(valueInGlobalWindow("B"));
    assertThat(mainOutputValues, contains(valueInGlobalWindow("MainOutputA1"), valueInGlobalWindow("MainOutputA2"), valueInGlobalWindow("MainOutputB")));
    assertThat(additionalOutputValues, contains(valueInGlobalWindow("AdditionalOutputA1"), valueInGlobalWindow("AdditionalOutputA2"), valueInGlobalWindow("AdditionalOutputB")));
    mainOutputValues.clear();
    additionalOutputValues.clear();

    // Finishing the bundle must emit one element on the main output at the global window's max timestamp.
    Iterables.getOnlyElement(finishFunctions).run();
    assertThat(mainOutputValues, contains(timestampedValueInGlobalWindow("FinishBundle", GlobalWindow.INSTANCE.maxTimestamp())));
    mainOutputValues.clear();
}
Also used : Message(com.google.protobuf.Message) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) ThrowingRunnable(org.apache.beam.fn.harness.fn.ThrowingRunnable) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BeamFnApi(org.apache.beam.fn.v1.BeamFnApi) CloseableThrowingConsumer(org.apache.beam.fn.harness.fn.CloseableThrowingConsumer) ThrowingConsumer(org.apache.beam.fn.harness.fn.ThrowingConsumer) Test(org.junit.Test)

Example 73 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class BeamFnDataBufferingOutboundObserverTest method testWithDefaultBuffer.

/**
 * Exercises a {@code BeamFnDataBufferingOutboundObserver} built from default pipeline options:
 * a payload of {@code DEFAULT_BUFFER_LIMIT - 50} bytes alone must not be emitted, a further 50
 * bytes must trigger one message holding both payloads, the same must hold after that first
 * emission, and closing afterwards must produce a message without payloads.
 */
@Test
public void testWithDefaultBuffer() throws Exception {
    // NOTE(review): this flag is set by the onCompleted callback but never asserted on.
    AtomicBoolean onCompletedWasCalled = new AtomicBoolean();
    Collection<BeamFnApi.Elements> emitted = new ArrayList<>();
    CloseableThrowingConsumer<WindowedValue<byte[]>> observer = new BeamFnDataBufferingOutboundObserver<>(
        PipelineOptionsFactory.create(),
        OUTPUT_LOCATION,
        CODER,
        TestStreams.withOnNext(emitted::add)
            .withOnCompleted(() -> onCompletedWasCalled.set(true))
            .build());

    int nearlyFull = DEFAULT_BUFFER_LIMIT - 50;
    int topUp = 50;

    // First round: staying below the limit emits nothing ...
    observer.accept(valueInGlobalWindow(new byte[nearlyFull]));
    assertThat(emitted, empty());
    // ... and the element that reaches the limit flushes both payloads in one message.
    observer.accept(valueInGlobalWindow(new byte[topUp]));
    assertEquals(messageWithData(new byte[nearlyFull], new byte[topUp]), Iterables.get(emitted, 0));

    // Second round: after the flush, nothing new is emitted until the limit is reached again.
    observer.accept(valueInGlobalWindow(new byte[nearlyFull]));
    assertEquals(1, emitted.size());
    observer.accept(valueInGlobalWindow(new byte[topUp]));
    assertEquals(messageWithData(new byte[nearlyFull], new byte[topUp]), Iterables.get(emitted, 1));

    // Closing with nothing buffered yields a message built from no payloads.
    observer.close();
    assertEquals(messageWithData(), Iterables.get(emitted, 2));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 74 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class BeamFnDataBufferingOutboundObserverTest method testExperimentConfiguresBufferLimit.

/**
 * Builds the observer with the experiment {@code beam_fn_api_data_buffer_limit=100} and checks
 * that this configured limit governs when buffered data is emitted: 51 bytes alone emit
 * nothing, 49 more bytes trigger a message with both payloads, and closing with one byte still
 * buffered produces a single message carrying that byte plus an additional data entry that has
 * only the instruction reference and target set.
 */
@Test
public void testExperimentConfiguresBufferLimit() throws Exception {
    // NOTE(review): this flag is set by the onCompleted callback but never asserted on.
    AtomicBoolean onCompletedWasCalled = new AtomicBoolean();
    Collection<BeamFnApi.Elements> emitted = new ArrayList<>();
    String[] args = new String[] { "--experiments=beam_fn_api_data_buffer_limit=100" };
    CloseableThrowingConsumer<WindowedValue<byte[]>> observer = new BeamFnDataBufferingOutboundObserver<>(
        PipelineOptionsFactory.fromArgs(args).create(),
        OUTPUT_LOCATION,
        CODER,
        TestStreams.withOnNext(emitted::add)
            .withOnCompleted(() -> onCompletedWasCalled.set(true))
            .build());

    // Below the configured limit nothing is emitted.
    observer.accept(valueInGlobalWindow(new byte[51]));
    assertThat(emitted, empty());

    // Reaching the configured limit flushes both payloads in one message.
    observer.accept(valueInGlobalWindow(new byte[49]));
    assertEquals(messageWithData(new byte[51], new byte[49]), Iterables.get(emitted, 0));

    // Closing with data still buffered sends that data and the trailing entry in the same message.
    observer.accept(valueInGlobalWindow(new byte[1]));
    observer.close();
    BeamFnApi.Elements expectedOnClose = BeamFnApi.Elements.newBuilder(messageWithData(new byte[1]))
        .addData(BeamFnApi.Elements.Data.newBuilder()
            .setInstructionReference(OUTPUT_LOCATION.getKey())
            .setTarget(OUTPUT_LOCATION.getValue()))
        .build();
    assertEquals(expectedOnClose, Iterables.get(emitted, 1));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 75 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class BeamFnDataGrpcClientTest method testForInboundConsumerThatThrows.

/**
 * Verifies how the data client reacts when the inbound consumer throws: the future returned by
 * {@code forInboundConsumer} must fail with the consumer's exception as its cause, the consumer
 * must have been invoked exactly once even though two messages were sent, and the server must
 * not have received anything from the client.
 *
 * <p>An in-process gRPC server with a randomly suffixed name acts as the remote side. Both the
 * client channel and the server are shut down when the test finishes, whether it passes or not.
 */
@Test
public void testForInboundConsumerThatThrows() throws Exception {
    CountDownLatch waitForClientToConnect = new CountDownLatch(1);
    AtomicInteger consumerInvoked = new AtomicInteger();
    Collection<BeamFnApi.Elements> inboundServerValues = new ConcurrentLinkedQueue<>();
    AtomicReference<StreamObserver<BeamFnApi.Elements>> outboundServerObserver = new AtomicReference<>();
    CallStreamObserver<BeamFnApi.Elements> inboundServerObserver = TestStreams.withOnNext(inboundServerValues::add).build();
    BeamFnApi.ApiServiceDescriptor apiServiceDescriptor = BeamFnApi.ApiServiceDescriptor.newBuilder().setUrl(this.getClass().getName() + "-" + UUID.randomUUID().toString()).build();
    Server server = InProcessServerBuilder.forName(apiServiceDescriptor.getUrl()).addService(new BeamFnDataGrpc.BeamFnDataImplBase() {

        @Override
        public StreamObserver<BeamFnApi.Elements> data(StreamObserver<BeamFnApi.Elements> outboundObserver) {
            // Capture the server-to-client stream so the test can push elements, then
            // release the test thread that is waiting for the connection.
            outboundServerObserver.set(outboundObserver);
            waitForClientToConnect.countDown();
            return inboundServerObserver;
        }
    }).build();
    server.start();
    RuntimeException exceptionToThrow = new RuntimeException("TestFailure");
    try {
        ManagedChannel channel = InProcessChannelBuilder.forName(apiServiceDescriptor.getUrl()).build();
        try {
            BeamFnDataGrpcClient clientFactory = new BeamFnDataGrpcClient(PipelineOptionsFactory.create(), (BeamFnApi.ApiServiceDescriptor descriptor) -> channel, this::createStreamForTest);
            CompletableFuture<Void> readFuture = clientFactory.forInboundConsumer(apiServiceDescriptor, KEY_A, CODER, new ThrowingConsumer<WindowedValue<String>>() {

                @Override
                public void accept(WindowedValue<String> t) throws Exception {
                    consumerInvoked.incrementAndGet();
                    throw exceptionToThrow;
                }
            });
            waitForClientToConnect.await();
            // The first message should cause a failure; afterwards, all other messages are dropped.
            outboundServerObserver.get().onNext(ELEMENTS_A_1);
            outboundServerObserver.get().onNext(ELEMENTS_A_2);
            try {
                readFuture.get();
                fail("Expected channel to fail");
            } catch (ExecutionException e) {
                assertEquals(exceptionToThrow, e.getCause());
            }
            // The server should not have received any values
            assertThat(inboundServerValues, empty());
            // The consumer should have only been invoked once
            assertEquals(1, consumerInvoked.get());
        } finally {
            // The test created this channel, so it must also release it.
            channel.shutdownNow();
        }
    } finally {
        server.shutdownNow();
    }
}
Also used : Server(io.grpc.Server) ByteString(com.google.protobuf.ByteString) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ManagedChannel(io.grpc.ManagedChannel) ExecutionException(java.util.concurrent.ExecutionException) StreamObserver(io.grpc.stub.StreamObserver) CallStreamObserver(io.grpc.stub.CallStreamObserver) BeamFnApi(org.apache.beam.fn.v1.BeamFnApi) AtomicReference(java.util.concurrent.atomic.AtomicReference) CountDownLatch(java.util.concurrent.CountDownLatch) ExecutionException(java.util.concurrent.ExecutionException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Test(org.junit.Test)

Aggregations

WindowedValue (org.apache.beam.sdk.util.WindowedValue)89 Test (org.junit.Test)53 Instant (org.joda.time.Instant)47 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)36 KV (org.apache.beam.sdk.values.KV)19 ArrayList (java.util.ArrayList)17 WindowMatchers.isSingleWindowedValue (org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue)17 WindowMatchers.isWindowedValue (org.apache.beam.runners.core.WindowMatchers.isWindowedValue)17 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)17 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)16 TupleTag (org.apache.beam.sdk.values.TupleTag)13 JavaRDD (org.apache.spark.api.java.JavaRDD)8 ByteString (com.google.protobuf.ByteString)7 BeamFnApi (org.apache.beam.fn.v1.BeamFnApi)7 ThrowingConsumer (org.apache.beam.fn.harness.fn.ThrowingConsumer)6 IsmRecord (org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord)6 TimestampCombiner (org.apache.beam.sdk.transforms.windowing.TimestampCombiner)6 CloseableThrowingConsumer (org.apache.beam.fn.harness.fn.CloseableThrowingConsumer)5 MetricsContainerImpl (org.apache.beam.runners.core.metrics.MetricsContainerImpl)5 EvaluationContext (org.apache.beam.runners.spark.translation.EvaluationContext)5