Search in sources :

Example 11 with StringUtf8Coder

use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.

the class DoFnOperatorTest method testCheckpointBufferingWithMultipleBundles.

/**
 * Exercises output buffering around a checkpoint when several bundles are involved.
 *
 * <p>A bundle-finished callback pushes further elements and finishes another bundle while the
 * snapshot is being taken. The test asserts that only the element processed before the snapshot
 * is visible right after it, that the remaining elements show up once a watermark is processed,
 * and that a second harness initialized from the snapshot emits those same elements followed by
 * newly processed input.
 */
@Test
public void testCheckpointBufferingWithMultipleBundles() throws Exception {
    // The values pushed through the operator; the very same values are expected as output.
    WindowedValue<String> regularElement = WindowedValue.valueInGlobalWindow("regular element");
    WindowedValue<String> secondBundleElement = WindowedValue.valueInGlobalWindow("trigger another bundle");
    WindowedValue<String> trailingElement = WindowedValue.valueInGlobalWindow("check that the previous element is not flushed");
    WindowedValue<String> postRestoreElement = WindowedValue.valueInGlobalWindow("after restore");

    FlinkPipelineOptions pipelineOptions = FlinkPipelineOptions.defaults();
    pipelineOptions.setMaxBundleSize(10L);
    pipelineOptions.setCheckpointingInterval(1L);

    TupleTag<String> mainTag = new TupleTag<>("main-output");
    WindowedValue.ValueOnlyWindowedValueCoder<String> inputCoder =
        WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
    DoFnOperator.MultiOutputOutputManagerFactory<String> managerFactory =
        new DoFnOperator.MultiOutputOutputManagerFactory<>(
            mainTag,
            WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE),
            new SerializablePipelineOptions(pipelineOptions));

    // A supplier is used because a fresh operator instance is needed again for the restore phase.
    Supplier<DoFnOperator<String, String>> operatorFactory =
        () ->
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                inputCoder,
                Collections.emptyMap(),
                mainTag,
                Collections.emptyList(),
                managerFactory,
                WindowingStrategy.globalDefault(),
                /* side-input mapping */ new HashMap<>(),
                /* side inputs */ Collections.emptyList(),
                pipelineOptions,
                null,
                null,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

    DoFnOperator<String, String> operator = operatorFactory.get();
    OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> harness =
        new OneInputStreamOperatorTestHarness<>(operator);
    harness.open();

    // Processing the first element starts a bundle.
    harness.processElement(new StreamRecord<>(regularElement));

    // The callback runs inside snapshotState while the active bundle is being finished.
    // Whatever it emits is expected to be buffered rather than sent downstream.
    operator.setBundleFinishedCallback(
        () -> {
            try {
                // Unregister the callback first: the bundle is finished from within the callback,
                // which would otherwise lead to infinite recursion.
                operator.setBundleFinishedCallback(null);
                harness.processElement(new StreamRecord<>(secondBundleElement));
                operator.invokeFinishBundle();
                harness.processElement(new StreamRecord<>(trailingElement));
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });

    OperatorSubtaskState checkpoint = harness.snapshot(0, 0);

    // Only the element emitted before the snapshot may be visible at this point.
    assertThat(
        stripStreamRecordFromWindowedValue(harness.getOutput()),
        contains(regularElement));

    // Continuing to run must flush the buffered elements.
    harness.processWatermark(Long.MAX_VALUE);
    assertThat(
        stripStreamRecordFromWindowedValue(harness.getOutput()),
        contains(regularElement, secondBundleElement, trailingElement));
    harness.close();

    // Restoring from the checkpoint must flush the buffered elements as well.
    DoFnOperator<String, String> restoredOperator = operatorFactory.get();
    OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> restoredHarness =
        new OneInputStreamOperatorTestHarness<>(restoredOperator);
    restoredHarness.initializeState(checkpoint);
    restoredHarness.open();
    restoredHarness.processElement(new StreamRecord<>(postRestoreElement));
    assertThat(
        stripStreamRecordFromWindowedValue(restoredHarness.getOutput()),
        contains(secondBundleElement, trailingElement, postRestoreElement));
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) Test(org.junit.Test)

Example 12 with StringUtf8Coder

use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.

the class ExecutableStageDoFnOperatorTest method testEnsureStateCleanupWithKeyedInputCleanupTimer.

/**
 * Checks the interactions performed by {@code CleanupTimer#setForWindow} for a keyed input.
 *
 * <p>After setting the timer for an interval window, the test verifies that the state backend
 * lock was locked and unlocked, that the encoded key was set as the current key on the keyed
 * state backend, and that the next event-time timer is one millisecond past the window's maximum
 * timestamp.
 */
@SuppressWarnings("LockNotBeforeTry")
@Test
public void testEnsureStateCleanupWithKeyedInputCleanupTimer() {
    StringUtf8Coder stringCoder = StringUtf8Coder.of();
    Coder<IntervalWindow> intervalWindowCoder = IntervalWindow.getCoder();
    IntervalWindow targetWindow = new IntervalWindow(new Instant(0), new Instant(10));

    InMemoryTimerInternals timers = new InMemoryTimerInternals();
    KeyedStateBackend mockedBackend = Mockito.mock(KeyedStateBackend.class);
    Lock mockedLock = Mockito.mock(Lock.class);

    // Set the cleanup timer for the window using a key/value input.
    ExecutableStageDoFnOperator.CleanupTimer timerUnderTest =
        new ExecutableStageDoFnOperator.CleanupTimer<>(
            timers,
            mockedLock,
            WindowingStrategy.globalDefault(),
            stringCoder,
            intervalWindowCoder,
            mockedBackend);
    timerUnderTest.setForWindow(KV.of("key", "string"), targetWindow);

    // Expected values derived from the inputs above.
    ByteBuffer encodedKey = FlinkKeyUtils.encodeKey("key", stringCoder);
    Instant expectedFiringTime = targetWindow.maxTimestamp().plus(Duration.millis(1));

    Mockito.verify(mockedLock).lock();
    Mockito.verify(mockedBackend).setCurrentKey(encodedKey);
    assertThat(timers.getNextTimer(TimeDomain.EVENT_TIME), is(expectedFiringTime));
    Mockito.verify(mockedLock).unlock();
}
Also used : KeyedStateBackend(org.apache.flink.runtime.state.KeyedStateBackend) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) Instant(org.joda.time.Instant) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) ByteBuffer(java.nio.ByteBuffer) NoopLock(org.apache.beam.sdk.util.NoopLock) Lock(java.util.concurrent.locks.Lock) Test(org.junit.Test) FlinkStateInternalsTest(org.apache.beam.runners.flink.streaming.FlinkStateInternalsTest)

Example 13 with StringUtf8Coder

use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.

the class ExecutableStageDoFnOperatorTest method testEnsureStateCleanupOnFinalWatermark.

/**
 * Verifies that keyed state is removed when a watermark one millisecond past
 * {@code BoundedWindow.TIMESTAMP_MAX_VALUE} is processed.
 *
 * <p>The test writes one bag-state entry for a key, asserts that no event-time timers exist and
 * that exactly one keyed state entry is present, then processes the final watermark and asserts
 * that no keyed state entries remain.
 */
@Test
public void testEnsureStateCleanupOnFinalWatermark() throws Exception {
    // Coders and windowing shared by the operator and the state access below.
    StringUtf8Coder stringCoder = StringUtf8Coder.of();
    WindowingStrategy globalStrategy = WindowingStrategy.globalDefault();
    Coder<BoundedWindow> boundedWindowCoder = globalStrategy.getWindowFn().windowCoder();
    KvCoder<String, Integer> inputKvCoder = KvCoder.of(stringCoder, VarIntCoder.of());

    TupleTag<Integer> outputTag = new TupleTag<>("main-output");
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> managerFactory =
        new DoFnOperator.MultiOutputOutputManagerFactory(
            outputTag,
            VoidCoder.of(),
            new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));

    ExecutableStageDoFnOperator<Integer, Integer> stageOperator =
        getOperator(
            outputTag,
            Collections.emptyList(),
            managerFactory,
            globalStrategy,
            stringCoder,
            WindowedValue.getFullCoder(inputKvCoder, boundedWindowCoder));
    KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> harness =
        new KeyedOneInputStreamOperatorTestHarness(
            stageOperator,
            stageOperator.keySelector,
            new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));

    // Stub the bundle handed out by the stage bundle factory before the operator is opened.
    RemoteBundle mockedBundle = Mockito.mock(RemoteBundle.class);
    when(mockedBundle.getInputReceivers())
        .thenReturn(
            ImmutableMap.<String, FnDataReceiver<WindowedValue>>builder()
                .put("input", Mockito.mock(FnDataReceiver.class))
                .build());
    when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any()))
        .thenReturn(mockedBundle);

    harness.open();

    KeyedStateBackend<ByteBuffer> stateBackend = stageOperator.getKeyedStateBackend();
    ByteBuffer encodedKey = FlinkKeyUtils.encodeKey("key1", stringCoder);
    stateBackend.setCurrentKey(encodedKey);

    // Nothing is stored yet; the state created next is what should get cleaned up.
    assertThat(harness.numKeyedStateEntries(), is(0));
    StateNamespace globalNamespace = StateNamespaces.window(boundedWindowCoder, GlobalWindow.INSTANCE);
    // State from the SDK Harness is stored as ByteStrings.
    BagState<ByteString> userState =
        stageOperator.keyedStateInternals.state(
            globalNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
    byte[] payload = "userstate".getBytes(Charsets.UTF_8);
    userState.add(ByteString.copyFrom(payload));

    // No cleanup timers have been set ...
    assertThat(harness.numEventTimeTimers(), is(0));
    // ... but the state entry exists.
    assertThat(harness.numKeyedStateEntries(), is(1));

    // The final watermark triggers the state cleanup.
    long finalWatermarkMillis = BoundedWindow.TIMESTAMP_MAX_VALUE.plus(Duration.millis(1)).getMillis();
    harness.processWatermark(new Watermark(finalWatermarkMillis));
    assertThat(harness.numKeyedStateEntries(), is(0));
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) TupleTag(org.apache.beam.sdk.values.TupleTag) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) ByteBuffer(java.nio.ByteBuffer) StateNamespace(org.apache.beam.runners.core.StateNamespace) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test) FlinkStateInternalsTest(org.apache.beam.runners.flink.streaming.FlinkStateInternalsTest)

Example 14 with StringUtf8Coder

use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.

the class FlinkKeyUtilsTest method testEncodeDecode.

/**
 * Round-trips a string key through {@code FlinkKeyUtils.encodeKey} and
 * {@code FlinkKeyUtils.decodeKey} and expects the decoded value to equal the input.
 */
@Test
public void testEncodeDecode() {
    final StringUtf8Coder utf8Coder = StringUtf8Coder.of();
    final String original = "key";

    ByteBuffer encoded = FlinkKeyUtils.encodeKey(original, utf8Coder);
    String decoded = FlinkKeyUtils.decodeKey(encoded, utf8Coder);

    assertThat(decoded, is(original));
}
Also used : StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)

Example 15 with StringUtf8Coder

use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.

the class IsmSideInputReaderTest method testSingletonMultimapInWindow.

/**
 * Reads a windowed multimap side input concurrently and checks both contents and caching.
 *
 * <p>Two windows hold multimap contents and a third window holds none. Each of the
 * {@code NUM_THREADS} tasks asserts that the reader returns the expected contents per window,
 * that repeated reads return the same reference, and that the empty window yields a map without
 * keys. Finally the test asserts that every task observed equal results backed by the same
 * value instances.
 */
@Test
public void testSingletonMultimapInWindow() throws Exception {
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0L), new Instant(100L));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(50L), new Instant(150L));
    IntervalWindow emptyWindow = new IntervalWindow(new Instant(75L), new Instant(175L));

    // Collection is iterable, and the multimap view is immutable, so treating the values as
    // Iterable<Long> through the raw Map cast is fine here.
    @SuppressWarnings({ "unchecked", "rawtypes" })
    final Map<String, Iterable<Long>> firstContents =
        (Map) ImmutableListMultimap.<String, Long>builder()
            .put("foo", 0L)
            .put("foo", 2L)
            .put("bar", -1L)
            .build()
            .asMap();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    final Map<String, Iterable<Long>> secondContents =
        (Map) ImmutableListMultimap.<String, Long>builder()
            .put("bar", -1L)
            .put("baz", 1L)
            .put("baz", 3L)
            .build()
            .asMap();
    final Map<IntervalWindow, WindowedValue<Map<String, Iterable<Long>>>> elements =
        ImmutableMap.<IntervalWindow, WindowedValue<Map<String, Iterable<Long>>>>builder()
            .put(firstWindow, WindowedValue.of(firstContents, new Instant(7), firstWindow, PaneInfo.NO_FIRING))
            .put(secondWindow, WindowedValue.of(secondContents, new Instant(53L), secondWindow, PaneInfo.NO_FIRING))
            .build();

    Coder<Map<String, Iterable<Long>>> mapCoder =
        MapCoder.of(StringUtf8Coder.of(), IterableCoder.of(VarLongCoder.of()));
    final PCollectionView<Map<String, Iterable<Long>>> view =
        Pipeline.create()
            .apply(Create.empty(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())))
            .apply(Window.into(FixedWindows.of(Duration.millis(100L))))
            .apply(View.asMultimap());
    IsmRecordCoder<WindowedValue<Map<String, Iterable<Long>>>> recordCoder =
        IsmRecordCoder.of(
            1,
            0,
            ImmutableList.<Coder<?>>of(INTERVAL_WINDOW_CODER),
            WindowedValue.getFullCoder(mapCoder, INTERVAL_WINDOW_CODER));
    final Source source = initInputFile(fromValues(elements.values()), recordCoder);
    final IsmSideInputReader reader = sideInputReader(view.getTagInternal().getId(), source);

    // The lookup captures no mutable state, so one instance is submitted NUM_THREADS times.
    Callable<Map<BoundedWindow, Map<String, Iterable<Long>>>> lookup =
        () -> {
            // Hold strong references to the returned values so that the logical reference
            // cache is not cleared during the test.
            Map<String, Iterable<Long>> firstValue = reader.get(view, firstWindow);
            assertEquals(elements.get(firstWindow).getValue(), firstValue);
            // Getting the same reference back shows that the value was cached.
            assertSame(firstValue, reader.get(view, firstWindow));

            Map<String, Iterable<Long>> secondValue = reader.get(view, secondWindow);
            assertEquals(elements.get(secondWindow).getValue(), secondValue);
            // Getting the same reference back shows that the value was cached.
            assertSame(secondValue, reader.get(view, secondWindow));

            Map<String, Iterable<Long>> emptyValue = reader.get(view, emptyWindow);
            assertThat(emptyValue.keySet(), empty());

            return ImmutableMap.<BoundedWindow, Map<String, Iterable<Long>>>builder()
                .put(firstWindow, firstValue)
                .put(secondWindow, secondValue)
                .put(emptyWindow, emptyValue)
                .build();
        };
    List<Callable<Map<BoundedWindow, Map<String, Iterable<Long>>>>> lookups = new ArrayList<>();
    for (int taskIndex = 0; taskIndex < NUM_THREADS; ++taskIndex) {
        lookups.add(lookup);
    }

    List<Future<Map<BoundedWindow, Map<String, Iterable<Long>>>>> futures =
        pipelineOptions.getExecutorService().invokeAll(lookups);

    // Every task must have seen equal maps that are backed by the very same value instances.
    Map<BoundedWindow, Map<String, Iterable<Long>>> reference = futures.get(0).get();
    for (Future<Map<BoundedWindow, Map<String, Iterable<Long>>>> future : futures) {
        Map<BoundedWindow, Map<String, Iterable<Long>>> observed = future.get();
        assertEquals(reference, observed);
        for (Map.Entry<BoundedWindow, Map<String, Iterable<Long>>> entry : observed.entrySet()) {
            assertSame(reference.get(entry.getKey()), entry.getValue());
        }
    }
}
Also used : ArrayList(java.util.ArrayList) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) Source(com.google.api.services.dataflow.model.Source) Callable(java.util.concurrent.Callable) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) Future(java.util.concurrent.Future) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) TreeMap(java.util.TreeMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Aggregations

StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)17 Test (org.junit.Test)15 WindowedValue (org.apache.beam.sdk.util.WindowedValue)13 StreamRecordStripper.stripStreamRecordFromWindowedValue (org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue)10 TupleTag (org.apache.beam.sdk.values.TupleTag)10 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)9 KV (org.apache.beam.sdk.values.KV)8 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)8 ByteBuffer (java.nio.ByteBuffer)7 FlinkPipelineOptions (org.apache.beam.runners.flink.FlinkPipelineOptions)7 DoFn (org.apache.beam.sdk.transforms.DoFn)6 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)6 Instant (org.joda.time.Instant)6 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)5 HashMap (java.util.HashMap)4 StateNamespace (org.apache.beam.runners.core.StateNamespace)4 VarLongCoder (org.apache.beam.sdk.coders.VarLongCoder)4 WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy)4 ArrayList (java.util.ArrayList)3 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)3