use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class DoFnOperatorTest method testCheckpointBufferingWithMultipleBundles.
/**
 * Verifies that elements emitted while a bundle is being finished as part of a snapshot are
 * buffered instead of being sent downstream, and that the buffered elements are emitted later,
 * both when the operator keeps running and when a new operator is restored from the snapshot.
 */
@Test
public void testCheckpointBufferingWithMultipleBundles() throws Exception {
  // Element payloads, named once so the expectations below are easy to follow.
  final String regularElement = "regular element";
  final String bundleTrigger = "trigger another bundle";
  final String bufferedElement = "check that the previous element is not flushed";
  final String restoredElement = "after restore";

  FlinkPipelineOptions flinkOptions = FlinkPipelineOptions.defaults();
  flinkOptions.setMaxBundleSize(10L);
  flinkOptions.setCheckpointingInterval(1L);

  TupleTag<String> mainTag = new TupleTag<>("main-output");
  StringUtf8Coder stringCoder = StringUtf8Coder.of();
  WindowedValue.ValueOnlyWindowedValueCoder<String> inputCoder =
      WindowedValue.getValueOnlyCoder(stringCoder);
  DoFnOperator.MultiOutputOutputManagerFactory<String> outputFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          mainTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE),
          new SerializablePipelineOptions(flinkOptions));

  // A supplier is used because a second, fresh operator is needed for the restore phase.
  Supplier<DoFnOperator<String, String>> operatorFactory =
      () ->
          new DoFnOperator<>(
              new IdentityDoFn<>(),
              "stepName",
              inputCoder,
              Collections.emptyMap(),
              mainTag,
              Collections.emptyList(),
              outputFactory,
              WindowingStrategy.globalDefault(),
              new HashMap<>(), /* side-input mapping */
              Collections.emptyList(), /* side inputs */
              flinkOptions,
              null,
              null,
              DoFnSchemaInformation.create(),
              Collections.emptyMap());

  DoFnOperator<String, String> operator = operatorFactory.get();
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> harness =
      new OneInputStreamOperatorTestHarness<>(operator);
  harness.open();

  // Processing the first element starts a bundle.
  harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(regularElement)));

  // The callback runs from within snapshotState while the currently active bundle is being
  // finished. Whatever it emits has to be buffered rather than forwarded downstream.
  operator.setBundleFinishedCallback(
      () -> {
        try {
          // Unregister the callback first: the bundle is finished again from inside this
          // callback, which would otherwise lead to infinite recursion.
          operator.setBundleFinishedCallback(null);
          harness.processElement(
              new StreamRecord<>(WindowedValue.valueInGlobalWindow(bundleTrigger)));
          operator.invokeFinishBundle();
          harness.processElement(
              new StreamRecord<>(WindowedValue.valueInGlobalWindow(bufferedElement)));
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });

  OperatorSubtaskState checkpoint = harness.snapshot(0, 0);

  // Only the element emitted before the snapshot may be visible at this point.
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow(regularElement)));

  // Continuing to run must flush the buffered elements.
  harness.processWatermark(Long.MAX_VALUE);
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow(regularElement),
          WindowedValue.valueInGlobalWindow(bundleTrigger),
          WindowedValue.valueInGlobalWindow(bufferedElement)));
  harness.close();

  // Restoring from the checkpoint must flush the buffered elements as well.
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>>
      restoredHarness = new OneInputStreamOperatorTestHarness<>(operatorFactory.get());
  restoredHarness.initializeState(checkpoint);
  restoredHarness.open();
  restoredHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(restoredElement)));
  assertThat(
      stripStreamRecordFromWindowedValue(restoredHarness.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow(bundleTrigger),
          WindowedValue.valueInGlobalWindow(bufferedElement),
          WindowedValue.valueInGlobalWindow(restoredElement)));
}
use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testEnsureStateCleanupWithKeyedInputCleanupTimer.
/**
 * Verifies that setting the cleanup timer for a keyed input takes the state backend lock, sets
 * the encoded key on the keyed state backend, registers an event-time timer one millisecond
 * after the window's maximum timestamp, and releases the lock.
 */
@SuppressWarnings("LockNotBeforeTry")
@Test
public void testEnsureStateCleanupWithKeyedInputCleanupTimer() {
  final String userKey = "key";
  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
  IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));

  // Collaborators: real in-memory timers, mocked backend and lock so calls can be verified.
  InMemoryTimerInternals timers = new InMemoryTimerInternals();
  KeyedStateBackend backend = Mockito.mock(KeyedStateBackend.class);
  Lock backendLock = Mockito.mock(Lock.class);

  // Test that cleanup timer is set correctly
  ExecutableStageDoFnOperator.CleanupTimer timerUnderTest =
      new ExecutableStageDoFnOperator.CleanupTimer<>(
          timers,
          backendLock,
          WindowingStrategy.globalDefault(),
          keyCoder,
          windowCoder,
          backend);
  timerUnderTest.setForWindow(KV.of(userKey, "string"), window);

  Mockito.verify(backendLock).lock();

  ByteBuffer encodedKey = FlinkKeyUtils.encodeKey(userKey, keyCoder);
  Mockito.verify(backend).setCurrentKey(encodedKey);

  // The timer is expected one millisecond past the end of the window.
  Instant expectedCleanupTime = window.maxTimestamp().plus(Duration.millis(1));
  assertThat(timers.getNextTimer(TimeDomain.EVENT_TIME), is(expectedCleanupTime));

  Mockito.verify(backendLock).unlock();
}
use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testEnsureStateCleanupOnFinalWatermark.
/**
 * Verifies that keyed state created for the global window is removed once a watermark one
 * millisecond past {@code BoundedWindow.TIMESTAMP_MAX_VALUE} is processed, even though no
 * event-time timer was registered for it.
 */
@Test
public void testEnsureStateCleanupOnFinalWatermark() throws Exception {
  TupleTag<Integer> outputTag = new TupleTag<>("main-output");
  DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          outputTag,
          VoidCoder.of(),
          new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));

  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  WindowingStrategy strategy = WindowingStrategy.globalDefault();
  Coder<BoundedWindow> windowCoder = strategy.getWindowFn().windowCoder();
  KvCoder<String, Integer> inputKvCoder = KvCoder.of(keyCoder, VarIntCoder.of());

  ExecutableStageDoFnOperator<Integer, Integer> operator =
      getOperator(
          outputTag,
          Collections.emptyList(),
          outputFactory,
          strategy,
          keyCoder,
          WindowedValue.getFullCoder(inputKvCoder, windowCoder));

  KeyedOneInputStreamOperatorTestHarness<
          ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>>
      harness =
          new KeyedOneInputStreamOperatorTestHarness(
              operator,
              operator.keySelector,
              new CoderTypeInformation<>(
                  FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));

  // Stub the bundle factory so that it hands out a mocked bundle with one "input" receiver.
  RemoteBundle mockedBundle = Mockito.mock(RemoteBundle.class);
  when(mockedBundle.getInputReceivers())
      .thenReturn(
          ImmutableMap.<String, FnDataReceiver<WindowedValue>>builder()
              .put("input", Mockito.mock(FnDataReceiver.class))
              .build());
  when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any()))
      .thenReturn(mockedBundle);

  harness.open();

  KeyedStateBackend<ByteBuffer> backend = operator.getKeyedStateBackend();
  ByteBuffer encodedKey = FlinkKeyUtils.encodeKey("key1", keyCoder);
  backend.setCurrentKey(encodedKey);

  // Nothing is stored yet; the state added below is what should later be cleaned up.
  assertThat(harness.numKeyedStateEntries(), is(0));
  StateNamespace globalNamespace = StateNamespaces.window(windowCoder, GlobalWindow.INSTANCE);
  // State from the SDK Harness is stored as ByteStrings
  BagState<ByteString> userState =
      operator.keyedStateInternals.state(
          globalNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
  userState.add(ByteString.copyFrom("userstate".getBytes(Charsets.UTF_8)));

  // No timers have been set for cleanup
  assertThat(harness.numEventTimeTimers(), is(0));
  // State has been created
  assertThat(harness.numKeyedStateEntries(), is(1));

  // Generate final watermark to trigger state cleanup
  long finalWatermarkMillis =
      BoundedWindow.TIMESTAMP_MAX_VALUE.plus(Duration.millis(1)).getMillis();
  harness.processWatermark(new Watermark(finalWatermarkMillis));
  assertThat(harness.numKeyedStateEntries(), is(0));
}
use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class FlinkKeyUtilsTest method testEncodeDecode.
/** Verifies that a key encoded with {@code FlinkKeyUtils} decodes back to the original value. */
@Test
public void testEncodeDecode() {
  StringUtf8Coder utf8Coder = StringUtf8Coder.of();
  String original = "key";

  // Round trip: encode to a ByteBuffer, then decode with the same coder.
  ByteBuffer encoded = FlinkKeyUtils.encodeKey(original, utf8Coder);
  String decoded = FlinkKeyUtils.decodeKey(encoded, utf8Coder);

  assertThat(decoded, is(original));
}
use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class IsmSideInputReaderTest method testSingletonMultimapInWindow.
/**
 * Reads a windowed multimap side input concurrently from {@code NUM_THREADS} tasks and checks
 * that each window yields the expected contents, that repeated reads return the same instance,
 * that a window without data yields an empty map, and that all tasks observe the very same
 * value instances.
 */
@Test
public void testSingletonMultimapInWindow() throws Exception {
  IntervalWindow firstWindow = new IntervalWindow(new Instant(0L), new Instant(100L));
  IntervalWindow secondWindow = new IntervalWindow(new Instant(50L), new Instant(150L));
  // No element is written for this window.
  IntervalWindow emptyWindow = new IntervalWindow(new Instant(75L), new Instant(175L));

  // Collection is iterable, and this is immutable
  @SuppressWarnings({"unchecked", "rawtypes"})
  final Map<IntervalWindow, WindowedValue<Map<String, Iterable<Long>>>> elements =
      ImmutableMap.<IntervalWindow, WindowedValue<Map<String, Iterable<Long>>>>builder()
          .put(
              firstWindow,
              WindowedValue.of(
                  (Map)
                      ImmutableListMultimap.<String, Long>builder()
                          .put("foo", 0L)
                          .put("foo", 2L)
                          .put("bar", -1L)
                          .build()
                          .asMap(),
                  new Instant(7),
                  firstWindow,
                  PaneInfo.NO_FIRING))
          .put(
              secondWindow,
              WindowedValue.of(
                  (Map)
                      ImmutableListMultimap.<String, Long>builder()
                          .put("bar", -1L)
                          .put("baz", 1L)
                          .put("baz", 3L)
                          .build()
                          .asMap(),
                  new Instant(53L),
                  secondWindow,
                  PaneInfo.NO_FIRING))
          .build();

  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  Coder<Map<String, Iterable<Long>>> mapCoder =
      MapCoder.of(keyCoder, IterableCoder.of(VarLongCoder.of()));
  final PCollectionView<Map<String, Iterable<Long>>> view =
      Pipeline.create()
          .apply(Create.empty(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())))
          .apply(Window.into(FixedWindows.of(Duration.millis(100L))))
          .apply(View.asMultimap());
  IsmRecordCoder<WindowedValue<Map<String, Iterable<Long>>>> recordCoder =
      IsmRecordCoder.of(
          1,
          0,
          ImmutableList.<Coder<?>>of(INTERVAL_WINDOW_CODER),
          WindowedValue.getFullCoder(mapCoder, INTERVAL_WINDOW_CODER));

  final Source source = initInputFile(fromValues(elements.values()), recordCoder);
  final IsmSideInputReader reader = sideInputReader(view.getTagInternal().getId(), source);

  List<Callable<Map<BoundedWindow, Map<String, Iterable<Long>>>>> readTasks = new ArrayList<>();
  for (int task = 0; task < NUM_THREADS; task++) {
    readTasks.add(
        () -> {
          // Hold strong references to everything read so that the logical reference cache
          // is not cleared while this test runs.
          Map<String, Iterable<Long>> firstValue = reader.get(view, firstWindow);
          assertEquals(elements.get(firstWindow).getValue(), firstValue);
          // A second read must hand back the identical instance, i.e. it was cached.
          assertSame(firstValue, reader.get(view, firstWindow));

          Map<String, Iterable<Long>> secondValue = reader.get(view, secondWindow);
          assertEquals(elements.get(secondWindow).getValue(), secondValue);
          // A second read must hand back the identical instance, i.e. it was cached.
          assertSame(secondValue, reader.get(view, secondWindow));

          Map<String, Iterable<Long>> emptyValue = reader.get(view, emptyWindow);
          assertThat(emptyValue.keySet(), empty());

          return ImmutableMap.<BoundedWindow, Map<String, Iterable<Long>>>builder()
              .put(firstWindow, firstValue)
              .put(secondWindow, secondValue)
              .put(emptyWindow, emptyValue)
              .build();
        });
  }

  List<Future<Map<BoundedWindow, Map<String, Iterable<Long>>>>> futures =
      pipelineOptions.getExecutorService().invokeAll(readTasks);

  // Every task must see the same contents and the very same instances as the first task.
  Map<BoundedWindow, Map<String, Iterable<Long>>> reference = futures.get(0).get();
  for (Future<Map<BoundedWindow, Map<String, Iterable<Long>>>> future : futures) {
    assertEquals(reference, future.get());
    future
        .get()
        .forEach((window, perWindowValue) -> assertSame(reference.get(window), perWindowValue));
  }
}
Aggregations