use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class KafkaIOTest method testInferKeyCoder.
/**
 * Checks the coder that {@code KafkaIO.inferCoder} produces for several deserializer classes.
 *
 * <p>For every deserializer the inferred coder is unwrapped through {@code getValueCoder()} and
 * the concrete class of the wrapped coder is asserted.
 */
@Test
public void testInferKeyCoder() {
  CoderRegistry coderRegistry = CoderRegistry.createDefault();

  // Long deserializer -> VarLongCoder
  Object longValueCoder =
      KafkaIO.inferCoder(coderRegistry, LongDeserializer.class).getValueCoder();
  assertTrue(longValueCoder instanceof VarLongCoder);

  // String deserializer -> StringUtf8Coder
  Object stringValueCoder =
      KafkaIO.inferCoder(coderRegistry, StringDeserializer.class).getValueCoder();
  assertTrue(stringValueCoder instanceof StringUtf8Coder);

  // Instant deserializer -> InstantCoder
  Object instantValueCoder =
      KafkaIO.inferCoder(coderRegistry, InstantDeserializer.class).getValueCoder();
  assertTrue(instantValueCoder instanceof InstantCoder);

  // DeserializerWithInterfaces is expected to resolve to VarLongCoder as well
  Object interfacesValueCoder =
      KafkaIO.inferCoder(coderRegistry, DeserializerWithInterfaces.class).getValueCoder();
  assertTrue(interfacesValueCoder instanceof VarLongCoder);
}
use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testEnsureDeferredStateCleanupTimerFiring.
/**
 * Drives an {@code ExecutableStageDoFnOperator} through a keyed Flink test harness and checks how
 * state cleanup interacts with user timers and bundle boundaries.
 *
 * <p>The sequence exercised is: create one piece of user state and one user timer, start a bundle
 * by processing an element, advance the watermark, finish the bundle, and then assert on the
 * number of keyed state entries, the number of event-time timers and the size of the operator's
 * internal cleanup queue after each step.
 *
 * @param withCheckpointing when {@code true} the second bundle is finished by taking a harness
 *     snapshot; otherwise it is finished by calling {@code operator.invokeFinishBundle()} directly
 * @throws Exception if the harness, the operator or a mocked receiver throws
 */
private void testEnsureDeferredStateCleanupTimerFiring(boolean withCheckpointing) throws Exception {
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
// Output manager factory for the single main output tag.
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
StringUtf8Coder keyCoder = StringUtf8Coder.of();
// Fixed windows of 1000 ms; the IntervalWindow created below spans instants 0 to 1000.
WindowingStrategy windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(1000)));
KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, VarIntCoder.of());
ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, windowingStrategy, keyCoder, WindowedValue.getFullCoder(kvCoder, windowingStrategy.getWindowFn().windowCoder()));
// Every getBundle(...) call on the stage bundle factory returns this mocked bundle.
@SuppressWarnings("unchecked") RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
// (transform id, timer id) pair under which the mocked timer receiver is registered.
KV<String, String> timerInputKey = KV.of("transformId", "timerId");
// Set to true whenever the mocked timer receiver accepts a timer.
AtomicBoolean timerInputReceived = new AtomicBoolean();
IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(1000));
IntervalWindow.IntervalWindowCoder windowCoder = IntervalWindow.IntervalWindowCoder.of();
// The single input element: key "one", value 1, timestamped at the window's max timestamp.
WindowedValue<KV<String, Integer>> windowedValue = WindowedValue.of(KV.of("one", 1), window.maxTimestamp(), ImmutableList.of(window), PaneInfo.NO_FIRING);
FnDataReceiver receiver = Mockito.mock(FnDataReceiver.class);
FnDataReceiver<Timer> timerReceiver = Mockito.mock(FnDataReceiver.class);
doAnswer((invocation) -> {
timerInputReceived.set(true);
return null;
}).when(timerReceiver).accept(any());
// The mocked bundle exposes one data receiver ("input") and one timer receiver.
when(bundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", receiver));
when(bundle.getTimerReceivers()).thenReturn(ImmutableMap.of(timerInputKey, timerReceiver));
KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness(operator, operator.keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
testHarness.open();
// NOTE(review): the operator's private "stateBackendLock" is acquired here and is not released
// anywhere in this method - confirm this is intentional.
Lock stateBackendLock = Whitebox.getInternalState(operator, "stateBackendLock");
stateBackendLock.lock();
// Point the keyed state backend at the encoded key of the input element ("one").
KeyedStateBackend<ByteBuffer> keyedStateBackend = operator.getKeyedStateBackend();
ByteBuffer key = FlinkKeyUtils.encodeKey(windowedValue.getValue().getKey(), keyCoder);
keyedStateBackend.setCurrentKey(key);
// Walk the operator's private fields (operator -> doFnRunner -> delegate -> stateCleaner) to get
// hold of the cleanup queue that the assertions below inspect.
DoFnOperator.FlinkTimerInternals timerInternals = Whitebox.getInternalState(operator, "timerInternals");
Object doFnRunner = Whitebox.getInternalState(operator, "doFnRunner");
Object delegate = Whitebox.getInternalState(doFnRunner, "delegate");
Object stateCleaner = Whitebox.getInternalState(delegate, "stateCleaner");
Collection<?> cleanupQueue = Whitebox.getInternalState(stateCleaner, "cleanupQueue");
// create some state which can be cleaned up
assertThat(testHarness.numKeyedStateEntries(), is(0));
StateNamespace stateNamespace = StateNamespaces.window(windowCoder, window);
// State from the SDK Harness is stored as ByteStrings
BagState<ByteString> state = operator.keyedStateInternals.state(stateNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
state.add(ByteString.copyFrom("userstate".getBytes(Charsets.UTF_8)));
assertThat(testHarness.numKeyedStateEntries(), is(1));
// user timer that fires after the end of the window and after state cleanup
TimerInternals.TimerData userTimer = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
timerInternals.setTimer(userTimer);
// start of bundle
testHarness.processElement(new StreamRecord<>(windowedValue));
verify(receiver).accept(windowedValue);
// move watermark past user timer while bundle is in progress
testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(1)).getMillis()));
// Output watermark is held back and timers do not yet fire (they can still be changed!)
assertThat(timerInputReceived.get(), is(false));
assertThat(operator.getCurrentOutputWatermark(), is(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis()));
// The timer fires on bundle finish
operator.invokeFinishBundle();
// getAndSet(false) both checks that the timer was delivered and resets the flag for later checks.
assertThat(timerInputReceived.getAndSet(false), is(true));
// Move watermark past the cleanup timer
testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(2)).getMillis()));
operator.invokeFinishBundle();
// Cleanup timer has fired and cleanup queue is prepared for bundle finish
assertThat(testHarness.numEventTimeTimers(), is(0));
assertThat(testHarness.numKeyedStateEntries(), is(1));
assertThat(cleanupQueue, hasSize(1));
// Cleanup timer are rescheduled if a new timer is created during the bundle
TimerInternals.TimerData userTimer2 = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
operator.setTimer(Timer.of(windowedValue.getValue().getKey(), "", windowedValue.getWindows(), window.maxTimestamp(), window.maxTimestamp(), PaneInfo.NO_FIRING), userTimer2);
assertThat(testHarness.numEventTimeTimers(), is(1));
if (withCheckpointing) {
// Upon checkpointing, the bundle will be finished.
testHarness.snapshot(0, 0);
} else {
operator.invokeFinishBundle();
}
// Cleanup queue has been processed and cleanup timer has been re-added due to pending timers
// for the window.
assertThat(cleanupQueue, hasSize(0));
verifyNoMoreInteractions(receiver);
assertThat(testHarness.numKeyedStateEntries(), is(2));
assertThat(testHarness.numEventTimeTimers(), is(2));
// No timer has been fired but bundle should be ended
assertThat(timerInputReceived.get(), is(false));
assertThat(Whitebox.getInternalState(operator, "bundleStarted"), is(false));
// Allow user timer and cleanup timer to fire by triggering watermark advancement
// NOTE(review): the call below advances the harness *processing time* by 1; presumably that is
// what lets the held-back watermark progress - confirm against the operator implementation.
testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
assertThat(timerInputReceived.getAndSet(false), is(true));
assertThat(cleanupQueue, hasSize(1));
// Cleanup will be executed after the bundle is complete because there are no more pending
// timers for the window
operator.invokeFinishBundle();
assertThat(cleanupQueue, hasSize(0));
assertThat(testHarness.numKeyedStateEntries(), is(0));
testHarness.close();
// The data receiver must have seen nothing beyond the single element verified earlier.
verifyNoMoreInteractions(receiver);
}
use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class BufferedElementsTest method testCoder.
/**
 * Round-trips several lists of buffered items through {@code BufferedElements.Coder}.
 *
 * <p>The lists mix a buffered data element and a buffered timer in different orders and
 * multiplicities; each list is handed to {@code testRoundTrip} together with the coder under test.
 *
 * @throws IOException if {@code testRoundTrip} fails while encoding or decoding
 */
@Test
public void testCoder() throws IOException {
  KV<String, Integer> timerKey = KV.of("one", 1);

  // Raw Coder on purpose: generics fail to see here that this is Coder<BoundedWindow>
  org.apache.beam.sdk.coders.Coder globalWindowCoder = GlobalWindow.Coder.INSTANCE;
  StringUtf8Coder payloadCoder = StringUtf8Coder.of();
  WindowedValue.WindowedValueCoder valueCoder =
      WindowedValue.FullWindowedValueCoder.of(payloadCoder, globalWindowCoder);
  BufferedElements.Coder bufferCoder =
      new BufferedElements.Coder(valueCoder, globalWindowCoder, timerKey);

  // One buffered data element and one buffered event-time timer to combine below.
  BufferedElement data =
      new BufferedElements.Element(
          WindowedValue.of("test", new Instant(2), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
  BufferedElement timer =
      new BufferedElements.Timer(
          "timerId",
          "timerId",
          timerKey,
          GlobalWindow.INSTANCE,
          new Instant(1),
          new Instant(1),
          TimeDomain.EVENT_TIME);

  // Singletons first, then mixed sequences of increasing length.
  testRoundTrip(ImmutableList.of(data), bufferCoder);
  testRoundTrip(ImmutableList.of(timer), bufferCoder);
  testRoundTrip(ImmutableList.of(data, timer), bufferCoder);
  testRoundTrip(ImmutableList.of(data, timer, data), bufferCoder);
  testRoundTrip(ImmutableList.of(data, data, data, timer, timer), bufferCoder);
}
use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class DoFnOperatorTest method testExactlyOnceBufferingKeyed.
/**
 * Checks buffering of elements for a keyed {@code DoFnOperator} whose {@code DoFn} is annotated
 * with {@code @RequiresStableInput}.
 *
 * <p>Four elements are fed in and no output is expected before a checkpoint. After a snapshot and
 * {@code notifyCheckpointComplete} the four elements plus the element emitted from
 * {@code finishBundle} must be present. The same output is then expected from a fresh operator
 * restored from that snapshot, and a further checkpoint notification must neither start another
 * bundle nor change the output.
 *
 * @throws Exception if the operator or the test harness throws
 */
@Test
public void testExactlyOnceBufferingKeyed() throws Exception {
  FlinkPipelineOptions pipelineOptions = FlinkPipelineOptions.defaults();
  pipelineOptions.setMaxBundleSize(2L);
  pipelineOptions.setCheckpointingInterval(1L);

  TupleTag<KV<String, String>> mainOutputTag = new TupleTag<>("main-output");

  StringUtf8Coder stringCoder = StringUtf8Coder.of();
  KvToByteBufferKeySelector<String, String> keySelector =
      new KvToByteBufferKeySelector<>(
          stringCoder, new SerializablePipelineOptions(pipelineOptions));
  KvCoder<String, String> pairCoder = KvCoder.of(stringCoder, StringUtf8Coder.of());
  WindowedValue.ValueOnlyWindowedValueCoder<KV<String, String>> inputCoder =
      WindowedValue.getValueOnlyCoder(pairCoder);

  // Pass-through DoFn that counts bundle starts and emits one extra element per finished bundle.
  DoFn<KV<String, String>, KV<String, String>> bufferingFn =
      new DoFn<KV<String, String>, KV<String, String>>() {
        @StartBundle
        public void startBundle() {
          numStartBundleCalled++;
        }

        @ProcessElement
        // Use RequiresStableInput to force buffering elements
        @RequiresStableInput
        public void processElement(ProcessContext context) {
          context.output(context.element());
        }

        @FinishBundle
        public void finishBundle(FinishBundleContext context) {
          context.output(
              KV.of("key3", "finishBundle"),
              BoundedWindow.TIMESTAMP_MIN_VALUE,
              GlobalWindow.INSTANCE);
        }
      };

  DoFnOperator.MultiOutputOutputManagerFactory<KV<String, String>> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          mainOutputTag,
          WindowedValue.getFullCoder(pairCoder, GlobalWindow.Coder.INSTANCE),
          new SerializablePipelineOptions(pipelineOptions));

  // A supplier is used because a second, fresh operator is needed for the restore phase.
  Supplier<DoFnOperator<KV<String, String>, KV<String, String>>> operatorFactory =
      () ->
          new DoFnOperator<>(
              bufferingFn,
              "stepName",
              inputCoder,
              Collections.emptyMap(),
              mainOutputTag,
              Collections.emptyList(),
              outputManagerFactory,
              WindowingStrategy.globalDefault(),
              new HashMap<>(), /* side-input mapping */
              Collections.emptyList(), /* side inputs */
              pipelineOptions,
              stringCoder,
              keySelector,
              DoFnSchemaInformation.create(),
              Collections.emptyMap());

  // Values fed into the operator and, together with the finishBundle output, expected back out.
  WindowedValue<KV<String, String>> keyA = WindowedValue.valueInGlobalWindow(KV.of("key", "a"));
  WindowedValue<KV<String, String>> keyB = WindowedValue.valueInGlobalWindow(KV.of("key", "b"));
  WindowedValue<KV<String, String>> key2C = WindowedValue.valueInGlobalWindow(KV.of("key2", "c"));
  WindowedValue<KV<String, String>> key2D = WindowedValue.valueInGlobalWindow(KV.of("key2", "d"));
  WindowedValue<KV<String, String>> fromFinishBundle =
      WindowedValue.valueInGlobalWindow(KV.of("key3", "finishBundle"));

  DoFnOperator<KV<String, String>, KV<String, String>> doFnOperator = operatorFactory.get();
  OneInputStreamOperatorTestHarness<
          WindowedValue<KV<String, String>>, WindowedValue<KV<String, String>>>
      testHarness =
          new KeyedOneInputStreamOperatorTestHarness<>(
              doFnOperator, keySelector, keySelector.getProducedType());
  testHarness.open();

  testHarness.processElement(new StreamRecord<>(keyA));
  testHarness.processElement(new StreamRecord<>(keyB));
  testHarness.processElement(new StreamRecord<>(key2C));
  testHarness.processElement(new StreamRecord<>(key2D));

  // Nothing may be emitted before the checkpoint.
  assertThat(Iterables.size(testHarness.getOutput()), is(0));

  OperatorSubtaskState backup = testHarness.snapshot(0, 0);
  doFnOperator.notifyCheckpointComplete(0L);

  assertThat(numStartBundleCalled, is(1));
  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(keyA, keyB, key2C, key2D, fromFinishBundle));

  doFnOperator = operatorFactory.get();
  testHarness =
      new KeyedOneInputStreamOperatorTestHarness<>(
          doFnOperator, keySelector, keySelector.getProducedType());

  // restore from the snapshot
  testHarness.initializeState(backup);
  testHarness.open();
  doFnOperator.notifyCheckpointComplete(0L);

  assertThat(numStartBundleCalled, is(2));
  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(keyA, keyB, key2C, key2D, fromFinishBundle));

  // repeat to see if elements are evicted
  doFnOperator.notifyCheckpointComplete(1L);

  assertThat(numStartBundleCalled, is(2));
  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(keyA, keyB, key2C, key2D, fromFinishBundle));
}
use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class DoFnOperatorTest method testStateRestore.
/**
 * Checks that a per-key counter held in {@code ValueState} survives a snapshot and restore.
 *
 * <p>The {@code DoFn} increments the counter for every element and only outputs an element whose
 * value equals the counter after the increment. Two elements are processed before the snapshot,
 * so after the restore the elements carrying 4 and 5 are the only ones expected in the output.
 *
 * @throws Exception if the operator or the test harness throws
 */
@Test
public void testStateRestore() throws Exception {
  DoFn<KV<String, Long>, KV<String, Long>> emitWhenValueEqualsCountFn =
      new DoFn<KV<String, Long>, KV<String, Long>>() {

        @StateId("counter")
        private final StateSpec<ValueState<Long>> counterSpec =
            StateSpecs.value(VarLongCoder.of());

        @ProcessElement
        public void processElement(
            ProcessContext context, @StateId("counter") ValueState<Long> count) {
          // An unset counter reads as null and is treated as zero.
          Long previous = count.read();
          long updated = (previous == null ? 0L : previous) + 1;
          count.write(updated);

          KV<String, Long> element = context.element();
          if (updated == element.getValue()) {
            context.output(element);
          }
        }
      };

  WindowingStrategy<Object, GlobalWindow> windowingStrategy = WindowingStrategy.globalDefault();

  TupleTag<KV<String, Long>> mainOutputTag = new TupleTag<>("main-output");

  StringUtf8Coder stringCoder = StringUtf8Coder.of();
  KvToByteBufferKeySelector<String, Long> keySelector =
      new KvToByteBufferKeySelector<>(stringCoder, null);
  KvCoder<String, Long> pairCoder = KvCoder.of(stringCoder, VarLongCoder.of());
  FullWindowedValueCoder<KV<String, Long>> windowedPairCoder =
      WindowedValue.getFullCoder(pairCoder, windowingStrategy.getWindowFn().windowCoder());
  CoderTypeInformation<ByteBuffer> keyTypeInfo =
      new CoderTypeInformation<>(
          FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults());

  // Fixture values: 100 never equals the counter in this test, 4 and 5 each match once.
  WindowedValue<KV<String, Long>> hundred = WindowedValue.valueInGlobalWindow(KV.of("a", 100L));
  WindowedValue<KV<String, Long>> four = WindowedValue.valueInGlobalWindow(KV.of("a", 4L));
  WindowedValue<KV<String, Long>> five = WindowedValue.valueInGlobalWindow(KV.of("a", 5L));

  OneInputStreamOperatorTestHarness<
          WindowedValue<KV<String, Long>>, WindowedValue<KV<String, Long>>>
      testHarness =
          createTestHarness(
              windowingStrategy,
              emitWhenValueEqualsCountFn,
              windowedPairCoder,
              windowedPairCoder,
              stringCoder,
              mainOutputTag,
              keyTypeInfo,
              keySelector);
  testHarness.open();

  // Two elements before the snapshot bring the counter to 2.
  testHarness.processElement(new StreamRecord<>(hundred));
  testHarness.processElement(new StreamRecord<>(hundred));

  OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
  testHarness.close();

  testHarness =
      createTestHarness(
          windowingStrategy,
          emitWhenValueEqualsCountFn,
          windowedPairCoder,
          windowedPairCoder,
          stringCoder,
          mainOutputTag,
          keyTypeInfo,
          keySelector);
  testHarness.initializeState(snapshot);
  testHarness.open();

  // after restore: counter = 2
  testHarness.processElement(new StreamRecord<>(hundred));
  testHarness.processElement(new StreamRecord<>(four));
  testHarness.processElement(new StreamRecord<>(five));
  testHarness.processElement(new StreamRecord<>(hundred));

  assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(four, five));

  testHarness.close();
}
Aggregations