use of org.apache.beam.runners.core.construction.Timer in project beam by apache.
the class ProcessBundleDescriptors method forTimerSpecs.
/**
 * Builds one {@link TimerSpec} per timer referenced by the given stage.
 *
 * <p>For every timer the method also registers two length-prefixed variants of the timer family
 * coder in {@code components} (one requested with {@code false}, one with {@code true}) and
 * rewrites the owning transform inside {@code components} so that its timer family spec points at
 * the first ("sdk") variant. The returned spec carries the coder rehydrated from the second
 * ("runner") variant.
 *
 * @param stage the executable stage whose timers and wire coder settings are inspected
 * @param components mutable pipeline components; coders are added and transforms are replaced
 * @return a map from transform id to a map from timer local name to the timer's spec
 * @throws IOException declared by the signature; presumably raised while parsing a payload or
 *     rehydrating a coder (confirm against the callee declarations)
 * @throws IllegalArgumentException if a timer uses a time domain other than event time or
 *     processing time, or if the rehydrated coder is not a {@code Timer.Coder}
 * @throws UnsupportedOperationException if the stage carries a wire coder setting for a timer
 */
private static Map<String, Map<String, TimerSpec>> forTimerSpecs(ExecutableStage stage, Components.Builder components) throws IOException {
  ImmutableTable.Builder<String, String, TimerSpec> specsByTransformAndTimer = ImmutableTable.builder();

  for (TimerReference timerRef : stage.getTimers()) {
    final String transformId = timerRef.transform().getId();
    final String timerName = timerRef.localName();

    // Look up the timer family declaration in the ParDo payload captured by the timer reference.
    RunnerApi.TimerFamilySpec familySpec =
        RunnerApi.ParDoPayload.parseFrom(timerRef.transform().getTransform().getSpec().getPayload())
            .getTimerFamilySpecsOrThrow(timerName);

    // Translate the proto time domain; anything besides event/processing time is rejected.
    TimeDomain domain;
    switch (familySpec.getTimeDomain()) {
      case EVENT_TIME:
        domain = TimeDomain.EVENT_TIME;
        break;
      case PROCESSING_TIME:
        domain = TimeDomain.PROCESSING_TIME;
        break;
      default:
        throw new IllegalArgumentException(String.format("Unknown or unsupported time domain %s", familySpec.getTimeDomain()));
    }
    org.apache.beam.sdk.state.TimerSpec sdkTimerSpec = TimerSpecs.timer(domain);

    // A wire coder setting that targets this exact timer is not handled here.
    for (WireCoderSetting setting : stage.getWireCoderSettings()) {
      if (!setting.hasTimer()) {
        continue;
      }
      if (setting.getTimer().getTransformId().equals(transformId)
          && setting.getTimer().getLocalName().equals(timerName)) {
        throw new UnsupportedOperationException("WireCoderSetting for timer is yet to be supported.");
      }
    }

    // Register both length-prefixed variants. The call order is kept because each call may add
    // coders to `components`.
    String declaredCoderId = familySpec.getTimerFamilyCoderId();
    String sdkSideCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder(declaredCoderId, components, false);
    String runnerSideCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder(declaredCoderId, components, true);

    Coder<?> runnerTimerCoder = RehydratedComponents.forComponents(components.build()).getCoder(runnerSideCoderId);
    checkArgument(runnerTimerCoder instanceof Timer.Coder, "Expected a timer coder but received %s.", runnerTimerCoder);

    // Rewrite the timer family spec of the transform currently stored in `components` so that it
    // references the sdk-side coder id.
    RunnerApi.FunctionSpec.Builder rewrittenSpec = components.getTransformsOrThrow(transformId).toBuilder().getSpecBuilder();
    RunnerApi.ParDoPayload.Builder rewrittenPayload = RunnerApi.ParDoPayload.parseFrom(rewrittenSpec.getPayload()).toBuilder();
    RunnerApi.TimerFamilySpec rewrittenFamilySpec =
        rewrittenPayload.getTimerFamilySpecsOrThrow(timerName).toBuilder().setTimerFamilyCoderId(sdkSideCoderId).build();
    rewrittenPayload.putTimerFamilySpecs(timerName, rewrittenFamilySpec);
    rewrittenSpec.setPayload(rewrittenPayload.build().toByteString());

    // The transform is read back from `components` (not from the timer reference) so that
    // rewrites made for earlier timers of the same transform are carried forward.
    components.putTransforms(
        transformId,
        components.getTransformsOrThrow(transformId).toBuilder().setSpec(rewrittenSpec).build());

    specsByTransformAndTimer.put(
        transformId,
        timerName,
        TimerSpec.of(transformId, timerName, sdkTimerSpec, (Coder) runnerTimerCoder));
  }

  return specsByTransformAndTimer.build().rowMap();
}
use of org.apache.beam.runners.core.construction.Timer in project beam by apache.
the class PTransformRunnerFactoryTestContext method addOutgoingTimersEndpoint.
/**
 * Registers an outgoing timer location for the given timer family and records it as an endpoint.
 *
 * <p>The receiver is obtained from the outbound aggregator associated with the timer API service
 * descriptor. A matching {@code TimerEndpoint} is appended to the outgoing timers endpoints.
 *
 * @param timerFamilyId id of the timer family the receiver is registered for
 * @param coder coder handed to the aggregator and stored on the endpoint
 * @return the receiver returned by the aggregator
 */
@Override
public <T> FnDataReceiver<Timer<T>> addOutgoingTimersEndpoint(String timerFamilyId, Coder<Timer<T>> coder) {
  FnDataReceiver<Timer<T>> timerReceiver =
      getOutboundAggregators()
          .get(getTimerApiServiceDescriptor())
          .registerOutputTimersLocation(getPTransformId(), timerFamilyId, coder);
  getOutgoingTimersEndpoints()
      .add(TimerEndpoint.create(getPTransformId(), timerFamilyId, coder, timerReceiver));
  return timerReceiver;
}
use of org.apache.beam.runners.core.construction.Timer in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testWatermarkHandling.
/**
 * Checks how the operator's output watermark moves relative to bundles and event-time timers.
 *
 * <p>The operator is driven through a keyed test harness with a mocked {@code RemoteBundle}. The
 * scenario asserts, in order, that the output watermark: follows the input watermark while no
 * bundle is open; is held while a bundle is open and released by {@code invokeFinishBundle()};
 * is held while timers set during a bundle are pending; and equals {@code Long.MAX_VALUE} after
 * the harness is closed with a timer still pending.
 */
@Test
public void testWatermarkHandling() throws Exception {
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
// Operator over 10 ms fixed windows, keyed by the String key of the KV input.
ExecutableStageDoFnOperator<KV<String, Integer>, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))), StringUtf8Coder.of(), WindowedValue.getFullCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()), IntervalWindow.getCoder()));
KeyedOneInputStreamOperatorTestHarness<String, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(operator, val -> val.getValue().getKey(), new CoderTypeInformation<>(StringUtf8Coder.of(), FlinkPipelineOptions.defaults()));
// Mocked bundle exposing one input receiver and three timer receivers
// ("timer", "timer2", "timer3") for the transform id "transform".
RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
when(bundle.getInputReceivers()).thenReturn(ImmutableMap.<String, FnDataReceiver<WindowedValue>>builder().put("input", Mockito.mock(FnDataReceiver.class)).build());
when(bundle.getTimerReceivers()).thenReturn(ImmutableMap.<KV<String, String>, FnDataReceiver<WindowedValue>>builder().put(KV.of("transform", "timer"), Mockito.mock(FnDataReceiver.class)).put(KV.of("transform", "timer2"), Mockito.mock(FnDataReceiver.class)).put(KV.of("transform", "timer3"), Mockito.mock(FnDataReceiver.class)).build());
when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
testHarness.open();
assertThat(operator.getCurrentOutputWatermark(), is(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis()));
// No bundle has been started, watermark can be freely advanced
testHarness.processWatermark(0);
assertThat(operator.getCurrentOutputWatermark(), is(0L));
// Trigger a new bundle
IntervalWindow intervalWindow = new IntervalWindow(new Instant(0), new Instant(9));
WindowedValue<KV<String, Integer>> windowedValue = WindowedValue.of(KV.of("one", 1), Instant.now(), intervalWindow, PaneInfo.NO_FIRING);
testHarness.processElement(new StreamRecord<>(windowedValue));
// The output watermark should be held back during the bundle
testHarness.processWatermark(1);
assertThat(operator.getEffectiveInputWatermark(), is(1L));
assertThat(operator.getCurrentOutputWatermark(), is(0L));
// After the bundle has been finished, the watermark should be advanced
operator.invokeFinishBundle();
assertThat(operator.getCurrentOutputWatermark(), is(1L));
// Bundle finished, watermark can be freely advanced
testHarness.processWatermark(2);
assertThat(operator.getEffectiveInputWatermark(), is(2L));
assertThat(operator.getCurrentOutputWatermark(), is(2L));
// Trigger a new bundle
testHarness.processElement(new StreamRecord<>(windowedValue));
// Exactly one event-time timer exists at this point: the cleanup timer
assertThat(testHarness.numEventTimeTimers(), is(1));
// Set two user timers, targeting timestamps 5 and 6
Instant timerTarget = new Instant(5);
Instant timerTarget2 = new Instant(6);
// NOTE(review): this lock is acquired here and is not released anywhere in this method.
operator.getLockToAcquireForStateAccessDuringBundles().lock();
// Helper: sets an event-time timer with the given id and timestamp for the element's key and
// window, under transform id "transform".
BiConsumer<String, Instant> timerConsumer = (timerId, timestamp) -> operator.setTimer(Timer.of(windowedValue.getValue().getKey(), "", windowedValue.getWindows(), timestamp, timestamp, PaneInfo.NO_FIRING), TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId("transform", timerId), StateNamespaces.window(IntervalWindow.getCoder(), intervalWindow), timestamp, timestamp, TimeDomain.EVENT_TIME));
timerConsumer.accept("timer", timerTarget);
timerConsumer.accept("timer2", timerTarget2);
// Cleanup timer plus the two user timers
assertThat(testHarness.numEventTimeTimers(), is(3));
// Advance input watermark past the timer
// Check the output watermark is held back
long targetWatermark = timerTarget.getMillis() + 100;
testHarness.processWatermark(targetWatermark);
// Do not yet advance the output watermark because we are still processing a bundle
assertThat(testHarness.numEventTimeTimers(), is(3));
assertThat(operator.getCurrentOutputWatermark(), is(2L));
// Check that the timers are fired but the output watermark is advanced no further than
// the minimum timer timestamp of the previous bundle because we are still processing a
// bundle which might contain more timers.
// Timers can create loops if they keep rescheduling themselves when firing
// Thus, we advance the watermark asynchronously to allow for checkpointing to run
operator.invokeFinishBundle();
assertThat(testHarness.numEventTimeTimers(), is(3));
// Moving processing time forward by 1 ms presumably runs the deferred watermark advancement:
// afterwards no event-time timers remain and the output watermark is 5.
testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
assertThat(testHarness.numEventTimeTimers(), is(0));
assertThat(operator.getCurrentOutputWatermark(), is(5L));
// Output watermark is advanced synchronously when the bundle finishes,
// no more timers are scheduled
operator.invokeFinishBundle();
assertThat(operator.getCurrentOutputWatermark(), is(targetWatermark));
assertThat(testHarness.numEventTimeTimers(), is(0));
// Watermark is advanced in a blocking fashion on close, not via timers
// Create a bundle with a pending timer to simulate that
testHarness.processElement(new StreamRecord<>(windowedValue));
timerConsumer.accept("timer3", new Instant(targetWatermark));
assertThat(testHarness.numEventTimeTimers(), is(1));
// This should be blocking until the watermark reaches Long.MAX_VALUE.
testHarness.close();
assertThat(testHarness.numEventTimeTimers(), is(0));
assertThat(operator.getCurrentOutputWatermark(), is(Long.MAX_VALUE));
}
use of org.apache.beam.runners.core.construction.Timer in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testEnsureDeferredStateCleanupTimerFiring.
/**
 * Shared scenario checking that state cleanup is deferred until no timers are pending for a
 * window, and that user timers still fire before the cleanup runs.
 *
 * <p>The operator is driven through a keyed test harness with a mocked {@code RemoteBundle}.
 * Several operator internals (state backend lock, timer internals, cleanup queue) are read by
 * field name via {@code Whitebox}, so the test depends on those field names.
 *
 * @param withCheckpointing if {@code true}, the bundle in the middle of the scenario is finished
 *     by calling {@code testHarness.snapshot(0, 0)}; otherwise by calling
 *     {@code operator.invokeFinishBundle()}
 */
private void testEnsureDeferredStateCleanupTimerFiring(boolean withCheckpointing) throws Exception {
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
StringUtf8Coder keyCoder = StringUtf8Coder.of();
// 1000 ms fixed windows
WindowingStrategy windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(1000)));
KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, VarIntCoder.of());
ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, windowingStrategy, keyCoder, WindowedValue.getFullCoder(kvCoder, windowingStrategy.getWindowFn().windowCoder()));
@SuppressWarnings("unchecked") RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
// (transform id, timer id) pair under which the mocked timer receiver is registered
KV<String, String> timerInputKey = KV.of("transformId", "timerId");
// Set to true whenever the mocked timer receiver accepts a value
AtomicBoolean timerInputReceived = new AtomicBoolean();
IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(1000));
IntervalWindow.IntervalWindowCoder windowCoder = IntervalWindow.IntervalWindowCoder.of();
WindowedValue<KV<String, Integer>> windowedValue = WindowedValue.of(KV.of("one", 1), window.maxTimestamp(), ImmutableList.of(window), PaneInfo.NO_FIRING);
FnDataReceiver receiver = Mockito.mock(FnDataReceiver.class);
FnDataReceiver<Timer> timerReceiver = Mockito.mock(FnDataReceiver.class);
doAnswer((invocation) -> {
timerInputReceived.set(true);
return null;
}).when(timerReceiver).accept(any());
when(bundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", receiver));
when(bundle.getTimerReceivers()).thenReturn(ImmutableMap.of(timerInputKey, timerReceiver));
KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness(operator, operator.keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
testHarness.open();
// NOTE(review): this lock is acquired here and is not released anywhere in this method.
Lock stateBackendLock = Whitebox.getInternalState(operator, "stateBackendLock");
stateBackendLock.lock();
// Select the element's encoded key as the current key of the keyed state backend
KeyedStateBackend<ByteBuffer> keyedStateBackend = operator.getKeyedStateBackend();
ByteBuffer key = FlinkKeyUtils.encodeKey(windowedValue.getValue().getKey(), keyCoder);
keyedStateBackend.setCurrentKey(key);
// Read the operator's internals by field name to reach the cleanup queue
DoFnOperator.FlinkTimerInternals timerInternals = Whitebox.getInternalState(operator, "timerInternals");
Object doFnRunner = Whitebox.getInternalState(operator, "doFnRunner");
Object delegate = Whitebox.getInternalState(doFnRunner, "delegate");
Object stateCleaner = Whitebox.getInternalState(delegate, "stateCleaner");
Collection<?> cleanupQueue = Whitebox.getInternalState(stateCleaner, "cleanupQueue");
// create some state which can be cleaned up
assertThat(testHarness.numKeyedStateEntries(), is(0));
StateNamespace stateNamespace = StateNamespaces.window(windowCoder, window);
// State from the SDK Harness is stored as ByteStrings
BagState<ByteString> state = operator.keyedStateInternals.state(stateNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
state.add(ByteString.copyFrom("userstate".getBytes(Charsets.UTF_8)));
assertThat(testHarness.numKeyedStateEntries(), is(1));
// user timer that fires after the end of the window and after state cleanup
TimerInternals.TimerData userTimer = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
timerInternals.setTimer(userTimer);
// start of bundle
testHarness.processElement(new StreamRecord<>(windowedValue));
verify(receiver).accept(windowedValue);
// move watermark past user timer while bundle is in progress
testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(1)).getMillis()));
// Output watermark is held back and timers do not yet fire (they can still be changed!)
assertThat(timerInputReceived.get(), is(false));
assertThat(operator.getCurrentOutputWatermark(), is(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis()));
// The timer fires on bundle finish
operator.invokeFinishBundle();
assertThat(timerInputReceived.getAndSet(false), is(true));
// Move watermark past the cleanup timer
testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(2)).getMillis()));
operator.invokeFinishBundle();
// Cleanup timer has fired and cleanup queue is prepared for bundle finish
assertThat(testHarness.numEventTimeTimers(), is(0));
assertThat(testHarness.numKeyedStateEntries(), is(1));
assertThat(cleanupQueue, hasSize(1));
// Cleanup timers are rescheduled if a new timer is created during the bundle
TimerInternals.TimerData userTimer2 = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
operator.setTimer(Timer.of(windowedValue.getValue().getKey(), "", windowedValue.getWindows(), window.maxTimestamp(), window.maxTimestamp(), PaneInfo.NO_FIRING), userTimer2);
assertThat(testHarness.numEventTimeTimers(), is(1));
// Finish the bundle either through a checkpoint or explicitly, depending on the parameter
if (withCheckpointing) {
// Upon checkpointing, the bundle will be finished.
testHarness.snapshot(0, 0);
} else {
operator.invokeFinishBundle();
}
// Cleanup queue has been processed and cleanup timer has been re-added due to pending timers
// for the window.
assertThat(cleanupQueue, hasSize(0));
verifyNoMoreInteractions(receiver);
assertThat(testHarness.numKeyedStateEntries(), is(2));
assertThat(testHarness.numEventTimeTimers(), is(2));
// No timer has been fired but bundle should be ended
assertThat(timerInputReceived.get(), is(false));
assertThat(Whitebox.getInternalState(operator, "bundleStarted"), is(false));
// Allow user timer and cleanup timer to fire by triggering watermark advancement
testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
assertThat(timerInputReceived.getAndSet(false), is(true));
assertThat(cleanupQueue, hasSize(1));
// Cleanup will be executed after the bundle is complete because there are no more pending
// timers for the window
operator.invokeFinishBundle();
assertThat(cleanupQueue, hasSize(0));
assertThat(testHarness.numKeyedStateEntries(), is(0));
testHarness.close();
// The input receiver saw nothing beyond the single element verified earlier
verifyNoMoreInteractions(receiver);
}
use of org.apache.beam.runners.core.construction.Timer in project beam by apache.
the class FlinkExecutableStageFunction method reduce.
/**
 * For stateful and timer processing via a GroupReduceFunction.
 *
 * <p>Processes all elements of the iterable in one bundle, then moves the input watermark and
 * both processing-time clocks of a locally created {@code InMemoryTimerInternals} to
 * {@code BoundedWindow.TIMESTAMP_MAX_VALUE}, and finally keeps opening bundles to fire eligible
 * timers for as long as timers are pending.
 *
 * @param iterable the windowed input values handed to {@code processElements}
 * @param collector the collector passed to the output receiver factory
 * @throws Exception declared by the signature; nothing is caught inside this method
 */
@Override
public void reduce(Iterable<WindowedValue<InputT>> iterable, Collector<RawUnionValue> collector) throws Exception {
  // A new key starts here, so state kept for the previous key must be dropped.
  if (bagUserStateHandlerFactory != null) {
    bagUserStateHandlerFactory.resetForNewKey();
  }

  // In batch mode all data for this key is already available, and the context offers no timer
  // manager. A private in-memory one is created instead and driven to the end of time once all
  // elements have been processed.
  final InMemoryTimerInternals keyTimers = new InMemoryTimerInternals();
  keyTimers.advanceProcessingTime(Instant.now());
  keyTimers.advanceSynchronizedProcessingTime(Instant.now());

  ReceiverFactory outputReceivers = new ReceiverFactory(collector, outputMap);
  TimerReceiverFactory timerReceivers =
      new TimerReceiverFactory(
          stageBundleFactory,
          (Timer<?> timer, TimerInternals.TimerData timerData) -> {
            // Remember the user key of the most recent timer; it is passed along when firing.
            currentTimerKey = timer.getUserKey();
            if (!timer.getClearBit()) {
              keyTimers.setTimer(timerData);
            } else {
              keyTimers.deleteTimer(timerData);
            }
          },
          windowCoder);

  // Phase 1: push every element through a single bundle; no further elements can arrive.
  try (RemoteBundle elementBundle = stageBundleFactory.getBundle(outputReceivers, timerReceivers, stateRequestHandler, progressHandler)) {
    processElements(iterable, elementBundle);
  }

  // Phase 2: close all pending windows by moving the input watermark to infinity, then move
  // both processing-time clocks to infinity so that every timer becomes eligible.
  keyTimers.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
  keyTimers.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
  keyTimers.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);

  // Phase 3: fire timers, one bundle per round. Firing may set further timers, hence the loop.
  while (keyTimers.hasPendingTimers()) {
    try (RemoteBundle timerBundle = stageBundleFactory.getBundle(outputReceivers, timerReceivers, stateRequestHandler, progressHandler)) {
      PipelineTranslatorUtils.fireEligibleTimers(keyTimers, timerBundle.getTimerReceivers(), currentTimerKey);
    }
  }
}
Aggregations