Use of org.apache.beam.runners.core.KeyedWorkItem in the Apache Beam project.
Class GroupAlsoByWindowsParDoFn, method createRunner.
/**
 * Composes and returns a {@link DoFnRunner} based on the parameters.
 *
 * <p>A {@link GroupAlsoByWindowFnRunner} is always created first and executes the {@link
 * GroupAlsoByWindowFn}; every other runner returned here wraps it.
 *
 * <p>When the fn is a {@link StreamingGroupAlsoByWindowViaWindowSetFn}:
 *
 * <ul>
 *   <li>a {@link StreamingKeyedWorkItemSideInputDoFnRunner} is added if the pipeline is streaming
 *       and the side input reader is not empty;
 *   <li>a {@link LateDataDroppingDoFnRunner} is always added as the outermost wrapper.
 * </ul>
 *
 * <p>Otherwise a {@link StreamingSideInputDoFnRunner} wraps the basic runner if the pipeline is
 * streaming and the side input reader is not empty; if not, the basic runner is returned as is.
 *
 * @return the composed runner
 */
private DoFnRunner<InputT, KV<K, Iterable<V>>> createRunner() {
  OutputManager outputManager =
      new OutputManager() {
        @Override
        public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
          // Argument order follows the placeholders: the expected main output tag first,
          // then the tag that was actually used.
          checkState(
              tag.equals(mainOutputTag),
              "Must only output to main output tag (%s), but was %s",
              mainOutputTag,
              tag);
          try {
            receiver.process(output);
          } catch (Throwable t) {
            // The OutputManager interface declares no checked exceptions, so rethrow unchecked.
            throw new RuntimeException(t);
          }
        }
      };

  boolean hasStreamingSideInput =
      options.as(StreamingOptions.class).isStreaming() && !sideInputReader.isEmpty();

  DoFnRunner<InputT, KV<K, Iterable<V>>> basicRunner =
      new GroupAlsoByWindowFnRunner<>(
          options, doFn, sideInputReader, outputManager, mainOutputTag, stepContext);

  if (doFn instanceof StreamingGroupAlsoByWindowViaWindowSetFn) {
    // In this branch the runner is treated as consuming KeyedWorkItems.
    DoFnRunner<KeyedWorkItem<K, V>, KV<K, Iterable<V>>> streamingGABWRunner =
        (DoFnRunner<KeyedWorkItem<K, V>, KV<K, Iterable<V>>>) basicRunner;

    if (hasStreamingSideInput) {
      @SuppressWarnings("unchecked")
      WindmillKeyedWorkItem.FakeKeyedWorkItemCoder<K, V> keyedWorkItemCoder =
          (WindmillKeyedWorkItem.FakeKeyedWorkItemCoder<K, V>) inputCoder;
      StreamingSideInputFetcher<V, W> sideInputFetcher =
          new StreamingSideInputFetcher<>(
              sideInputViews,
              keyedWorkItemCoder.getElementCoder(),
              windowingStrategy,
              (StreamingModeExecutionContext.StreamingModeStepContext) stepContext);
      streamingGABWRunner =
          new StreamingKeyedWorkItemSideInputDoFnRunner<>(
              streamingGABWRunner, keyedWorkItemCoder.getKeyCoder(), sideInputFetcher, stepContext);
    }

    return (DoFnRunner<InputT, KV<K, Iterable<V>>>)
        DoFnRunners.<K, V, Iterable<V>, W>lateDataDroppingRunner(
            streamingGABWRunner, stepContext.timerInternals(), windowingStrategy);
  } else {
    if (hasStreamingSideInput) {
      return new StreamingSideInputDoFnRunner<>(
          basicRunner,
          new StreamingSideInputFetcher<>(
              sideInputViews,
              inputCoder,
              windowingStrategy,
              (StreamingModeExecutionContext.StreamingModeStepContext) stepContext));
    } else {
      return basicRunner;
    }
  }
}
Use of org.apache.beam.runners.core.KeyedWorkItem in the Apache Beam project.
Class StreamingGroupAlsoByWindowFnsTest, method testEmpty.
/** Checks that opening and closing a bundle without any input yields no output. */
@Test
public void testEmpty() throws Exception {
  TupleTag<KV<String, Iterable<String>>> mainTag = new TupleTag<>();
  ListOutputManager collected = new ListOutputManager();
  DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> gabwRunner =
      makeRunner(mainTag, collected, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));

  // Start and immediately finish a bundle; no work item is ever processed.
  gabwRunner.startBundle();
  gabwRunner.finishBundle();

  List<?> emitted = collected.getOutput(mainTag);
  assertThat(emitted.size(), equalTo(0));
}
Use of org.apache.beam.runners.core.KeyedWorkItem in the Apache Beam project.
Class StreamingGroupAlsoByWindowFnsTest, method testSlidingWindows.
/**
 * Groups three elements under sliding windows of size 20ms sliding every 10ms, using the
 * {@code EARLIEST} timestamp combiner.
 *
 * <p>The first bundle delivers the elements while the mocked input watermark is at 5; the second
 * bundle delivers one watermark timer per window while the mocked watermark is at 30. Exactly
 * three windowed outputs are expected, one per window.
 */
@Test
public void testSlidingWindows() throws Exception {
  TupleTag<KV<String, Iterable<String>>> mainTag = new TupleTag<>();
  ListOutputManager collected = new ListOutputManager();
  DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> gabwRunner =
      makeRunner(
          mainTag,
          collected,
          WindowingStrategy.of(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10)))
              .withTimestampCombiner(TimestampCombiner.EARLIEST));

  // First bundle: three data elements.
  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(5));
  gabwRunner.startBundle();

  WorkItem.Builder dataWork =
      WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
  InputMessageBundle.Builder bundle =
      dataWork.addMessageBundlesBuilder().setSourceComputationId(SOURCE_COMPUTATION_ID);
  Coder<String> valueCoder = StringUtf8Coder.of();

  addElement(
      bundle, Arrays.asList(window(-10, 10), window(0, 20)), new Instant(5), valueCoder, "v1");
  addElement(
      bundle, Arrays.asList(window(-10, 10), window(0, 20)), new Instant(2), valueCoder, "v0");
  addElement(
      bundle, Arrays.asList(window(0, 20), window(10, 30)), new Instant(15), valueCoder, "v2");

  gabwRunner.processElement(createValue(dataWork, valueCoder));
  gabwRunner.finishBundle();

  // Second bundle: one watermark timer for each of the three windows.
  gabwRunner.startBundle();

  WorkItem.Builder timerWork =
      WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
  addTimer(timerWork, window(-10, 10), new Instant(9), Timer.Type.WATERMARK);
  addTimer(timerWork, window(0, 20), new Instant(19), Timer.Type.WATERMARK);
  addTimer(timerWork, window(10, 30), new Instant(29), Timer.Type.WATERMARK);

  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(30));
  gabwRunner.processElement(createValue(timerWork, valueCoder));
  gabwRunner.finishBundle();

  List<WindowedValue<KV<String, Iterable<String>>>> emitted = collected.getOutput(mainTag);
  assertThat(emitted.size(), equalTo(3));
  // Each output carries the earliest element timestamp that fell into its window.
  assertThat(
      emitted,
      containsInAnyOrder(
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v0", "v1")),
              equalTo(new Instant(2)),
              equalTo(window(-10, 10))),
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
              equalTo(new Instant(2)),
              equalTo(window(0, 20))),
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v2")),
              equalTo(new Instant(15)),
              equalTo(window(10, 30)))));
}
Use of org.apache.beam.runners.core.KeyedWorkItem in the Apache Beam project.
Class StreamingKeyedWorkItemSideInputDoFnRunnerTest, method createRunner.
/**
 * Builds the runner under test: a {@link StreamingKeyedWorkItemSideInputDoFnRunner} wrapping a
 * {@link GroupAlsoByWindowFnRunner} around a combining group-also-by-window fn that sums
 * integers per string key.
 *
 * @param outputManager receives whatever the wrapped runner outputs
 * @return the side-input-aware runner wired to the test's fetcher and step context
 */
@SuppressWarnings("unchecked")
private StreamingKeyedWorkItemSideInputDoFnRunner<String, Integer, KV<String, Integer>, IntervalWindow> createRunner(DoFnRunners.OutputManager outputManager) throws Exception {
  Coder<String> stringKeyCoder = StringUtf8Coder.of();
  Coder<Integer> intValueCoder = BigEndianIntegerCoder.of();
  CoderRegistry coderRegistry = CoderRegistry.createDefault();

  AppliedCombineFn<String, Integer, ?, Integer> summingFn =
      AppliedCombineFn.withInputCoder(
          Sum.ofIntegers(), coderRegistry, KvCoder.of(stringKeyCoder, intValueCoder));
  WindowingStrategy<Object, IntervalWindow> strategy = WindowingStrategy.of(WINDOW_FN);

  // The factory's declared return type is broader, hence the raw-typed downcast.
  @SuppressWarnings("rawtypes")
  StreamingGroupAlsoByWindowViaWindowSetFn gabwFn =
      (StreamingGroupAlsoByWindowViaWindowSetFn)
          StreamingGroupAlsoByWindowsDoFns.create(
              strategy, key -> state, summingFn, stringKeyCoder);

  DoFnRunner<KeyedWorkItem<String, Integer>, KV<String, Integer>> innerRunner =
      new GroupAlsoByWindowFnRunner<>(
          PipelineOptionsFactory.create(),
          gabwFn.asDoFn(),
          mockSideInputReader,
          outputManager,
          mainOutputTag,
          stepContext);

  return new StreamingKeyedWorkItemSideInputDoFnRunner<String, Integer, KV<String, Integer>, IntervalWindow>(
      innerRunner, stringKeyCoder, sideInputFetcher, stepContext);
}
Use of org.apache.beam.runners.core.KeyedWorkItem in the Apache Beam project.
Class GroupByKeyOp, method open.
/**
 * Initializes this operator: reads the pipeline options from the Samza execution context, sets
 * up the state and timer internals factories, builds the group-also-by-window {@code DoFn} and
 * stores a metrics-wrapped runner for it in {@code fnRunner}.
 *
 * @param config not read by this method
 * @param context supplies the application container context (cast to
 *     {@link SamzaExecutionContext}) and the task context used to look up stores
 * @param timerRegistry scheduler handed to the timer internals factory
 * @param emitter sink from which the output manager is created
 */
@Override
public void open(Config config, Context context, Scheduler<KeyedTimerData<K>> timerRegistry, OpEmitter<KV<K, OutputT>> emitter) {
  // Looked up once and reused for both the pipeline options and the metrics container.
  final SamzaExecutionContext samzaExecutionContext =
      (SamzaExecutionContext) context.getApplicationContainerContext();
  this.pipelineOptions = samzaExecutionContext.getPipelineOptions();

  final SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory =
      SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(
          transformId, context.getTaskContext(), pipelineOptions);
  final DoFnRunners.OutputManager outputManager = outputManagerFactory.create(emitter);

  this.stateInternalsFactory =
      new SamzaStoreStateInternals.Factory<>(
          transformId,
          Collections.singletonMap(
              SamzaStoreStateInternals.BEAM_STORE,
              SamzaStoreStateInternals.getBeamStore(context.getTaskContext())),
          keyCoder,
          pipelineOptions.getStoreBatchGetSize());
  this.timerInternalsFactory =
      SamzaTimerInternalsFactory.createTimerInternalFactory(
          keyCoder,
          timerRegistry,
          TIMER_STATE_ID,
          nonKeyedStateInternalsFactory,
          windowingStrategy,
          isBounded,
          pipelineOptions);

  final DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFn =
      GroupAlsoByWindowViaWindowSetNewDoFn.create(
          windowingStrategy,
          stateInternalsFactory,
          timerInternalsFactory,
          NullSideInputReader.of(Collections.emptyList()),
          reduceFn,
          outputManager,
          mainOutputTag);

  // The step context delegates to KeyedInternals for both state and timers.
  final KeyedInternals<K> keyedInternals =
      new KeyedInternals<>(stateInternalsFactory, timerInternalsFactory);
  final StepContext stepContext =
      new StepContext() {
        @Override
        public StateInternals stateInternals() {
          return keyedInternals.stateInternals();
        }

        @Override
        public TimerInternals timerInternals() {
          return keyedInternals.timerInternals();
        }
      };

  // NOTE(review): the runner is given freshly created default options rather than the
  // pipelineOptions obtained above; presumably intentional, confirm before changing.
  final DoFnRunner<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFnRunner =
      DoFnRunners.simpleRunner(
          PipelineOptionsFactory.create(),
          doFn,
          NullSideInputReader.of(Collections.emptyList()),
          outputManager,
          mainOutputTag,
          Collections.emptyList(),
          stepContext,
          null,
          Collections.emptyMap(),
          windowingStrategy,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  this.fnRunner =
      DoFnRunnerWithMetrics.wrap(
          doFnRunner, samzaExecutionContext.getMetricsContainer(), transformFullName);
}
Aggregations