use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.
the class StreamingGroupAlsoByWindowFnsTest method testSessions.
/**
 * Pushes four elements whose session windows (10 ms gap) overlap into the runner, then delivers
 * watermark timers in a second bundle and expects two merged sessions to be emitted. With
 * {@code TimestampCombiner.EARLIEST} each output is expected to carry the earliest timestamp of
 * the elements in its session.
 */
@Test
public void testSessions() throws Exception {
  Coder<String> valueCoder = StringUtf8Coder.of();
  TupleTag<KV<String, Iterable<String>>> outputTag = new TupleTag<>();
  ListOutputManager outputManager = new ListOutputManager();
  DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> runner =
      makeRunner(
          outputTag,
          outputManager,
          WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
              .withTimestampCombiner(TimestampCombiner.EARLIEST));

  // First bundle: deliver the elements while the input watermark is still at 0.
  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(0));
  runner.startBundle();

  WorkItem.Builder elementsWork =
      WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
  InputMessageBundle.Builder elementsBundle =
      elementsWork.addMessageBundlesBuilder().setSourceComputationId(SOURCE_COMPUTATION_ID);

  // Each element sits in the window [start, start + 10) and is timestamped at start.
  int[] sessionStarts = {0, 5, 15, 3};
  String[] payloads = {"v1", "v2", "v3", "v0"};
  for (int i = 0; i < sessionStarts.length; i++) {
    int start = sessionStarts[i];
    addElement(
        elementsBundle,
        Arrays.asList(window(start, start + 10)),
        new Instant(start),
        valueCoder,
        payloads[i]);
  }

  runner.processElement(createValue(elementsWork, valueCoder));
  runner.finishBundle();

  // Second bundle: deliver the watermark timers for the merged sessions.
  runner.startBundle();

  WorkItem.Builder timersWork =
      WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
  // Note that the WATERMARK timer for Instant(9) will have been deleted by
  // ReduceFnRunner when window(0, 10) was merged away.
  addTimer(timersWork, window(0, 15), new Instant(14), Timer.Type.WATERMARK);
  addTimer(timersWork, window(15, 25), new Instant(24), Timer.Type.WATERMARK);

  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(25));
  runner.processElement(createValue(timersWork, valueCoder));
  runner.finishBundle();

  List<WindowedValue<KV<String, Iterable<String>>>> emitted =
      outputManager.getOutput(outputTag);
  assertThat(emitted.size(), equalTo(2));
  assertThat(
      emitted,
      containsInAnyOrder(
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
              equalTo(new Instant(0)),
              equalTo(window(0, 15))),
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v3")),
              equalTo(new Instant(15)),
              equalTo(window(15, 25)))));
}
use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.
the class StreamingGroupAlsoByWindowFnsTest method testSessionsCombine.
/**
 * Same scenario as the plain sessions test, but with a {@code SumLongs} combine applied: four
 * long values in overlapping session windows (10 ms gap) are fed in, watermark timers are
 * delivered in a second bundle, and the test expects one summed value per merged session,
 * timestamped at the end of its window.
 */
@Test
public void testSessionsCombine() throws Exception {
  Coder<Long> valueCoder = BigEndianLongCoder.of();
  TupleTag<KV<String, Long>> outputTag = new TupleTag<>();

  CombineFn<Long, ?, Long> combineFn = new SumLongs();
  CoderRegistry registry = CoderRegistry.createDefault();
  AppliedCombineFn<String, Long, ?, Long> appliedCombineFn =
      AppliedCombineFn.withInputCoder(
          combineFn, registry, KvCoder.of(StringUtf8Coder.of(), BigEndianLongCoder.of()));

  ListOutputManager outputManager = new ListOutputManager();
  DoFnRunner<KeyedWorkItem<String, Long>, KV<String, Long>> runner =
      makeRunner(
          outputTag,
          outputManager,
          WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10))),
          appliedCombineFn);

  // First bundle: deliver the elements while the input watermark is still at 0.
  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(0));
  runner.startBundle();

  WorkItem.Builder elementsWork =
      WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
  InputMessageBundle.Builder elementsBundle =
      elementsWork.addMessageBundlesBuilder().setSourceComputationId(SOURCE_COMPUTATION_ID);

  // Each element sits in the window [start, start + 10) and is timestamped at start.
  int[] sessionStarts = {0, 5, 15, 3};
  Long[] addends = {1L, 2L, 3L, 4L};
  for (int i = 0; i < sessionStarts.length; i++) {
    int start = sessionStarts[i];
    addElement(
        elementsBundle,
        Arrays.asList(window(start, start + 10)),
        new Instant(start),
        valueCoder,
        addends[i]);
  }

  runner.processElement(createValue(elementsWork, valueCoder));
  runner.finishBundle();

  // Second bundle: deliver the watermark timers for the merged sessions.
  runner.startBundle();

  WorkItem.Builder timersWork =
      WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
  // Note that the WATERMARK timer for Instant(9) will have been deleted by
  // ReduceFnRunner when window(0, 10) was merged away.
  addTimer(timersWork, window(0, 15), new Instant(14), Timer.Type.WATERMARK);
  addTimer(timersWork, window(15, 25), new Instant(24), Timer.Type.WATERMARK);

  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(25));
  runner.processElement(createValue(timersWork, valueCoder));
  runner.finishBundle();

  List<WindowedValue<KV<String, Long>>> emitted = outputManager.getOutput(outputTag);
  assertThat(emitted.size(), equalTo(2));
  assertThat(
      emitted,
      containsInAnyOrder(
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), equalTo(7L)),
              equalTo(window(0, 15).maxTimestamp()),
              equalTo(window(0, 15))),
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), equalTo(3L)),
              equalTo(window(15, 25).maxTimestamp()),
              equalTo(window(15, 25)))));
}
use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.
the class WindmillKeyedWorkItemTest method testElementIteration.
/**
 * Builds a work item whose elements are split across two message bundles and expects
 * {@code elementsIterable()} to yield all three of them, first bundle before second.
 */
@Test
public void testElementIteration() throws Exception {
  Windmill.WorkItem.Builder workItemBuilder = Windmill.WorkItem.newBuilder();
  workItemBuilder.setKey(SERIALIZED_KEY);
  workItemBuilder.setWorkToken(17);

  // First bundle carries two elements in different windows.
  Windmill.InputMessageBundle.Builder firstBundle =
      workItemBuilder.addMessageBundlesBuilder().setSourceComputationId("computation");
  addElement(firstBundle, 5, "hello", WINDOW_1, paneInfo(0));
  addElement(firstBundle, 7, "world", WINDOW_2, paneInfo(2));

  // Second bundle carries one more element for the first window.
  Windmill.InputMessageBundle.Builder secondBundle =
      workItemBuilder.addMessageBundlesBuilder().setSourceComputationId("computation");
  addElement(secondBundle, 6, "earth", WINDOW_1, paneInfo(1));

  KeyedWorkItem<String, String> keyedWorkItem =
      new WindmillKeyedWorkItem<>(
          KEY, workItemBuilder.build(), WINDOW_CODER, WINDOWS_CODER, VALUE_CODER);

  WindowedValue<String> expectedHello =
      WindowedValue.of("hello", new Instant(5), WINDOW_1, paneInfo(0));
  WindowedValue<String> expectedWorld =
      WindowedValue.of("world", new Instant(7), WINDOW_2, paneInfo(2));
  WindowedValue<String> expectedEarth =
      WindowedValue.of("earth", new Instant(6), WINDOW_1, paneInfo(1));

  assertThat(
      keyedWorkItem.elementsIterable(),
      Matchers.contains(expectedHello, expectedWorld, expectedEarth));
}
use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.
the class StatefulParDoEvaluatorFactoryTest method testUnprocessedElements.
/**
 * A test that explicitly delays a side input so that the main input will have to be reprocessed,
 * testing that {@code finishBundle()} re-assembles the GBK outputs correctly.
 *
 * <p>The side input reader mock always reports "not ready", so the single fabricated input
 * element is expected to come back through {@code getUnprocessedElements()} with its key, window
 * and all three values intact.
 */
@Test
public void testUnprocessedElements() throws Exception {
  // To test the factory, first we set up a pipeline and then we use the constructed
  // pipeline to create the right parameters to pass to the factory
  final String stateId = "my-state-id";
  // The one key shared by every element in this test.
  String key = "hello";

  // For consistency, window it into FixedWindows. Actually we will fabricate an input bundle.
  PCollection<KV<String, Integer>> mainInput =
      pipeline
          .apply(Create.of(KV.of(key, 1), KV.of(key, 2)))
          .apply(Window.into(FixedWindows.of(Duration.millis(10))));

  final PCollectionView<List<Integer>> sideInput =
      pipeline
          .apply("Create side input", Create.of(42))
          .apply("Window side input", Window.into(FixedWindows.of(Duration.millis(10))))
          .apply("View side input", View.asList());

  TupleTag<Integer> mainOutput = new TupleTag<>();
  PCollection<Integer> produced =
      mainInput
          .apply(
              new ParDoMultiOverrideFactory.GbkThenStatefulParDo<>(
                  new DoFn<KV<String, Integer>, Integer>() {
                    @StateId(stateId)
                    private final StateSpec<ValueState<String>> spec =
                        StateSpecs.value(StringUtf8Coder.of());

                    @ProcessElement
                    public void process(ProcessContext c) {
                      // Intentionally empty: the test only exercises element push-back.
                    }
                  },
                  mainOutput,
                  TupleTagList.empty(),
                  Collections.singletonList(sideInput),
                  DoFnSchemaInformation.create(),
                  Collections.emptyMap()))
          .get(mainOutput)
          .setCoder(VarIntCoder.of());

  StatefulParDoEvaluatorFactory<String, Integer, Integer> factory =
      new StatefulParDoEvaluatorFactory<>(mockEvaluationContext, options);

  // This will be the stateful ParDo from the expansion
  AppliedPTransform<
          PCollection<KeyedWorkItem<String, KV<String, Integer>>>,
          PCollectionTuple,
          StatefulParDo<String, Integer, Integer>>
      producingTransform = (AppliedPTransform) DirectGraphs.getProducer(produced);

  // Then there will be a digging down to the step context to get the state internals
  when(mockEvaluationContext.getExecutionContext(
          eq(producingTransform), Mockito.<StructuralKey>any()))
      .thenReturn(mockExecutionContext);
  when(mockExecutionContext.getStepContext(any())).thenReturn(mockStepContext);
  when(mockEvaluationContext.createBundle(Matchers.<PCollection<Integer>>any()))
      .thenReturn(mockUncommittedBundle);
  when(mockStepContext.getTimerUpdate()).thenReturn(TimerUpdate.empty());

  // And digging to check whether the window is ready. The side input is reported as never
  // ready, which is what forces the main input to be pushed back.
  when(mockEvaluationContext.createSideInputReader(anyList())).thenReturn(mockSideInputReader);
  when(mockSideInputReader.isReady(Matchers.any(), Matchers.any())).thenReturn(false);

  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));

  // A single GBK output element in firstWindow: one keyed work item that carries three values
  // (1, 13 and 15) for the same key.
  WindowedValue<KV<String, Integer>> firstKv =
      WindowedValue.of(KV.of(key, 1), new Instant(3), firstWindow, PaneInfo.NO_FIRING);
  WindowedValue<KeyedWorkItem<String, KV<String, Integer>>> gbkOutputElement =
      firstKv.withValue(
          KeyedWorkItems.elementsWorkItem(
              key,
              ImmutableList.of(
                  firstKv,
                  firstKv.withValue(KV.of(key, 13)),
                  firstKv.withValue(KV.of(key, 15)))));

  CommittedBundle<KeyedWorkItem<String, KV<String, Integer>>> inputBundle =
      BUNDLE_FACTORY
          .createBundle(
              (PCollection<KeyedWorkItem<String, KV<String, Integer>>>)
                  Iterables.getOnlyElement(
                      TransformInputs.nonAdditionalInputs(producingTransform)))
          .add(gbkOutputElement)
          .commit(Instant.now());

  TransformEvaluator<KeyedWorkItem<String, KV<String, Integer>>> evaluator =
      factory.forApplication(producingTransform, inputBundle);
  evaluator.processElement(gbkOutputElement);

  // This should push back every element, re-assembled into keyed work items
  // in the appropriate window. Since the keys are equal they are single-threaded
  TransformResult<KeyedWorkItem<String, KV<String, Integer>>> result = evaluator.finishBundle();

  List<Integer> pushedBackInts = new ArrayList<>();
  for (WindowedValue<? extends KeyedWorkItem<String, KV<String, Integer>>> unprocessedElement :
      result.getUnprocessedElements()) {
    assertThat(
        Iterables.getOnlyElement(unprocessedElement.getWindows()),
        equalTo((BoundedWindow) firstWindow));
    assertThat(unprocessedElement.getValue().key(), equalTo(key));
    for (WindowedValue<KV<String, Integer>> windowedKv :
        unprocessedElement.getValue().elementsIterable()) {
      pushedBackInts.add(windowedKv.getValue().getValue());
    }
  }
  assertThat(pushedBackInts, containsInAnyOrder(1, 13, 15));
}
use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.
the class WindowingWindmillReader method iterator.
/**
 * Wraps the work item of the current context in a {@link KeyedWorkItem} and exposes it through
 * an iterator.
 *
 * <p>The key is decoded from the context's serialized key using {@code Coder.Context.OUTER}.
 * When the work item has neither timers nor elements the returned iterator yields nothing;
 * otherwise it yields exactly one value, the keyed work item wrapped in
 * {@code ValueInEmptyWindows}.
 *
 * @return an iterator producing zero or one windowed keyed work item
 * @throws IOException if decoding the key fails
 */
@Override
public NativeReaderIterator<WindowedValue<KeyedWorkItem<K, T>>> iterator() throws IOException {
  final K decodedKey =
      keyCoder.decode(context.getSerializedKey().newInput(), Coder.Context.OUTER);
  final WorkItem work = context.getWork();
  final KeyedWorkItem<K, T> item =
      new WindmillKeyedWorkItem<>(decodedKey, work, windowCoder, windowsCoder, valueCoder);

  // Timers are inspected first; elements are only looked at when there are no timers.
  final boolean hasContent =
      !Iterables.isEmpty(item.timersIterable()) || !Iterables.isEmpty(item.elementsIterable());
  final WindowedValue<KeyedWorkItem<K, T>> wrapped = new ValueInEmptyWindows<>(item);

  if (hasContent) {
    // Single-shot iterator: positioned on the value after start(), exhausted after advance().
    return new NativeReaderIterator<WindowedValue<KeyedWorkItem<K, T>>>() {
      private boolean positioned;

      @Override
      public boolean start() throws IOException {
        positioned = true;
        return true;
      }

      @Override
      public boolean advance() throws IOException {
        positioned = false;
        return false;
      }

      @Override
      public WindowedValue<KeyedWorkItem<K, T>> getCurrent() {
        if (!positioned) {
          throw new NoSuchElementException();
        }
        return wrapped;
      }
    };
  }

  // Return a noop iterator when current workitem is an empty workitem.
  return new NativeReaderIterator<WindowedValue<KeyedWorkItem<K, T>>>() {
    @Override
    public boolean start() throws IOException {
      return false;
    }

    @Override
    public boolean advance() throws IOException {
      return false;
    }

    @Override
    public WindowedValue<KeyedWorkItem<K, T>> getCurrent() {
      throw new NoSuchElementException();
    }
  };
}
Aggregations