Search in sources :

Example 6 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testSessions.

/**
 * Pushes four string elements for a single key through a runner configured with session windows
 * (10ms gap, {@code TimestampCombiner.EARLIEST}), then delivers watermark timers for
 * {@code window(0, 15)} and {@code window(15, 25)} and checks that exactly one grouped output is
 * produced for each of those two windows.
 */
@Test
public void testSessions() throws Exception {
    TupleTag<KV<String, Iterable<String>>> outputTag = new TupleTag<>();
    ListOutputManager outputs = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> runner =
        makeRunner(
            outputTag,
            outputs,
            WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
                .withTimestampCombiner(TimestampCombiner.EARLIEST));

    // First bundle: deliver the elements while the input watermark is still at 0.
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(0));
    runner.startBundle();
    WorkItem.Builder elementsWork =
        WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder bundle =
        elementsWork.addMessageBundlesBuilder().setSourceComputationId(SOURCE_COMPUTATION_ID);
    Coder<String> stringCoder = StringUtf8Coder.of();
    addElement(bundle, Arrays.asList(window(0, 10)), new Instant(0), stringCoder, "v1");
    addElement(bundle, Arrays.asList(window(5, 15)), new Instant(5), stringCoder, "v2");
    addElement(bundle, Arrays.asList(window(15, 25)), new Instant(15), stringCoder, "v3");
    addElement(bundle, Arrays.asList(window(3, 13)), new Instant(3), stringCoder, "v0");
    runner.processElement(createValue(elementsWork, stringCoder));
    runner.finishBundle();

    // Second bundle: deliver the watermark timers, then move the input watermark to 25.
    runner.startBundle();
    WorkItem.Builder timersWork =
        WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
    // No timer is delivered for Instant(9): ReduceFnRunner will already have deleted that
    // WATERMARK timer when window(0, 10) was merged away.
    addTimer(timersWork, window(0, 15), new Instant(14), Timer.Type.WATERMARK);
    addTimer(timersWork, window(15, 25), new Instant(24), Timer.Type.WATERMARK);
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(25));
    runner.processElement(createValue(timersWork, stringCoder));
    runner.finishBundle();

    List<WindowedValue<KV<String, Iterable<String>>>> grouped = outputs.getOutput(outputTag);
    assertThat(grouped.size(), equalTo(2));
    assertThat(
        grouped,
        containsInAnyOrder(
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
                equalTo(new Instant(0)),
                equalTo(window(0, 15))),
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), containsInAnyOrder("v3")),
                equalTo(new Instant(15)),
                equalTo(window(15, 25)))));
}
Also used : Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Example 7 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testSessionsCombine.

/**
 * Pushes four long values for a single key through a runner configured with session windows
 * (10ms gap) and a {@code SumLongs} combine function, then delivers watermark timers for
 * {@code window(0, 15)} and {@code window(15, 25)} and checks the combined sum emitted for each
 * window, timestamped at that window's {@code maxTimestamp()}.
 */
@Test
public void testSessionsCombine() throws Exception {
    TupleTag<KV<String, Long>> outputTag = new TupleTag<>();
    CombineFn<Long, ?, Long> combineFn = new SumLongs();
    CoderRegistry coderRegistry = CoderRegistry.createDefault();
    AppliedCombineFn<String, Long, ?, Long> appliedCombineFn =
        AppliedCombineFn.withInputCoder(
            combineFn, coderRegistry, KvCoder.of(StringUtf8Coder.of(), BigEndianLongCoder.of()));
    ListOutputManager outputs = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, Long>, KV<String, Long>> runner =
        makeRunner(
            outputTag,
            outputs,
            WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10))),
            appliedCombineFn);

    // First bundle: deliver the elements while the input watermark is still at 0.
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(0));
    runner.startBundle();
    WorkItem.Builder elementsWork =
        WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder bundle =
        elementsWork.addMessageBundlesBuilder().setSourceComputationId(SOURCE_COMPUTATION_ID);
    Coder<Long> longCoder = BigEndianLongCoder.of();
    addElement(bundle, Arrays.asList(window(0, 10)), new Instant(0), longCoder, 1L);
    addElement(bundle, Arrays.asList(window(5, 15)), new Instant(5), longCoder, 2L);
    addElement(bundle, Arrays.asList(window(15, 25)), new Instant(15), longCoder, 3L);
    addElement(bundle, Arrays.asList(window(3, 13)), new Instant(3), longCoder, 4L);
    runner.processElement(createValue(elementsWork, longCoder));
    runner.finishBundle();

    // Second bundle: deliver the watermark timers, then move the input watermark to 25.
    runner.startBundle();
    WorkItem.Builder timersWork =
        WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
    // No timer is delivered for Instant(9): ReduceFnRunner will already have deleted that
    // WATERMARK timer when window(0, 10) was merged away.
    addTimer(timersWork, window(0, 15), new Instant(14), Timer.Type.WATERMARK);
    addTimer(timersWork, window(15, 25), new Instant(24), Timer.Type.WATERMARK);
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(25));
    runner.processElement(createValue(timersWork, longCoder));
    runner.finishBundle();

    List<WindowedValue<KV<String, Long>>> combined = outputs.getOutput(outputTag);
    assertThat(combined.size(), equalTo(2));
    assertThat(
        combined,
        containsInAnyOrder(
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), equalTo(7L)),
                equalTo(window(0, 15).maxTimestamp()),
                equalTo(window(0, 15))),
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), equalTo(3L)),
                equalTo(window(15, 25).maxTimestamp()),
                equalTo(window(15, 25)))));
}
Also used : Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Example 8 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class WindmillKeyedWorkItemTest method testElementIteration.

/**
 * Builds a work item with two message bundles and checks that {@code elementsIterable()} yields
 * every element in the order the bundles and their elements were added, with the timestamp,
 * window and pane info each element was added with.
 */
@Test
public void testElementIteration() throws Exception {
    Windmill.WorkItem.Builder workItemBuilder = Windmill.WorkItem.newBuilder();
    workItemBuilder.setKey(SERIALIZED_KEY);
    workItemBuilder.setWorkToken(17);

    // First bundle carries two elements.
    Windmill.InputMessageBundle.Builder firstBundle =
        workItemBuilder.addMessageBundlesBuilder().setSourceComputationId("computation");
    addElement(firstBundle, 5, "hello", WINDOW_1, paneInfo(0));
    addElement(firstBundle, 7, "world", WINDOW_2, paneInfo(2));

    // Second bundle carries one element whose timestamp falls between the first two.
    Windmill.InputMessageBundle.Builder secondBundle =
        workItemBuilder.addMessageBundlesBuilder().setSourceComputationId("computation");
    addElement(secondBundle, 6, "earth", WINDOW_1, paneInfo(1));

    KeyedWorkItem<String, String> keyedWorkItem =
        new WindmillKeyedWorkItem<>(
            KEY, workItemBuilder.build(), WINDOW_CODER, WINDOWS_CODER, VALUE_CODER);

    assertThat(
        keyedWorkItem.elementsIterable(),
        Matchers.contains(
            WindowedValue.of("hello", new Instant(5), WINDOW_1, paneInfo(0)),
            WindowedValue.of("world", new Instant(7), WINDOW_2, paneInfo(2)),
            WindowedValue.of("earth", new Instant(6), WINDOW_1, paneInfo(1))));
}
Also used : Instant(org.joda.time.Instant) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) Test(org.junit.Test)

Example 9 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class StatefulParDoEvaluatorFactoryTest method testUnprocessedElements.

/**
 * A test that explicitly delays a side input so that the main input will have to be reprocessed,
 * testing that {@code finishBundle()} re-assembles the GBK outputs correctly.
 *
 * <p>The mocked side input reader always reports the side input as not ready, so the three values
 * fed to the evaluator are expected to come back through
 * {@code TransformResult.getUnprocessedElements()} with their key and window intact.
 */
@Test
public void testUnprocessedElements() throws Exception {
    // To test the factory, first we set up a pipeline and then we use the constructed
    // pipeline to create the right parameters to pass to the factory
    final String stateId = "my-state-id";
    // For consistency, window it into FixedWindows. Actually we will fabricate an input bundle.
    PCollection<KV<String, Integer>> mainInput = pipeline.apply(Create.of(KV.of("hello", 1), KV.of("hello", 2))).apply(Window.into(FixedWindows.of(Duration.millis(10))));
    // The side input is windowed the same way as the main input and viewed as a list.
    final PCollectionView<List<Integer>> sideInput = pipeline.apply("Create side input", Create.of(42)).apply("Window side input", Window.into(FixedWindows.of(Duration.millis(10)))).apply("View side input", View.asList());
    TupleTag<Integer> mainOutput = new TupleTag<>();
    // A stateful DoFn (it declares a state spec) that takes the side input; its process method
    // has an empty body.
    PCollection<Integer> produced = mainInput.apply(new ParDoMultiOverrideFactory.GbkThenStatefulParDo<>(new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<String>> spec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void process(ProcessContext c) {
        }
    }, mainOutput, TupleTagList.empty(), Collections.singletonList(sideInput), DoFnSchemaInformation.create(), Collections.emptyMap())).get(mainOutput).setCoder(VarIntCoder.of());
    StatefulParDoEvaluatorFactory<String, Integer, Integer> factory = new StatefulParDoEvaluatorFactory<>(mockEvaluationContext, options);
    // This will be the stateful ParDo from the expansion
    AppliedPTransform<PCollection<KeyedWorkItem<String, KV<String, Integer>>>, PCollectionTuple, StatefulParDo<String, Integer, Integer>> producingTransform = (AppliedPTransform) DirectGraphs.getProducer(produced);
    // Then there will be a digging down to the step context to get the state internals
    when(mockEvaluationContext.getExecutionContext(eq(producingTransform), Mockito.<StructuralKey>any())).thenReturn(mockExecutionContext);
    when(mockExecutionContext.getStepContext(any())).thenReturn(mockStepContext);
    when(mockEvaluationContext.createBundle(Matchers.<PCollection<Integer>>any())).thenReturn(mockUncommittedBundle);
    when(mockStepContext.getTimerUpdate()).thenReturn(TimerUpdate.empty());
    // The side input reader is stubbed to always report "not ready"; this is what delays the
    // side input for this test.
    when(mockEvaluationContext.createSideInputReader(anyList())).thenReturn(mockSideInputReader);
    when(mockSideInputReader.isReady(Matchers.any(), Matchers.any())).thenReturn(false);
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
    // Fabricate a single GBK output element: one KeyedWorkItem for key "hello" holding three
    // values (1, 13, 15), all timestamped at 3 and all placed in firstWindow.
    String key = "hello";
    WindowedValue<KV<String, Integer>> firstKv = WindowedValue.of(KV.of(key, 1), new Instant(3), firstWindow, PaneInfo.NO_FIRING);
    WindowedValue<KeyedWorkItem<String, KV<String, Integer>>> gbkOutputElement = firstKv.withValue(KeyedWorkItems.elementsWorkItem("hello", ImmutableList.of(firstKv, firstKv.withValue(KV.of(key, 13)), firstKv.withValue(KV.of(key, 15)))));
    // Commit that element into a bundle on the transform's only non-additional input.
    CommittedBundle<KeyedWorkItem<String, KV<String, Integer>>> inputBundle = BUNDLE_FACTORY.createBundle((PCollection<KeyedWorkItem<String, KV<String, Integer>>>) Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(producingTransform))).add(gbkOutputElement).commit(Instant.now());
    TransformEvaluator<KeyedWorkItem<String, KV<String, Integer>>> evaluator = factory.forApplication(producingTransform, inputBundle);
    evaluator.processElement(gbkOutputElement);
    // Every element is expected to be handed back as part of an unprocessed KeyedWorkItem in
    // firstWindow with key "hello".
    TransformResult<KeyedWorkItem<String, KV<String, Integer>>> result = evaluator.finishBundle();
    // Flatten the integer payloads of all unprocessed work items so they can be compared
    // regardless of how they were grouped or ordered.
    List<Integer> pushedBackInts = new ArrayList<>();
    for (WindowedValue<? extends KeyedWorkItem<String, KV<String, Integer>>> unprocessedElement : result.getUnprocessedElements()) {
        assertThat(Iterables.getOnlyElement(unprocessedElement.getWindows()), equalTo((BoundedWindow) firstWindow));
        assertThat(unprocessedElement.getValue().key(), equalTo("hello"));
        for (WindowedValue<KV<String, Integer>> windowedKv : unprocessedElement.getValue().elementsIterable()) {
            pushedBackInts.add(windowedKv.getValue().getValue());
        }
    }
    assertThat(pushedBackInts, containsInAnyOrder(1, 13, 15));
}
Also used : ArrayList(java.util.ArrayList) TupleTag(org.apache.beam.sdk.values.TupleTag) StateSpec(org.apache.beam.sdk.state.StateSpec) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ArrayList(java.util.ArrayList) Matchers.anyList(org.mockito.Matchers.anyList) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) StatefulParDo(org.apache.beam.runners.direct.ParDoMultiOverrideFactory.StatefulParDo) Test(org.junit.Test)

Example 10 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class WindowingWindmillReader method iterator.

/**
 * Creates an iterator over the current work item, wrapped as a single {@link KeyedWorkItem}.
 *
 * <p>The key is decoded from the context's serialized key. If the resulting keyed work item has
 * neither timers nor elements, the returned iterator yields nothing; otherwise it yields exactly
 * one value, the keyed work item wrapped in a {@code ValueInEmptyWindows}.
 *
 * @return an iterator producing zero or one windowed keyed work item
 * @throws IOException declared by the method signature
 */
@Override
public NativeReaderIterator<WindowedValue<KeyedWorkItem<K, T>>> iterator() throws IOException {
    final K decodedKey = keyCoder.decode(context.getSerializedKey().newInput(), Coder.Context.OUTER);
    final WorkItem work = context.getWork();
    KeyedWorkItem<K, T> keyedWork =
        new WindmillKeyedWorkItem<>(decodedKey, work, windowCoder, windowsCoder, valueCoder);
    // Timers are checked first; elements are only inspected when there are no timers.
    final boolean hasContent =
        !Iterables.isEmpty(keyedWork.timersIterable())
            || !Iterables.isEmpty(keyedWork.elementsIterable());
    final WindowedValue<KeyedWorkItem<K, T>> onlyValue = new ValueInEmptyWindows<>(keyedWork);

    if (hasContent) {
        // Single-element iterator: start() exposes the value, advance() exhausts it.
        return new NativeReaderIterator<WindowedValue<KeyedWorkItem<K, T>>>() {

            private WindowedValue<KeyedWorkItem<K, T>> current;

            @Override
            public boolean start() throws IOException {
                current = onlyValue;
                return true;
            }

            @Override
            public boolean advance() throws IOException {
                current = null;
                return false;
            }

            @Override
            public WindowedValue<KeyedWorkItem<K, T>> getCurrent() {
                if (current != null) {
                    return current;
                }
                throw new NoSuchElementException();
            }
        };
    }

    // Return a noop iterator when current workitem is an empty workitem.
    return new NativeReaderIterator<WindowedValue<KeyedWorkItem<K, T>>>() {

        @Override
        public boolean start() throws IOException {
            return false;
        }

        @Override
        public boolean advance() throws IOException {
            return false;
        }

        @Override
        public WindowedValue<KeyedWorkItem<K, T>> getCurrent() {
            throw new NoSuchElementException();
        }
    };
}
Also used : ValueInEmptyWindows(org.apache.beam.runners.dataflow.worker.util.ValueInEmptyWindows) WindowedValue(org.apache.beam.sdk.util.WindowedValue) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) NoSuchElementException(java.util.NoSuchElementException)

Aggregations

KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)20 TupleTag (org.apache.beam.sdk.values.TupleTag)16 KV (org.apache.beam.sdk.values.KV)15 Instant (org.joda.time.Instant)14 Test (org.junit.Test)13 WindowedValue (org.apache.beam.sdk.util.WindowedValue)11 ListOutputManager (org.apache.beam.runners.dataflow.worker.util.ListOutputManager)9 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)9 InputMessageBundle (org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle)6 WorkItem (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem)6 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)4 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)4 PCollection (org.apache.beam.sdk.values.PCollection)4 OutputManager (org.apache.beam.runners.core.DoFnRunners.OutputManager)3 OutputWindowedValue (org.apache.beam.runners.core.OutputWindowedValue)3 KvCoder (org.apache.beam.sdk.coders.KvCoder)3 PaneInfo (org.apache.beam.sdk.transforms.windowing.PaneInfo)3 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)3 Collection (java.util.Collection)2 List (java.util.List)2