Search in sources :

Example 16 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class GroupAlsoByWindowsParDoFn method createRunner.

/**
 * Composes and returns a {@link DoFnRunner} based on the parameters.
 *
 * <p>A {@code GroupAlsoByWindowFnRunner} is always built first and executes {@code doFn}; it is
 * returned as-is when no wrapping applies.
 *
 * <p>A {@link LateDataDroppingDoFnRunner} handles late data dropping for a {@link
 * StreamingGroupAlsoByWindowViaWindowSetFn}.
 *
 * <p>A {@link StreamingSideInputDoFnRunner} handles streaming side inputs.
 *
 * <p>A {@link StreamingKeyedWorkItemSideInputDoFnRunner} handles streaming side inputs for a
 * {@link StreamingGroupAlsoByWindowViaWindowSetFn}.
 *
 * @return the runner chain appropriate for {@code doFn}, the streaming flag in {@code options}
 *     and whether {@code sideInputReader} is empty
 */
private DoFnRunner<InputT, KV<K, Iterable<V>>> createRunner() {
    // Forwards every output to the single receiver; anything not on the main tag is rejected.
    OutputManager outputManager = new OutputManager() {

        @Override
        public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
            // Argument order follows the message: first the expected main tag, then the tag
            // that was actually used.
            checkState(tag.equals(mainOutputTag), "Must only output to main output tag (%s), but was %s", mainOutputTag, tag);
            try {
                receiver.process(output);
            } catch (Throwable t) {
                throw new RuntimeException(t);
            }
        }
    };
    // Side-input wrapping is only needed when the pipeline is streaming AND side inputs exist.
    boolean hasStreamingSideInput = options.as(StreamingOptions.class).isStreaming() && !sideInputReader.isEmpty();
    DoFnRunner<InputT, KV<K, Iterable<V>>> basicRunner = new GroupAlsoByWindowFnRunner<>(options, doFn, sideInputReader, outputManager, mainOutputTag, stepContext);
    if (doFn instanceof StreamingGroupAlsoByWindowViaWindowSetFn) {
        // NOTE(review): this cast presumes InputT is KeyedWorkItem<K, V> whenever doFn is a
        // StreamingGroupAlsoByWindowViaWindowSetFn; confirm against the class's type parameters.
        DoFnRunner<KeyedWorkItem<K, V>, KV<K, Iterable<V>>> streamingGABWRunner = (DoFnRunner<KeyedWorkItem<K, V>, KV<K, Iterable<V>>>) basicRunner;
        if (hasStreamingSideInput) {
            // The input coder is expected to be the keyed-work-item coder here, which supplies
            // both the element coder for the fetcher and the key coder for the wrapper.
            @SuppressWarnings("unchecked") WindmillKeyedWorkItem.FakeKeyedWorkItemCoder<K, V> keyedWorkItemCoder = (WindmillKeyedWorkItem.FakeKeyedWorkItemCoder<K, V>) inputCoder;
            StreamingSideInputFetcher<V, W> sideInputFetcher = new StreamingSideInputFetcher<>(sideInputViews, keyedWorkItemCoder.getElementCoder(), windowingStrategy, (StreamingModeExecutionContext.StreamingModeStepContext) stepContext);
            streamingGABWRunner = new StreamingKeyedWorkItemSideInputDoFnRunner<>(streamingGABWRunner, keyedWorkItemCoder.getKeyCoder(), sideInputFetcher, stepContext);
        }
        // The late-data-dropping runner is the outermost layer of the streaming GABW chain.
        return (DoFnRunner<InputT, KV<K, Iterable<V>>>) DoFnRunners.<K, V, Iterable<V>, W>lateDataDroppingRunner(streamingGABWRunner, stepContext.timerInternals(), windowingStrategy);
    } else {
        if (hasStreamingSideInput) {
            return new StreamingSideInputDoFnRunner<>(basicRunner, new StreamingSideInputFetcher<>(sideInputViews, inputCoder, windowingStrategy, (StreamingModeExecutionContext.StreamingModeStepContext) stepContext));
        } else {
            return basicRunner;
        }
    }
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) LateDataDroppingDoFnRunner(org.apache.beam.runners.core.LateDataDroppingDoFnRunner) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager)

Example 17 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testEmpty.

/**
 * Runs a bundle that contains no elements and verifies that nothing is emitted on the main
 * output tag.
 */
@Test
public void testEmpty() throws Exception {
    TupleTag<KV<String, Iterable<String>>> mainTag = new TupleTag<>();
    ListOutputManager collectedOutputs = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> gabwRunner =
        makeRunner(
            mainTag,
            collectedOutputs,
            WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));

    // Open and immediately close a bundle without processing any work item.
    gabwRunner.startBundle();
    gabwRunner.finishBundle();

    List<?> emitted = collectedOutputs.getOutput(mainTag);
    assertThat(emitted.size(), equalTo(0));
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) Test(org.junit.Test)

Example 18 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testSlidingWindows.

/**
 * Feeds three elements through a sliding-window runner (size 20ms, period 10ms, EARLIEST
 * timestamp combiner) in one bundle, fires the watermark timers in a second bundle, and checks
 * that exactly three windowed outputs are produced with the expected contents and timestamps.
 */
@Test
public void testSlidingWindows() throws Exception {
    TupleTag<KV<String, Iterable<String>>> mainTag = new TupleTag<>();
    ListOutputManager collectedOutputs = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> gabwRunner =
        makeRunner(
            mainTag,
            collectedOutputs,
            WindowingStrategy.of(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10)))
                .withTimestampCombiner(TimestampCombiner.EARLIEST));
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(5));

    // First bundle: a work item carrying the three input elements.
    gabwRunner.startBundle();
    WorkItem.Builder elementsWorkItem =
        WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder inputBundle =
        elementsWorkItem.addMessageBundlesBuilder().setSourceComputationId(SOURCE_COMPUTATION_ID);
    Coder<String> stringCoder = StringUtf8Coder.of();
    addElement(
        inputBundle,
        Arrays.asList(window(-10, 10), window(0, 20)),
        new Instant(5),
        stringCoder,
        "v1");
    addElement(
        inputBundle,
        Arrays.asList(window(-10, 10), window(0, 20)),
        new Instant(2),
        stringCoder,
        "v0");
    addElement(
        inputBundle,
        Arrays.asList(window(0, 20), window(10, 30)),
        new Instant(15),
        stringCoder,
        "v2");
    gabwRunner.processElement(createValue(elementsWorkItem, stringCoder));
    gabwRunner.finishBundle();

    // Second bundle: a work item carrying one watermark timer per window.
    gabwRunner.startBundle();
    WorkItem.Builder timersWorkItem =
        WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
    addTimer(timersWorkItem, window(-10, 10), new Instant(9), Timer.Type.WATERMARK);
    addTimer(timersWorkItem, window(0, 20), new Instant(19), Timer.Type.WATERMARK);
    addTimer(timersWorkItem, window(10, 30), new Instant(29), Timer.Type.WATERMARK);
    // Move the mocked input watermark to 30 before the timers are delivered.
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(30));
    gabwRunner.processElement(createValue(timersWorkItem, stringCoder));
    gabwRunner.finishBundle();

    List<WindowedValue<KV<String, Iterable<String>>>> emitted = collectedOutputs.getOutput(mainTag);
    assertThat(emitted.size(), equalTo(3));
    assertThat(
        emitted,
        containsInAnyOrder(
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), containsInAnyOrder("v0", "v1")),
                equalTo(new Instant(2)),
                equalTo(window(-10, 10))),
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
                equalTo(new Instant(2)),
                equalTo(window(0, 20))),
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), containsInAnyOrder("v2")),
                equalTo(new Instant(15)),
                equalTo(window(10, 30)))));
}
Also used : Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Example 19 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class StreamingKeyedWorkItemSideInputDoFnRunnerTest method createRunner.

/**
 * Builds the runner under test: a {@code GroupAlsoByWindowFnRunner} around a summing
 * group-also-by-window fn, wrapped in a {@link StreamingKeyedWorkItemSideInputDoFnRunner}.
 *
 * @param outputManager receives whatever the inner runner emits
 * @return the side-input-aware runner wired to this test's {@code sideInputFetcher} and
 *     {@code stepContext}
 */
@SuppressWarnings("unchecked")
private StreamingKeyedWorkItemSideInputDoFnRunner<String, Integer, KV<String, Integer>, IntervalWindow> createRunner(DoFnRunners.OutputManager outputManager) throws Exception {
    CoderRegistry coderRegistry = CoderRegistry.createDefault();
    Coder<String> stringKeyCoder = StringUtf8Coder.of();
    Coder<Integer> intValueCoder = BigEndianIntegerCoder.of();
    AppliedCombineFn<String, Integer, ?, Integer> sumFn =
        AppliedCombineFn.withInputCoder(
            Sum.ofIntegers(), coderRegistry, KvCoder.of(stringKeyCoder, intValueCoder));
    WindowingStrategy<Object, IntervalWindow> strategy = WindowingStrategy.of(WINDOW_FN);

    // Every key is served by the same shared state object.
    @SuppressWarnings("rawtypes")
    StreamingGroupAlsoByWindowViaWindowSetFn gabwFn =
        (StreamingGroupAlsoByWindowViaWindowSetFn)
            StreamingGroupAlsoByWindowsDoFns.create(
                strategy, ignoredKey -> state, sumFn, stringKeyCoder);

    DoFnRunner<KeyedWorkItem<String, Integer>, KV<String, Integer>> innerRunner =
        new GroupAlsoByWindowFnRunner<>(
            PipelineOptionsFactory.create(),
            gabwFn.asDoFn(),
            mockSideInputReader,
            outputManager,
            mainOutputTag,
            stepContext);

    return new StreamingKeyedWorkItemSideInputDoFnRunner<String, Integer, KV<String, Integer>, IntervalWindow>(
        innerRunner, stringKeyCoder, sideInputFetcher, stepContext);
}
Also used : Arrays(java.util.Arrays) KV(org.apache.beam.sdk.values.KV) CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) KeyedWorkItems(org.apache.beam.runners.core.KeyedWorkItems) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) Matchers(org.mockito.Matchers) Mock(org.mockito.Mock) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Coder(org.apache.beam.sdk.coders.Coder) StateNamespaces(org.apache.beam.runners.core.StateNamespaces) TimerInternals(org.apache.beam.runners.core.TimerInternals) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) DoFnRunners(org.apache.beam.runners.core.DoFnRunners) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) ImmutableSet(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet) MockitoAnnotations(org.mockito.MockitoAnnotations) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) TupleTag(org.apache.beam.sdk.values.TupleTag) StateInternals(org.apache.beam.runners.core.StateInternals) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) SideInputReader(org.apache.beam.runners.core.SideInputReader) Before(org.junit.Before) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) KvCoder(org.apache.beam.sdk.coders.KvCoder) AppliedCombineFn(org.apache.beam.sdk.util.AppliedCombineFn) Timer(org.apache.beam.runners.dataflow.worker.windmill.Windmill.Timer) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BigEndianIntegerCoder(org.apache.beam.sdk.coders.BigEndianIntegerCoder) Set(java.util.Set) Sum(org.apache.beam.sdk.transforms.Sum) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JUnit4(org.junit.runners.JUnit4) List(java.util.List) 
BagState(org.apache.beam.sdk.state.BagState) Instant(org.joda.time.Instant) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) TimeDomain(org.apache.beam.sdk.state.TimeDomain) Assert.assertEquals(org.junit.Assert.assertEquals) ValueInEmptyWindows(org.apache.beam.runners.dataflow.worker.util.ValueInEmptyWindows) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) KV(org.apache.beam.sdk.values.KV) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow)

Example 20 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class GroupByKeyOp method open.

/**
 * Prepares this operator for processing: resolves the pipeline options from the Samza execution
 * context, creates the state and timer internals factories, builds the group-also-by-window
 * {@link DoFn} and wraps its runner with metrics reporting.
 *
 * @param config the config supplied by the caller (not read by this method)
 * @param context source of the application container context and the task context
 * @param timerRegistry scheduler handed to the timer internals factory
 * @param emitter sink used to create the output manager
 */
@Override
public void open(Config config, Context context, Scheduler<KeyedTimerData<K>> timerRegistry, OpEmitter<KV<K, OutputT>> emitter) {
    // Looked up once and reused below for both the pipeline options and the metrics container.
    final SamzaExecutionContext samzaExecutionContext = (SamzaExecutionContext) context.getApplicationContainerContext();
    this.pipelineOptions = samzaExecutionContext.getPipelineOptions();
    final SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory = SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(transformId, context.getTaskContext(), pipelineOptions);
    final DoFnRunners.OutputManager outputManager = outputManagerFactory.create(emitter);
    this.stateInternalsFactory = new SamzaStoreStateInternals.Factory<>(transformId, Collections.singletonMap(SamzaStoreStateInternals.BEAM_STORE, SamzaStoreStateInternals.getBeamStore(context.getTaskContext())), keyCoder, pipelineOptions.getStoreBatchGetSize());
    this.timerInternalsFactory = SamzaTimerInternalsFactory.createTimerInternalFactory(keyCoder, timerRegistry, TIMER_STATE_ID, nonKeyedStateInternalsFactory, windowingStrategy, isBounded, pipelineOptions);
    final DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFn = GroupAlsoByWindowViaWindowSetNewDoFn.create(windowingStrategy, stateInternalsFactory, timerInternalsFactory, NullSideInputReader.of(Collections.emptyList()), reduceFn, outputManager, mainOutputTag);
    final KeyedInternals<K> keyedInternals = new KeyedInternals<>(stateInternalsFactory, timerInternalsFactory);
    // The step context simply delegates to the keyed internals built above.
    final StepContext stepContext = new StepContext() {

        @Override
        public StateInternals stateInternals() {
            return keyedInternals.stateInternals();
        }

        @Override
        public TimerInternals timerInternals() {
            return keyedInternals.timerInternals();
        }
    };
    // NOTE(review): the runner is given freshly created default options rather than
    // this.pipelineOptions; left unchanged here, confirm whether that is intentional.
    final DoFnRunner<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFnRunner = DoFnRunners.simpleRunner(PipelineOptionsFactory.create(), doFn, NullSideInputReader.of(Collections.emptyList()), outputManager, mainOutputTag, Collections.emptyList(), stepContext, null, Collections.emptyMap(), windowingStrategy, DoFnSchemaInformation.create(), Collections.emptyMap());
    this.fnRunner = DoFnRunnerWithMetrics.wrap(doFnRunner, samzaExecutionContext.getMetricsContainer(), transformFullName);
}
Also used : SamzaExecutionContext(org.apache.beam.runners.samza.SamzaExecutionContext) StepContext(org.apache.beam.runners.core.StepContext) DoFnRunners(org.apache.beam.runners.core.DoFnRunners) KV(org.apache.beam.sdk.values.KV) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem)

Aggregations

KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)20 TupleTag (org.apache.beam.sdk.values.TupleTag)16 KV (org.apache.beam.sdk.values.KV)15 Instant (org.joda.time.Instant)14 Test (org.junit.Test)13 WindowedValue (org.apache.beam.sdk.util.WindowedValue)11 ListOutputManager (org.apache.beam.runners.dataflow.worker.util.ListOutputManager)9 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)9 InputMessageBundle (org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle)6 WorkItem (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem)6 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)4 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)4 PCollection (org.apache.beam.sdk.values.PCollection)4 OutputManager (org.apache.beam.runners.core.DoFnRunners.OutputManager)3 OutputWindowedValue (org.apache.beam.runners.core.OutputWindowedValue)3 KvCoder (org.apache.beam.sdk.coders.KvCoder)3 PaneInfo (org.apache.beam.sdk.transforms.windowing.PaneInfo)3 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)3 Collection (java.util.Collection)2 List (java.util.List)2