Search in sources :

Example 51 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class ApexParDoOperator method fireTimer.

@Override
public void fireTimer(Object key, Collection<TimerData> timerDataSet) {
    pushbackDoFnRunner.startBundle();
    @SuppressWarnings("unchecked") Coder<Object> keyCoder = (Coder) currentKeyStateInternals.getKeyCoder();
    ((StateInternalsProxy) currentKeyStateInternals).setKey(key);
    currentKeyTimerInternals.setContext(key, keyCoder, new Instant(this.currentInputWatermark), new Instant(this.currentOutputWatermark));
    for (TimerData timerData : timerDataSet) {
        StateNamespace namespace = timerData.getNamespace();
        checkArgument(namespace instanceof WindowNamespace);
        BoundedWindow window = ((WindowNamespace<?>) namespace).getWindow();
        pushbackDoFnRunner.onTimer(timerData.getTimerId(), window, timerData.getTimestamp(), timerData.getDomain());
    }
    pushbackDoFnRunner.finishBundle();
}
Also used : WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) KeyedWorkItemCoder(org.apache.beam.runners.core.KeyedWorkItemCoder) ListCoder(org.apache.beam.sdk.coders.ListCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) StateInternalsProxy(org.apache.beam.runners.apex.translation.utils.StateInternalsProxy) Instant(org.joda.time.Instant) WindowNamespace(org.apache.beam.runners.core.StateNamespaces.WindowNamespace) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) StateNamespace(org.apache.beam.runners.core.StateNamespace)

Example 52 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class ApexParDoOperator method setup.

@Override
public void setup(OperatorContext context) {
    this.traceTuples = ApexStreamTuple.Logging.isDebugEnabled(pipelineOptions.get(), this);
    SideInputReader sideInputReader = NullSideInputReader.of(sideInputs);
    if (!sideInputs.isEmpty()) {
        sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
        sideInputReader = sideInputHandler;
    }
    for (int i = 0; i < additionalOutputTags.size(); i++) {
        @SuppressWarnings("unchecked") DefaultOutputPort<ApexStreamTuple<?>> port = (DefaultOutputPort<ApexStreamTuple<?>>) additionalOutputPorts[i];
        additionalOutputPortMapping.put(additionalOutputTags.get(i), port);
    }
    NoOpStepContext stepContext = new NoOpStepContext() {

        @Override
        public StateInternals stateInternals() {
            return currentKeyStateInternals;
        }

        @Override
        public TimerInternals timerInternals() {
            return currentKeyTimerInternals;
        }
    };
    DoFnRunner<InputT, OutputT> doFnRunner = DoFnRunners.simpleRunner(pipelineOptions.get(), doFn, sideInputReader, this, mainOutputTag, additionalOutputTags, stepContext, windowingStrategy);
    doFnInvoker = DoFnInvokers.invokerFor(doFn);
    doFnInvoker.invokeSetup();
    if (this.currentKeyStateInternals != null) {
        StatefulDoFnRunner.CleanupTimer cleanupTimer = new StatefulDoFnRunner.TimeInternalsCleanupTimer(stepContext.timerInternals(), windowingStrategy);
        @SuppressWarnings({ "rawtypes" }) Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
        @SuppressWarnings({ "unchecked" }) StatefulDoFnRunner.StateCleaner<?> stateCleaner = new StatefulDoFnRunner.StateInternalsStateCleaner<>(doFn, stepContext.stateInternals(), windowCoder);
        doFnRunner = DoFnRunners.defaultStatefulDoFnRunner(doFn, doFnRunner, windowingStrategy, cleanupTimer, stateCleaner);
    }
    pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
    if (doFn instanceof ProcessFn) {
        @SuppressWarnings("unchecked") StateInternalsFactory<String> stateInternalsFactory = (StateInternalsFactory<String>) this.currentKeyStateInternals.getFactory();
        @SuppressWarnings({ "rawtypes", "unchecked" }) ProcessFn<InputT, OutputT, Object, RestrictionTracker<Object>> splittableDoFn = (ProcessFn) doFn;
        splittableDoFn.setStateInternalsFactory(stateInternalsFactory);
        TimerInternalsFactory<String> timerInternalsFactory = new TimerInternalsFactory<String>() {

            @Override
            public TimerInternals timerInternalsForKey(String key) {
                return currentKeyTimerInternals;
            }
        };
        splittableDoFn.setTimerInternalsFactory(timerInternalsFactory);
        splittableDoFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>(doFn, pipelineOptions.get(), new OutputWindowedValue<OutputT>() {

            @Override
            public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                output(mainOutputTag, WindowedValue.of(output, timestamp, windows, pane));
            }

            @Override
            public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                output(tag, WindowedValue.of(output, timestamp, windows, pane));
            }
        }, sideInputReader, Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()), 10000, Duration.standardSeconds(10)));
    }
}
Also used : RestrictionTracker(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker) ApexStreamTuple(org.apache.beam.runners.apex.translation.utils.ApexStreamTuple) ProcessFn(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn) SideInputHandler(org.apache.beam.runners.core.SideInputHandler) TupleTag(org.apache.beam.sdk.values.TupleTag) SideInputReader(org.apache.beam.runners.core.SideInputReader) NullSideInputReader(org.apache.beam.runners.core.NullSideInputReader) NoOpStepContext(org.apache.beam.runners.apex.translation.utils.NoOpStepContext) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) DefaultOutputPort(com.datatorrent.api.DefaultOutputPort) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) KeyedWorkItemCoder(org.apache.beam.runners.core.KeyedWorkItemCoder) ListCoder(org.apache.beam.sdk.coders.ListCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) TimerInternalsFactory(org.apache.beam.runners.core.TimerInternalsFactory) Instant(org.joda.time.Instant) StateInternalsFactory(org.apache.beam.runners.core.StateInternalsFactory) Collection(java.util.Collection)

Example 53 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class ApexGroupByKeyOperatorTest method testGlobalWindowMinTimestamp.

@Test
public void testGlobalWindowMinTimestamp() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    options.setRunner(TestApexRunner.class);
    Pipeline pipeline = Pipeline.create(options);
    WindowingStrategy<?, ?> ws = WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(10)));
    PCollection<KV<String, Integer>> input = PCollection.createPrimitiveOutputInternal(pipeline, ws, IsBounded.BOUNDED);
    input.setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));
    ApexGroupByKeyOperator<String, Integer> operator = new ApexGroupByKeyOperator<>(options, input, new ApexStateInternals.ApexStateBackend());
    operator.setup(null);
    operator.beginWindow(1);
    Assert.assertNotNull("Serialization", operator = KryoCloneUtils.cloneObject(operator));
    final List<Object> results = Lists.newArrayList();
    Sink<Object> sink = new Sink<Object>() {

        @Override
        public void put(Object tuple) {
            results.add(tuple);
        }

        @Override
        public int getCount(boolean reset) {
            return 0;
        }
    };
    operator.output.setSink(sink);
    operator.setup(null);
    operator.beginWindow(1);
    Instant windowStart = BoundedWindow.TIMESTAMP_MIN_VALUE;
    BoundedWindow window = new IntervalWindow(windowStart, windowStart.plus(10000));
    PaneInfo paneInfo = PaneInfo.NO_FIRING;
    WindowedValue<KV<String, Integer>> wv1 = WindowedValue.of(KV.of("foo", 1), windowStart, window, paneInfo);
    operator.input.process(ApexStreamTuple.DataTuple.of(wv1));
    WindowedValue<KV<String, Integer>> wv2 = WindowedValue.of(KV.of("foo", 1), windowStart, window, paneInfo);
    operator.input.process(ApexStreamTuple.DataTuple.of(wv2));
    ApexStreamTuple<WindowedValue<KV<String, Integer>>> watermark = ApexStreamTuple.WatermarkTuple.of(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());
    Assert.assertEquals("number outputs", 0, results.size());
    operator.input.process(watermark);
    Assert.assertEquals("number outputs", 2, results.size());
    @SuppressWarnings({ "unchecked", "rawtypes" }) ApexStreamTuple.DataTuple<WindowedValue<KV<String, Iterable<Integer>>>> dataTuple = (ApexStreamTuple.DataTuple) results.get(0);
    List<Integer> counts = Lists.newArrayList(1, 1);
    Assert.assertEquals("iterable", KV.of("foo", counts), dataTuple.getValue().getValue());
    Assert.assertEquals("expected watermark", watermark, results.get(1));
}
Also used : ApexStreamTuple(org.apache.beam.runners.apex.translation.utils.ApexStreamTuple) Sink(com.datatorrent.api.Sink) WindowedValue(org.apache.beam.sdk.util.WindowedValue) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) ApexGroupByKeyOperator(org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator) ApexStateInternals(org.apache.beam.runners.apex.translation.utils.ApexStateInternals) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Test(org.junit.Test)

Example 54 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class SparkAssignWindowFn method call.

@Override
@SuppressWarnings("unchecked")
public WindowedValue<T> call(WindowedValue<T> windowedValue) throws Exception {
    final BoundedWindow boundedWindow = Iterables.getOnlyElement(windowedValue.getWindows());
    final T element = windowedValue.getValue();
    final Instant timestamp = windowedValue.getTimestamp();
    Collection<W> windows = ((WindowFn<T, W>) fn).assignWindows(((WindowFn<T, W>) fn).new AssignContext() {

        @Override
        public T element() {
            return element;
        }

        @Override
        public Instant timestamp() {
            return timestamp;
        }

        @Override
        public BoundedWindow window() {
            return boundedWindow;
        }
    });
    return WindowedValue.of(element, timestamp, windows, PaneInfo.NO_FIRING);
}
Also used : Instant(org.joda.time.Instant) WindowFn(org.apache.beam.sdk.transforms.windowing.WindowFn) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow)

Aggregations

BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)54 Instant (org.joda.time.Instant)27 Test (org.junit.Test)26 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)21 KV (org.apache.beam.sdk.values.KV)20 WindowedValue (org.apache.beam.sdk.util.WindowedValue)14 ArrayList (java.util.ArrayList)7 TimerSpec (org.apache.beam.sdk.state.TimerSpec)7 Timer (org.apache.beam.sdk.state.Timer)6 Matchers.containsString (org.hamcrest.Matchers.containsString)6 DoFn (org.apache.beam.sdk.transforms.DoFn)5 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)5 ImmutableList (com.google.common.collect.ImmutableList)4 List (java.util.List)4 ValueState (org.apache.beam.sdk.state.ValueState)4 OnTimer (org.apache.beam.sdk.transforms.DoFn.OnTimer)4 TimestampCombiner (org.apache.beam.sdk.transforms.windowing.TimestampCombiner)4 PCollection (org.apache.beam.sdk.values.PCollection)4 TupleTag (org.apache.beam.sdk.values.TupleTag)4 Duration (org.joda.time.Duration)4