Search in sources :

Example 1 with PaneInfo

use of org.apache.beam.sdk.transforms.windowing.PaneInfo in project beam by apache.

the class ReduceFnRunner method onTrigger.

/**
 * Run the {@link ReduceFn#onTrigger} method and produce any necessary output.
 *
 * <p>In order, this method: reads the current input watermark, computes the next pane info,
 * extracts (and thereby releases) the watermark hold, sanity-checks any newly added hold, and
 * finally, if {@code needToEmit} allows it, invokes {@code reduceFn.onTrigger} with a callback
 * that persists the pane info and emits the value.
 *
 * @param directContext context used for window lookups and for reading/storing the pane info
 * @param renamedContext context used for extracting the watermark hold and for reading the
 *     non-empty-panes state
 * @param isFinished forwarded to the pane info tracker, the hold extraction and
 *     {@code needToEmit}; a new hold must not exist when this is {@code true}
 * @param isEndOfWindow when {@code true}, a new hold at the end of the window is rejected
 *     (the check message refers to this case as "the watermark trigger")
 * @return output watermark hold added, or {@literal null} if none.
 * @throws Exception declared by this method; presumably propagated from
 *     {@code reduceFn.onTrigger} (confirm against {@code ReduceFn})
 */
@Nullable
private Instant onTrigger(final ReduceFn<K, InputT, OutputT, W>.Context<K, InputT, OutputT, W> directContext, ReduceFn<K, InputT, OutputT, W>.Context<K, InputT, OutputT, W> renamedContext, boolean isFinished, boolean isEndOfWindow) throws Exception {
    // Snapshot the input watermark up front; it is only used by the hold sanity check below.
    Instant inputWM = timerInternals.currentInputWatermarkTime();
    // Calculate the pane info.
    final PaneInfo pane = paneInfoTracker.getNextPaneInfo(directContext, isFinished).read();
    // Extract the window hold, and as a side effect clear it.
    final WatermarkHold.OldAndNewHolds pair = watermarkHold.extractAndRelease(renamedContext, isFinished).read();
    // The old hold becomes the timestamp of any value emitted by the callback below.
    // TODO: This isn't accurate if the elements are late. See BEAM-2262
    final Instant outputTimestamp = pair.oldHold;
    // The new hold (possibly null) is what this method returns to the caller.
    @Nullable Instant newHold = pair.newHold;
    final boolean isEmpty = nonEmptyPanes.isEmpty(renamedContext.state()).read();
    if (newHold != null) {
        // We can't be finished yet.
        checkState(!isFinished, "new hold at %s but finished %s", newHold, directContext.window());
        // The hold cannot be behind the input watermark.
        checkState(!newHold.isBefore(inputWM), "new hold %s is before input watermark %s", newHold, inputWM);
        if (newHold.isAfter(directContext.window().maxTimestamp())) {
            // The hold must be for garbage collection, which can't have happened yet.
            checkState(newHold.isEqual(LateDataUtils.garbageCollectionTime(directContext.window(), windowingStrategy)), "new hold %s should be at garbage collection for window %s plus %s", newHold, directContext.window(), windowingStrategy.getAllowedLateness());
        } else {
            // The hold must be for the end-of-window, which can't have happened yet.
            checkState(newHold.isEqual(directContext.window().maxTimestamp()), "new hold %s should be at end of window %s", newHold, directContext.window());
            checkState(!isEndOfWindow, "new hold at %s for %s but this is the watermark trigger", newHold, directContext.window());
        }
    }
    // Only emit a pane if it has data or empty panes are observable.
    if (needToEmit(isEmpty, isFinished, pane.getTiming())) {
        // Run reduceFn.onTrigger method.
        // The output is always attributed to the single window of the direct context.
        final List<W> windows = Collections.singletonList(directContext.window());
        ReduceFn<K, InputT, OutputT, W>.OnTriggerContext<K, InputT, OutputT, W> renamedTriggerContext = contextFactory.forTrigger(directContext.window(), pane, StateStyle.RENAMED, new OnTriggerCallbacks<OutputT>() {

            @Override
            public void output(OutputT toOutput) {
                // We're going to output panes, so commit the (now used) PaneInfo.
                // TODO: This is unnecessary if the trigger isFinished since the saved
                // state will be immediately deleted.
                paneInfoTracker.storeCurrentPaneInfo(directContext, pane);
                // Output the actual value.
                outputter.outputWindowedValue(KV.of(key, toOutput), outputTimestamp, windows, pane);
            }
        });
        reduceFn.onTrigger(renamedTriggerContext);
    }
    // Returned whether or not a pane was emitted.
    return newHold;
}
Also used : Instant(org.joda.time.Instant) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) Nullable(javax.annotation.Nullable)

Example 2 with PaneInfo

use of org.apache.beam.sdk.transforms.windowing.PaneInfo in project beam by apache.

the class WindowedValueTest method testExplodeWindowsManyWindowsMultipleWindowedValues.

/**
 * A value assigned to four overlapping windows must explode into four single-window values,
 * each keeping the original payload, timestamp and pane.
 */
@Test
public void testExplodeWindowsManyWindowsMultipleWindowedValues() {
    final Instant timestamp = Instant.now();
    final PaneInfo paneInfo = PaneInfo.createPane(false, false, Timing.ON_TIME, 3L, 0L);

    // Four windows that all contain (or start at) the element timestamp.
    final BoundedWindow past = new IntervalWindow(timestamp.minus(1500L), timestamp.plus(500L));
    final BoundedWindow center = new IntervalWindow(timestamp.minus(1000L), timestamp.plus(1000L));
    final BoundedWindow future = new IntervalWindow(timestamp.minus(500L), timestamp.plus(1500L));
    final BoundedWindow farFuture = new IntervalWindow(timestamp, timestamp.plus(2000L));

    final WindowedValue<String> multiWindowValue =
        WindowedValue.of("foo", timestamp, ImmutableList.of(past, center, future, farFuture), paneInfo);

    // One expected single-window value per input window; order is irrelevant to the matcher.
    final WindowedValue<String> inFarFuture = WindowedValue.of("foo", timestamp, farFuture, paneInfo);
    final WindowedValue<String> inFuture = WindowedValue.of("foo", timestamp, future, paneInfo);
    final WindowedValue<String> inCenter = WindowedValue.of("foo", timestamp, center, paneInfo);
    final WindowedValue<String> inPast = WindowedValue.of("foo", timestamp, past, paneInfo);

    assertThat(multiWindowValue.explodeWindows(), containsInAnyOrder(inFarFuture, inFuture, inCenter, inPast));
}
Also used : Instant(org.joda.time.Instant) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 3 with PaneInfo

use of org.apache.beam.sdk.transforms.windowing.PaneInfo in project beam by apache.

the class DefaultFilenamePolicy method windowedFilename.

/**
 * Builds the output file name for a windowed write and resolves it against
 * {@code outputDirectory}.
 *
 * <p>If the configured shard template differs from {@code DEFAULT_SHARD_TEMPLATE} it is used
 * as-is (with an informational log entry when {@code isWindowedTemplate} reports it is not a
 * windowed template); otherwise {@code DEFAULT_WINDOWED_SHARD_TEMPLATE} is substituted.
 *
 * @param outputDirectory directory the generated file name is resolved against
 * @param context supplies the window, pane info, shard number and shard count
 * @param extension text appended verbatim after the constructed name
 * @return the resolved file resource
 */
@Override
public ResourceId windowedFilename(ResourceId outputDirectory, WindowedContext context, String extension) {
    final boolean shardTemplateProvidedByUser = !this.shardTemplate.equals(DEFAULT_SHARD_TEMPLATE);
    if (shardTemplateProvidedByUser && !isWindowedTemplate(this.shardTemplate)) {
        // The original message was missing the space between "create" and "meaningful".
        LOG.info("Template you provided {} does not have enough information to create "
            + "meaningful windowed file names. Consider using P and W in your template", this.shardTemplate);
    }
    final PaneInfo paneInfo = context.getPaneInfo();
    final String paneStr = paneInfoToString(paneInfo);
    final String windowStr = windowToString(context.getWindow());
    String templateToUse = shardTemplate;
    if (!shardTemplateProvidedByUser) {
        // Fall back to the windowed default so window and pane appear in the name.
        LOG.info("User did not provide shard template. For creating windowed file names "
            + "default template {} will be used", DEFAULT_WINDOWED_SHARD_TEMPLATE);
        templateToUse = DEFAULT_WINDOWED_SHARD_TEMPLATE;
    }
    final String filename = constructName(prefix.get(), templateToUse, suffix, context.getShardNumber(), context.getNumShards(), paneStr, windowStr) + extension;
    return outputDirectory.resolve(filename, StandardResolveOptions.RESOLVE_FILE);
}
Also used : PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo)

Example 4 with PaneInfo

use of org.apache.beam.sdk.transforms.windowing.PaneInfo in project beam by apache.

the class ApexParDoOperator method setup.

/**
 * Prepares this operator for processing: wires side inputs, maps additional output tags to
 * their ports, builds the {@code DoFnRunner} chain (optionally wrapped for stateful DoFns),
 * invokes the DoFn's setup, and, when the DoFn is a {@code ProcessFn}, configures it with
 * state/timer factories and a process-element invoker.
 *
 * @param context the operator context; not referenced in this method body
 */
@Override
public void setup(OperatorContext context) {
    this.traceTuples = ApexStreamTuple.Logging.isDebugEnabled(pipelineOptions.get(), this);
    // Start with a null reader; replace it with a real handler only when side inputs exist.
    SideInputReader sideInputReader = NullSideInputReader.of(sideInputs);
    if (!sideInputs.isEmpty()) {
        sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
        sideInputReader = sideInputHandler;
    }
    // Associate each additional output tag with the port at the same index.
    for (int i = 0; i < additionalOutputTags.size(); i++) {
        @SuppressWarnings("unchecked") DefaultOutputPort<ApexStreamTuple<?>> port = (DefaultOutputPort<ApexStreamTuple<?>>) additionalOutputPorts[i];
        additionalOutputPortMapping.put(additionalOutputTags.get(i), port);
    }
    // Step context that exposes the operator's current-key state and timer internals.
    NoOpStepContext stepContext = new NoOpStepContext() {

        @Override
        public StateInternals stateInternals() {
            return currentKeyStateInternals;
        }

        @Override
        public TimerInternals timerInternals() {
            return currentKeyTimerInternals;
        }
    };
    // Base runner; this operator is passed as the fourth argument (presumably the output
    // manager, given the output(...) calls below; confirm against DoFnRunners.simpleRunner).
    DoFnRunner<InputT, OutputT> doFnRunner = DoFnRunners.simpleRunner(pipelineOptions.get(), doFn, sideInputReader, this, mainOutputTag, additionalOutputTags, stepContext, windowingStrategy);
    doFnInvoker = DoFnInvokers.invokerFor(doFn);
    doFnInvoker.invokeSetup();
    // When keyed state is available, wrap the runner so state is cleaned up via a timer.
    if (this.currentKeyStateInternals != null) {
        StatefulDoFnRunner.CleanupTimer cleanupTimer = new StatefulDoFnRunner.TimeInternalsCleanupTimer(stepContext.timerInternals(), windowingStrategy);
        @SuppressWarnings({ "rawtypes" }) Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
        @SuppressWarnings({ "unchecked" }) StatefulDoFnRunner.StateCleaner<?> stateCleaner = new StatefulDoFnRunner.StateInternalsStateCleaner<>(doFn, stepContext.stateInternals(), windowCoder);
        doFnRunner = DoFnRunners.defaultStatefulDoFnRunner(doFn, doFnRunner, windowingStrategy, cleanupTimer, stateCleaner);
    }
    // NOTE(review): sideInputHandler is only assigned above when sideInputs is non-empty.
    pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
    if (doFn instanceof ProcessFn) {
        // NOTE(review): currentKeyStateInternals is dereferenced here without the null check
        // used above; confirm it is always set when doFn is a ProcessFn.
        @SuppressWarnings("unchecked") StateInternalsFactory<String> stateInternalsFactory = (StateInternalsFactory<String>) this.currentKeyStateInternals.getFactory();
        @SuppressWarnings({ "rawtypes", "unchecked" }) ProcessFn<InputT, OutputT, Object, RestrictionTracker<Object>> splittableDoFn = (ProcessFn) doFn;
        splittableDoFn.setStateInternalsFactory(stateInternalsFactory);
        // The key argument is ignored: every key gets the operator's current-key timer internals.
        TimerInternalsFactory<String> timerInternalsFactory = new TimerInternalsFactory<String>() {

            @Override
            public TimerInternals timerInternalsForKey(String key) {
                return currentKeyTimerInternals;
            }
        };
        splittableDoFn.setTimerInternalsFactory(timerInternalsFactory);
        // Outputs from the splittable DoFn are routed back through this operator's output(...).
        // NOTE(review): the executor created inline is not retained or shut down in this method;
        // confirm it is released elsewhere. The trailing 10000 and 10-second arguments are
        // presumably per-invocation output and time bounds; verify against
        // OutputAndTimeBoundedSplittableProcessElementInvoker.
        splittableDoFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>(doFn, pipelineOptions.get(), new OutputWindowedValue<OutputT>() {

            @Override
            public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                output(mainOutputTag, WindowedValue.of(output, timestamp, windows, pane));
            }

            @Override
            public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                output(tag, WindowedValue.of(output, timestamp, windows, pane));
            }
        }, sideInputReader, Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()), 10000, Duration.standardSeconds(10)));
    }
}
Also used : RestrictionTracker(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker) ApexStreamTuple(org.apache.beam.runners.apex.translation.utils.ApexStreamTuple) ProcessFn(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn) SideInputHandler(org.apache.beam.runners.core.SideInputHandler) TupleTag(org.apache.beam.sdk.values.TupleTag) SideInputReader(org.apache.beam.runners.core.SideInputReader) NullSideInputReader(org.apache.beam.runners.core.NullSideInputReader) NoOpStepContext(org.apache.beam.runners.apex.translation.utils.NoOpStepContext) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) DefaultOutputPort(com.datatorrent.api.DefaultOutputPort) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) KeyedWorkItemCoder(org.apache.beam.runners.core.KeyedWorkItemCoder) ListCoder(org.apache.beam.sdk.coders.ListCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) TimerInternalsFactory(org.apache.beam.runners.core.TimerInternalsFactory) Instant(org.joda.time.Instant) StateInternalsFactory(org.apache.beam.runners.core.StateInternalsFactory) Collection(java.util.Collection)

Example 5 with PaneInfo

use of org.apache.beam.sdk.transforms.windowing.PaneInfo in project beam by apache.

the class ApexGroupByKeyOperatorTest method testGlobalWindowMinTimestamp.

/**
 * Feeds two elements stamped at {@code BoundedWindow.TIMESTAMP_MIN_VALUE} into a group-by-key
 * operator and checks that nothing is emitted until the maximum watermark arrives, after which
 * the grouped value and the watermark itself are produced.
 */
@Test
public void testGlobalWindowMinTimestamp() throws Exception {
    final ApexPipelineOptions pipelineOptions = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    pipelineOptions.setRunner(TestApexRunner.class);
    final Pipeline testPipeline = Pipeline.create(pipelineOptions);

    final WindowingStrategy<?, ?> strategy = WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(10)));
    final PCollection<KV<String, Integer>> inputCollection = PCollection.createPrimitiveOutputInternal(testPipeline, strategy, IsBounded.BOUNDED);
    inputCollection.setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));

    ApexGroupByKeyOperator<String, Integer> operator = new ApexGroupByKeyOperator<>(pipelineOptions, inputCollection, new ApexStateInternals.ApexStateBackend());
    operator.setup(null);
    operator.beginWindow(1);

    // Round-trip the operator through Kryo and continue the test with the clone.
    operator = KryoCloneUtils.cloneObject(operator);
    Assert.assertNotNull("Serialization", operator);

    // Collect everything the operator emits on its output port.
    final List<Object> emitted = Lists.newArrayList();
    final Sink<Object> collector = new Sink<Object>() {

        @Override
        public void put(Object tuple) {
            emitted.add(tuple);
        }

        @Override
        public int getCount(boolean reset) {
            return 0;
        }
    };
    operator.output.setSink(collector);
    operator.setup(null);
    operator.beginWindow(1);

    final Instant windowStart = BoundedWindow.TIMESTAMP_MIN_VALUE;
    final BoundedWindow window = new IntervalWindow(windowStart, windowStart.plus(10000));
    final PaneInfo paneInfo = PaneInfo.NO_FIRING;

    // Submit the same ("foo", 1) element twice, as two distinct windowed values.
    for (int i = 0; i < 2; i++) {
        WindowedValue<KV<String, Integer>> element = WindowedValue.of(KV.of("foo", 1), windowStart, window, paneInfo);
        operator.input.process(ApexStreamTuple.DataTuple.of(element));
    }

    final ApexStreamTuple<WindowedValue<KV<String, Integer>>> watermark = ApexStreamTuple.WatermarkTuple.of(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());

    // Nothing may be emitted before the watermark is processed.
    Assert.assertEquals("number outputs", 0, emitted.size());
    operator.input.process(watermark);
    Assert.assertEquals("number outputs", 2, emitted.size());

    // First output: the grouped values; second output: the forwarded watermark.
    @SuppressWarnings({ "unchecked", "rawtypes" }) ApexStreamTuple.DataTuple<WindowedValue<KV<String, Iterable<Integer>>>> grouped = (ApexStreamTuple.DataTuple) emitted.get(0);
    final List<Integer> expectedValues = Lists.newArrayList(1, 1);
    Assert.assertEquals("iterable", KV.of("foo", expectedValues), grouped.getValue().getValue());
    Assert.assertEquals("expected watermark", watermark, emitted.get(1));
}
Also used : ApexStreamTuple(org.apache.beam.runners.apex.translation.utils.ApexStreamTuple) Sink(com.datatorrent.api.Sink) WindowedValue(org.apache.beam.sdk.util.WindowedValue) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) ApexGroupByKeyOperator(org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator) ApexStateInternals(org.apache.beam.runners.apex.translation.utils.ApexStateInternals) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Test(org.junit.Test)

Aggregations

PaneInfo (org.apache.beam.sdk.transforms.windowing.PaneInfo)9 Instant (org.joda.time.Instant)7 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)4 Collection (java.util.Collection)2 ApexStreamTuple (org.apache.beam.runners.apex.translation.utils.ApexStreamTuple)2 OutputWindowedValue (org.apache.beam.runners.core.OutputWindowedValue)2 ProcessFn (org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn)2 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)2 WindowedValue (org.apache.beam.sdk.util.WindowedValue)2 TupleTag (org.apache.beam.sdk.values.TupleTag)2 DefaultOutputPort (com.datatorrent.api.DefaultOutputPort)1 Sink (com.datatorrent.api.Sink)1 ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder)1 Nullable (javax.annotation.Nullable)1 ApexPipelineOptions (org.apache.beam.runners.apex.ApexPipelineOptions)1 ApexGroupByKeyOperator (org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator)1 ApexStateInternals (org.apache.beam.runners.apex.translation.utils.ApexStateInternals)1 NoOpStepContext (org.apache.beam.runners.apex.translation.utils.NoOpStepContext)1 OutputManager (org.apache.beam.runners.core.DoFnRunners.OutputManager)1 KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)1