Search in sources :

Example 61 with Instant

use of org.joda.time.Instant in project beam by apache.

the class SimpleDoFnRunner method onTimer.

@Override
public void onTimer(String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
    // Anything output while handling the timer needs a default timestamp. An event-time timer
    // lends its own timestamp; the processing-time domains use the current input watermark
    // instead, which is non-late by definition.
    final Instant outputTimestamp;
    switch(timeDomain) {
        case PROCESSING_TIME:
        case SYNCHRONIZED_PROCESSING_TIME:
            outputTimestamp = stepContext.timerInternals().currentInputWatermarkTime();
            break;
        case EVENT_TIME:
            outputTimestamp = timestamp;
            break;
        default:
            throw new IllegalArgumentException(String.format("Unknown time domain: %s", timeDomain));
    }
    invoker.invokeOnTimer(timerId, new OnTimerArgumentProvider(window, outputTimestamp, timeDomain));
}
Also used : Instant(org.joda.time.Instant)

Example 62 with Instant

use of org.joda.time.Instant in project beam by apache.

the class StateMerging method mergeWatermarks.

/**
 * Merge all watermark state in {@code sources} (which must include {@code result} if non-empty)
 * into {@code result}, where the final merge result window is {@code resultWindow}.
 *
 * <p>Every source hold is cleared before the merged hold (if any) is added to {@code result}.
 * When the timestamp combiner of {@code result} depends only on the window, the source values
 * are never read; the new hold is derived from {@code resultWindow} alone.
 *
 * @param sources the holds to merge; an empty collection makes this a no-op
 * @param result the hold that receives the merged value
 * @param resultWindow the window passed to the timestamp combiner when assigning or merging
 */
public static <W extends BoundedWindow> void mergeWatermarks(Collection<WatermarkHoldState> sources, WatermarkHoldState result, W resultWindow) {
    if (sources.isEmpty()) {
        // No source holds were supplied, so there is nothing to merge.
        return;
    }
    if (sources.size() == 1 && sources.contains(result) && result.getTimestampCombiner().dependsOnlyOnEarliestTimestamp()) {
        // The only source is the result itself and the combiner depends only on the earliest
        // timestamp, so the result is left as it is.
        return;
    }
    if (result.getTimestampCombiner().dependsOnlyOnWindow()) {
        // Clear sources.
        for (WatermarkHoldState source : sources) {
            source.clear();
        }
        // Update directly from window-derived hold.
        Instant hold = result.getTimestampCombiner().assign(resultWindow, BoundedWindow.TIMESTAMP_MIN_VALUE);
        checkState(hold.isAfter(BoundedWindow.TIMESTAMP_MIN_VALUE));
        result.add(hold);
    } else {
        // Prefetch: request every read before the first read() call, so that state
        // implementations which can fetch ahead or batch get the chance to do so.
        for (WatermarkHoldState source : sources) {
            source.readLater();
        }
        // Read. Sources that hold nothing return null and are skipped.
        List<Instant> outputTimesToMerge = new ArrayList<>(sources.size());
        for (WatermarkHoldState source : sources) {
            Instant sourceOutputTime = source.read();
            if (sourceOutputTime != null) {
                outputTimesToMerge.add(sourceOutputTime);
            }
        }
        // Clear sources. This must happen after reading and before adding to result, because
        // result may itself be one of the sources.
        for (WatermarkHoldState source : sources) {
            source.clear();
        }
        if (!outputTimesToMerge.isEmpty()) {
            // Merge and update.
            result.add(result.getTimestampCombiner().merge(resultWindow, outputTimesToMerge));
        }
    }
}
Also used : Instant(org.joda.time.Instant) ArrayList(java.util.ArrayList) ReadableState(org.apache.beam.sdk.state.ReadableState) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState)

Example 63 with Instant

use of org.joda.time.Instant in project beam by apache.

the class ReduceFnRunner method onTimers.

/**
 * Processes a batch of fired timers.
 *
 * <p>The batch is walked three times so that state reads can be requested before they are
 * needed: first a context pair is built for every timer and the trigger state is prefetched;
 * then, for windows that are active and open, the state needed to trigger or emit is
 * prefetched; finally each timer is actually processed, either as a garbage-collection timer
 * (emit a final pane if needed, then clear all state) or as a regular firing.
 *
 * @param timers the fired timers; each one must live in a {@code WindowNamespace}
 * @throws Exception if processing any of the timers fails
 */
public void onTimers(Iterable<TimerData> timers) throws Exception {
    if (!timers.iterator().hasNext()) {
        return;
    }
    // Create a reusable context for each timer and begin prefetching necessary
    // state.
    List<EnrichedTimerData> enrichedTimers = new LinkedList<>();
    for (TimerData timer : timers) {
        checkArgument(timer.getNamespace() instanceof WindowNamespace, "Expected timer to be in WindowNamespace, but was in %s", timer.getNamespace());
        // Safe: the namespace was just checked to be a WindowNamespace.
        @SuppressWarnings("unchecked") WindowNamespace<W> windowNamespace = (WindowNamespace<W>) timer.getNamespace();
        W window = windowNamespace.getWindow();
        ReduceFn<K, InputT, OutputT, W>.Context<K, InputT, OutputT, W> directContext = contextFactory.base(window, StateStyle.DIRECT);
        ReduceFn<K, InputT, OutputT, W>.Context<K, InputT, OutputT, W> renamedContext = contextFactory.base(window, StateStyle.RENAMED);
        EnrichedTimerData enrichedTimer = new EnrichedTimerData(timer, directContext, renamedContext);
        enrichedTimers.add(enrichedTimer);
        // Perform prefetching of state to determine if the trigger should fire.
        if (enrichedTimer.isGarbageCollection) {
            triggerRunner.prefetchIsClosed(directContext.state());
        } else {
            triggerRunner.prefetchShouldFire(directContext.window(), directContext.state());
        }
    }
    // For those windows that are active and open, prefetch the triggering or emitting state.
    for (EnrichedTimerData timer : enrichedTimers) {
        if (timer.windowIsActiveAndOpen()) {
            ReduceFn<K, InputT, OutputT, W>.Context<K, InputT, OutputT, W> directContext = timer.directContext;
            if (timer.isGarbageCollection) {
                prefetchOnTrigger(directContext, timer.renamedContext);
            } else if (triggerRunner.shouldFire(directContext.window(), directContext.timers(), directContext.state())) {
                prefetchEmit(directContext, timer.renamedContext);
            }
        }
    }
    // Perform processing now that everything is prefetched.
    for (EnrichedTimerData timer : enrichedTimers) {
        ReduceFn<K, InputT, OutputT, W>.Context<K, InputT, OutputT, W> directContext = timer.directContext;
        ReduceFn<K, InputT, OutputT, W>.Context<K, InputT, OutputT, W> renamedContext = timer.renamedContext;
        if (timer.isGarbageCollection) {
            WindowTracing.debug("ReduceFnRunner.onTimer: Cleaning up for key:{}; window:{} at {} with " + "inputWatermark:{}; outputWatermark:{}", key, directContext.window(), timer.timestamp, timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime());
            boolean windowIsActiveAndOpen = timer.windowIsActiveAndOpen();
            if (windowIsActiveAndOpen) {
                // We need to call onTrigger to emit the final pane if required.
                // The final pane *may* be ON_TIME if no prior ON_TIME pane has been emitted,
                // and the watermark has passed the end of the window.
                @Nullable Instant newHold = onTrigger(directContext, renamedContext, true /* isFinished */, timer.isEndOfWindow);
                checkState(newHold == null, "Hold placed at %s despite isFinished being true.", newHold);
            }
            // Cleanup flavor B: Clear all the remaining state for this window since we'll never
            // see elements for it again.
            clearAllState(directContext, renamedContext, windowIsActiveAndOpen);
        } else {
            WindowTracing.debug("ReduceFnRunner.onTimer: Triggering for key:{}; window:{} at {} with " + "inputWatermark:{}; outputWatermark:{}", key, directContext.window(), timer.timestamp, timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime());
            if (timer.windowIsActiveAndOpen() && triggerRunner.shouldFire(directContext.window(), directContext.timers(), directContext.state())) {
                emit(directContext, renamedContext);
            }
            if (timer.isEndOfWindow) {
                // If the window strategy trigger includes a watermark trigger then at this point
                // there should be no data holds, either because we'd already cleared them on an
                // earlier onTrigger, or because we just cleared them on the above emit.
                // We could assert this but it is very expensive.
                // Since we are processing an on-time firing we should schedule the garbage collection
                // timer. (If getAllowedLateness is zero then the timer event will be considered a
                // cleanup event and handled by the above).
                // Note we must do this even if the trigger is finished so that we are sure to cleanup
                // any final trigger finished bits.
                checkState(windowingStrategy.getAllowedLateness().isLongerThan(Duration.ZERO), "Unexpected zero getAllowedLateness");
                Instant cleanupTime = LateDataUtils.garbageCollectionTime(directContext.window(), windowingStrategy);
                WindowTracing.debug("ReduceFnRunner.onTimer: Scheduling cleanup timer for key:{}; window:{} at {} with " + "inputWatermark:{}; outputWatermark:{}", key, directContext.window(), cleanupTime, timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime());
                checkState(!cleanupTime.isAfter(BoundedWindow.TIMESTAMP_MAX_VALUE), "Cleanup time %s is beyond end-of-time", cleanupTime);
                directContext.timers().setTimer(cleanupTime, TimeDomain.EVENT_TIME);
            }
        }
    }
}
Also used : Instant(org.joda.time.Instant) LinkedList(java.util.LinkedList) WindowNamespace(org.apache.beam.runners.core.StateNamespaces.WindowNamespace) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) Nullable(javax.annotation.Nullable)

Example 64 with Instant

use of org.joda.time.Instant in project beam by apache.

the class ReduceFnRunner method onTrigger.

/**
 * Run the {@link ReduceFn#onTrigger} method and produce any necessary output.
 *
 * <p>Reads the next pane info, extracts (and thereby clears) the watermark holds for the
 * window, validates any replacement hold that was left behind, and then invokes the
 * {@link ReduceFn} only if {@code needToEmit} says a pane should be produced.
 *
 * @param directContext context supplying the window, and used for reading and storing pane info
 * @param renamedContext context used for extracting holds and for the non-empty-pane check
 * @param isFinished forwarded to the pane info tracker and to hold extraction; when true, no
 *     new hold may result (enforced by a {@code checkState} below)
 * @param isEndOfWindow when true, a new hold at the end of the window is treated as an error
 * @return output watermark hold added, or {@literal null} if none.
 * @throws Exception declared because {@code reduceFn.onTrigger} is invoked here
 */
@Nullable
private Instant onTrigger(final ReduceFn<K, InputT, OutputT, W>.Context<K, InputT, OutputT, W> directContext, ReduceFn<K, InputT, OutputT, W>.Context<K, InputT, OutputT, W> renamedContext, boolean isFinished, boolean isEndOfWindow) throws Exception {
    // Captured up front; used only to validate the new hold below.
    Instant inputWM = timerInternals.currentInputWatermarkTime();
    // Calculate the pane info.
    final PaneInfo pane = paneInfoTracker.getNextPaneInfo(directContext, isFinished).read();
    // Extract the window hold, and as a side effect clear it.
    final WatermarkHold.OldAndNewHolds pair = watermarkHold.extractAndRelease(renamedContext, isFinished).read();
    // The old hold becomes the timestamp of whatever this pane outputs.
    // TODO: This isn't accurate if the elements are late. See BEAM-2262
    final Instant outputTimestamp = pair.oldHold;
    @Nullable Instant newHold = pair.newHold;
    final boolean isEmpty = nonEmptyPanes.isEmpty(renamedContext.state()).read();
    // Sanity-check the replacement hold: it is only legal while the window is unfinished, must
    // not be behind the input watermark, and must sit exactly at either the garbage collection
    // time or the end of the window.
    if (newHold != null) {
        // We can't be finished yet.
        checkState(!isFinished, "new hold at %s but finished %s", newHold, directContext.window());
        // The hold cannot be behind the input watermark.
        checkState(!newHold.isBefore(inputWM), "new hold %s is before input watermark %s", newHold, inputWM);
        if (newHold.isAfter(directContext.window().maxTimestamp())) {
            // The hold must be for garbage collection, which can't have happened yet.
            checkState(newHold.isEqual(LateDataUtils.garbageCollectionTime(directContext.window(), windowingStrategy)), "new hold %s should be at garbage collection for window %s plus %s", newHold, directContext.window(), windowingStrategy.getAllowedLateness());
        } else {
            // The hold must be for the end-of-window, which can't have happened yet.
            checkState(newHold.isEqual(directContext.window().maxTimestamp()), "new hold %s should be at end of window %s", newHold, directContext.window());
            checkState(!isEndOfWindow, "new hold at %s for %s but this is the watermark trigger", newHold, directContext.window());
        }
    }
    // Only emit a pane if it has data or empty panes are observable.
    if (needToEmit(isEmpty, isFinished, pane.getTiming())) {
        // Run reduceFn.onTrigger method.
        final List<W> windows = Collections.singletonList(directContext.window());
        // The callback below is handed to the trigger context; its output() is what actually
        // writes each value produced by the ReduceFn.
        ReduceFn<K, InputT, OutputT, W>.OnTriggerContext<K, InputT, OutputT, W> renamedTriggerContext = contextFactory.forTrigger(directContext.window(), pane, StateStyle.RENAMED, new OnTriggerCallbacks<OutputT>() {

            @Override
            public void output(OutputT toOutput) {
                // We're going to output panes, so commit the (now used) PaneInfo.
                // TODO: This is unnecessary if the trigger isFinished since the saved
                // state will be immediately deleted.
                paneInfoTracker.storeCurrentPaneInfo(directContext, pane);
                // Output the actual value.
                outputter.outputWindowedValue(KV.of(key, toOutput), outputTimestamp, windows, pane);
            }
        });
        reduceFn.onTrigger(renamedTriggerContext);
    }
    // Returned even when no pane was emitted: the holds were already extracted above.
    return newHold;
}
Also used : Instant(org.joda.time.Instant) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) Nullable(javax.annotation.Nullable)

Example 65 with Instant

use of org.joda.time.Instant in project beam by apache.

the class WatermarkHold method extractAndRelease.

/**
 * Return (a future for) the earliest hold for {@code context}. Reading the future clears every
 * hold and then, unless {@code isFinished}, puts back an end-of-window or garbage collection
 * hold.
 *
 * <p>The timestamp reported as the old hold is the output timestamp according to the
 * {@link TimestampCombiner} from the windowing strategy of this {@link WatermarkHold}, combined
 * across all the non-late elements in the current pane. When there is no such value, or it lies
 * past the end of the window, the end of the window is reported instead.
 */
public ReadableState<OldAndNewHolds> extractAndRelease(final ReduceFn<?, ?, ?, W>.Context<?, ?, ?, W> context, final boolean isFinished) {
    WindowTracing.debug("WatermarkHold.extractAndRelease: for key:{}; window:{}; inputWatermark:{}; " + "outputWatermark:{}", context.key(), context.window(), timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime());
    final WatermarkHoldState elementState = context.state().access(elementHoldTag);
    final WatermarkHoldState extraState = context.state().access(EXTRA_HOLD_TAG);
    return new ReadableState<OldAndNewHolds>() {

        @Override
        public ReadableState<OldAndNewHolds> readLater() {
            // Forward the request to both underlying holds.
            elementState.readLater();
            extraState.readLater();
            return this;
        }

        @Override
        public OldAndNewHolds read() {
            // Fetch the element hold first, then the extra hold.
            Instant fromElements = elementState.read();
            Instant fromExtra = extraState.read();
            // Take the earlier of the two; a missing (null) hold defers to the other one.
            Instant earliest;
            if (fromElements != null && fromExtra != null) {
                earliest = fromElements.isBefore(fromExtra) ? fromElements : fromExtra;
            } else {
                earliest = (fromElements != null) ? fromElements : fromExtra;
            }
            boolean absentOrPastWindow = earliest == null || earliest.isAfter(context.window().maxTimestamp());
            if (absentOrPastWindow) {
                // Either nothing was held (eg because all elements came in behind the output
                // watermark) or the hold was for garbage collection: report the end of the
                // window instead.
                WindowTracing.debug("WatermarkHold.extractAndRelease.read: clipping from {} to end of window " + "for key:{}; window:{}", earliest, context.key(), context.window());
                earliest = context.window().maxTimestamp();
            }
            WindowTracing.debug("WatermarkHold.extractAndRelease.read: clearing for key:{}; window:{}", context.key(), context.window());
            // Drop both holds so that the output watermark can advance.
            elementState.clear();
            extraState.clear();
            // A replacement end-of-window or garbage collection hold is only needed while
            // further elements may still be processed.
            @Nullable Instant replacement = isFinished ? null : addEndOfWindowOrGarbageCollectionHolds(context, true);
            return new OldAndNewHolds(earliest, replacement);
        }
    };
}
Also used : Instant(org.joda.time.Instant) ReadableState(org.apache.beam.sdk.state.ReadableState) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) Nullable(javax.annotation.Nullable)

Aggregations

Instant (org.joda.time.Instant)410 Test (org.junit.Test)326 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)135 KV (org.apache.beam.sdk.values.KV)64 Category (org.junit.experimental.categories.Category)60 WindowedValue (org.apache.beam.sdk.util.WindowedValue)47 Duration (org.joda.time.Duration)44 ReadableInstant (org.joda.time.ReadableInstant)36 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)30 WatermarkHoldState (org.apache.beam.sdk.state.WatermarkHoldState)24 TimerData (org.apache.beam.runners.core.TimerInternals.TimerData)22 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)22 HashMap (java.util.HashMap)19 TransformWatermarks (org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks)18 StateNamespaceForTest (org.apache.beam.runners.core.StateNamespaceForTest)17 WindowMatchers.isSingleWindowedValue (org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue)17 WindowMatchers.isWindowedValue (org.apache.beam.runners.core.WindowMatchers.isWindowedValue)17 TupleTag (org.apache.beam.sdk.values.TupleTag)14 ArrayList (java.util.ArrayList)13 Map (java.util.Map)12