Search in sources :

Example 1 with StepContext

use of org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StepContext in project beam by apache.

In class StreamingModeExecutionContext, method flushState:

/**
 * Flushes the state of every step context and, when a reader is active, records that reader's
 * checkpoint, watermark and backlog on the output builder before handing the reader over to the
 * reader cache.
 *
 * @return a map from finalize id to the callback that finalizes the matching checkpoint mark;
 *     empty when no reader is active
 * @throws RuntimeException if encoding the checkpoint mark fails with an {@link IOException}
 */
public Map<Long, Runnable> flushState() {
    Map<Long, Runnable> finalizers = new HashMap<>();
    for (StepContext step : getAllStepContexts()) {
        step.flushState();
    }
    if (activeReader == null) {
        // Nothing was read from a source, so there is no checkpoint to report.
        return finalizers;
    }

    Windmill.SourceState.Builder sourceState = outputBuilder.getSourceStateUpdatesBuilder();
    final UnboundedSource.CheckpointMark mark = activeReader.getCheckpointMark();
    final Instant readerWatermark = activeReader.getWatermark();

    // The same random id is written to the source state and used as the callback key.
    long finalizeId = ThreadLocalRandom.current().nextLong();
    sourceState.addFinalizeIds(finalizeId);
    finalizers.put(finalizeId, () -> {
        try {
            mark.finalizeCheckpoint();
        } catch (IOException e) {
            throw new RuntimeException("Exception while finalizing checkpoint", e);
        }
    });

    @SuppressWarnings("unchecked")
    UnboundedSource<?, UnboundedSource.CheckpointMark> currentSource = (UnboundedSource<?, UnboundedSource.CheckpointMark>) activeReader.getCurrentSource();
    Coder<UnboundedSource.CheckpointMark> markCoder = currentSource.getCheckpointMarkCoder();
    if (markCoder != null) {
        // Serialize the checkpoint mark so it travels with the source state update.
        ByteString.Output encoded = ByteString.newOutput();
        try {
            markCoder.encode(mark, encoded, Coder.Context.OUTER);
        } catch (IOException e) {
            throw new RuntimeException("Exception while encoding checkpoint", e);
        }
        sourceState.setState(encoded.toByteString());
    }
    outputBuilder.setSourceWatermark(WindmillTimeUtils.harnessToWindmillTimestamp(readerWatermark));

    backlogBytes = activeReader.getSplitBacklogBytes();
    boolean splitBacklogUnknown = backlogBytes == UnboundedSource.UnboundedReader.BACKLOG_UNKNOWN;
    if (splitBacklogUnknown && WorkerCustomSources.isFirstUnboundedSourceSplit(getSerializedKey())) {
        // Only the first split falls back to the total backlog.
        backlogBytes = activeReader.getTotalBacklogBytes();
    }
    outputBuilder.setSourceBacklogBytes(backlogBytes);

    // Park the reader in the cache and drop our reference to it.
    readerCache.cacheReader(getComputationKey(), getWork().getCacheToken(), getWork().getWorkToken(), activeReader);
    activeReader = null;
    return finalizers;
}
Also used : StepContext(org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StepContext) HashMap(java.util.HashMap) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Instant(org.joda.time.Instant) IOException(java.io.IOException) AtomicLong(java.util.concurrent.atomic.AtomicLong) UnboundedSource(org.apache.beam.sdk.io.UnboundedSource)

Example 2 with StepContext

use of org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StepContext in project beam by apache.

In class StreamingModeExecutionContext, method start:

/**
 * Prepares this context for a work item: stores the per-work fields, clears the side input cache
 * and the sink-full hint, picks a processing time, and starts every step context.
 *
 * @param key value stored in this context's {@code key} field; may be null
 * @param work the Windmill work item; its key, sharding key and timers are read here
 * @param inputDataWatermark passed through to each step context
 * @param outputDataWatermark passed through to each step context; may be null
 * @param synchronizedProcessingTime passed through to each step context; may be null
 * @param stateReader passed through to each step context
 * @param stateFetcher stored in this context's {@code stateFetcher} field
 * @param outputBuilder stored in this context's {@code outputBuilder} field
 */
public void start(@Nullable Object key, Windmill.WorkItem work, Instant inputDataWatermark, @Nullable Instant outputDataWatermark, @Nullable Instant synchronizedProcessingTime, WindmillStateReader stateReader, StateFetcher stateFetcher, Windmill.WorkItemCommitRequest.Builder outputBuilder) {
    this.key = key;
    this.work = work;
    this.computationKey = WindmillComputationKey.create(computationId, work.getKey(), work.getShardingKey());
    this.stateFetcher = stateFetcher;
    this.outputBuilder = outputBuilder;
    this.sideInputCache.clear();
    clearSinkFullHint();

    // Ensure that the processing time is later than any REALTIME timer in this work item.
    // Otherwise a trigger could ignore the timer and orphan the window.
    Instant processingTime = Instant.now();
    for (Windmill.Timer firedTimer : work.getTimers().getTimersList()) {
        if (firedTimer.getType() != Windmill.Timer.Type.REALTIME) {
            continue;
        }
        // One millisecond past the timer's timestamp.
        Instant justAfterFiring = WindmillTimeUtils.windmillToHarnessTimestamp(firedTimer.getTimestamp()).plus(Duration.millis(1));
        if (justAfterFiring.isAfter(processingTime)) {
            processingTime = justAfterFiring;
        }
    }

    Collection<? extends StepContext> allSteps = getAllStepContexts();
    if (allSteps.isEmpty()) {
        return;
    }
    // This must be only created once for the workItem as token validation will fail if the same
    // work token is reused.
    WindmillStateCache.ForKey keyCache = stateCache.forKey(getComputationKey(), getWork().getCacheToken(), getWorkToken());
    for (StepContext step : allSteps) {
        step.start(stateReader, inputDataWatermark, processingTime, keyCache, outputDataWatermark, synchronizedProcessingTime);
    }
}
Also used : StepContext(org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StepContext) Timer(org.apache.beam.runners.dataflow.worker.windmill.Windmill.Timer) Instant(org.joda.time.Instant) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill)

Aggregations

StepContext (org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StepContext)2 Instant (org.joda.time.Instant)2 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 Windmill (org.apache.beam.runners.dataflow.worker.windmill.Windmill)1 Timer (org.apache.beam.runners.dataflow.worker.windmill.Windmill.Timer)1 UnboundedSource (org.apache.beam.sdk.io.UnboundedSource)1 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)1