Search in sources:

Example 1 with WatermarkFunction

use of org.apache.samza.operators.functions.WatermarkFunction in project samza by apache.

the class OperatorImpl method onMessageAsync.

/**
 * Applies this operator to a single input message and forwards every result to each registered
 * downstream operator.
 *
 * <p>The message counter is incremented up front. The elapsed time between entering this method and
 * the completion of the future returned by {@code handleMessageAsync} is recorded in
 * {@code handleMessageNs}. If the operator spec carries a {@link WatermarkFunction}, its output
 * watermark is propagated after all downstream operators have completed.
 *
 * @param message the input message to process
 * @param collector the message collector passed through to this and downstream operators
 * @param coordinator the task coordinator passed through to this and downstream operators
 * @return a stage that completes once all downstream operators (and the watermark propagation, if
 *     any) have completed
 * @throws SamzaException if {@code handleMessageAsync} throws a {@link ClassCastException}; the
 *     original exception is preserved as the cause
 */
public final CompletionStage<Void> onMessageAsync(M message, MessageCollector collector, TaskCoordinator coordinator) {
    this.numMessage.inc();
    long startNs = this.highResClock.nanoTime();
    CompletionStage<Collection<RM>> completableResultsFuture;
    try {
        completableResultsFuture = handleMessageAsync(message, collector, coordinator);
    } catch (ClassCastException e) {
        // The type names are recovered from the "X cannot be cast to Y" exception message.
        // getMessage() may be null (e.g. a ClassCastException constructed without a message); fall
        // back to placeholders so the diagnostic below is still raised instead of an NPE that
        // would hide the original cause.
        String castMessage = e.getMessage();
        String actualType = "unknown";
        String expectedType = "unknown";
        if (castMessage != null) {
            actualType = castMessage.replaceFirst(" cannot be cast to .*", "");
            expectedType = castMessage.replaceFirst(".* cannot be cast to ", "");
        }
        throw new SamzaException(String.format("Error applying operator %s (created at %s) to its input message. " + "Expected input message to be of type %s, but found it to be of type %s. " + "Are Serdes for the inputs to this operator configured correctly?", getOpImplId(), getOperatorSpec().getSourceLocation(), expectedType, actualType), e);
    }
    CompletionStage<Void> result = completableResultsFuture.thenCompose(results -> {
        // Record how long it took for this operator's results to become available.
        long endNs = this.highResClock.nanoTime();
        this.handleMessageNs.update(endNs - startNs);
        // Fan out: every result goes to every registered downstream operator; wait for all of them.
        return CompletableFuture.allOf(results.stream().flatMap(r -> this.registeredOperators.stream().map(op -> op.onMessageAsync(r, collector, coordinator))).toArray(CompletableFuture[]::new));
    });
    WatermarkFunction watermarkFn = getOperatorSpec().getWatermarkFn();
    if (watermarkFn != null) {
        // check whether there is new watermark emitted from the user function
        // NOTE(review): the output watermark is read here, before `result` has necessarily
        // completed - confirm this is intended for handlers that complete asynchronously.
        Long outputWm = watermarkFn.getOutputWatermark();
        return result.thenCompose(ignored -> propagateWatermark(outputWm, collector, coordinator));
    }
    return result;
}
Also used : ScheduledFunction(org.apache.samza.operators.functions.ScheduledFunction) MetricsConfig(org.apache.samza.config.MetricsConfig) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) CompletableFuture(java.util.concurrent.CompletableFuture) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) TaskContext(org.apache.samza.context.TaskContext) WatermarkFunction(org.apache.samza.operators.functions.WatermarkFunction) Counter(org.apache.samza.metrics.Counter) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) CallbackScheduler(org.apache.samza.scheduler.CallbackScheduler) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) WatermarkMessage(org.apache.samza.system.WatermarkMessage) HighResolutionClock(org.apache.samza.util.HighResolutionClock) LinkedHashSet(java.util.LinkedHashSet) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) Timer(org.apache.samza.metrics.Timer) Collection(java.util.Collection) ContainerContext(org.apache.samza.context.ContainerContext) Set(java.util.Set) Scheduler(org.apache.samza.operators.Scheduler) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) SamzaException(org.apache.samza.SamzaException) TaskCoordinator(org.apache.samza.task.TaskCoordinator) Context(org.apache.samza.context.Context) CompletionStage(java.util.concurrent.CompletionStage) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Config(org.apache.samza.config.Config) Collections(java.util.Collections) InternalTaskContext(org.apache.samza.context.InternalTaskContext) CompletableFuture(java.util.concurrent.CompletableFuture) WatermarkFunction(org.apache.samza.operators.functions.WatermarkFunction) Collection(java.util.Collection) SamzaException(org.apache.samza.SamzaException)

Example 2 with WatermarkFunction

use of org.apache.samza.operators.functions.WatermarkFunction in project samza by apache.

the class OperatorImpl method onWatermark.

/**
 * Handles a watermark arriving from an upstream operator.
 *
 * <p>The candidate input watermark is the incoming value when this operator has no upstream
 * operators, and otherwise the minimum output watermark across all upstream operators. When the
 * candidate is ahead of the current watermark, the current watermark is advanced, the watermark is
 * handled (by the user-supplied {@link WatermarkFunction} if one is present, else by
 * {@code handleWatermark()}), any resulting messages are sent to the registered downstream
 * operators, and finally the output watermark is propagated.
 *
 * @param watermark incoming watermark from an upstream operator
 * @param collector message collector
 * @param coordinator task coordinator
 * @return a stage that completes when downstream delivery and watermark propagation are done, or an
 *     already-completed stage when the watermark did not advance
 */
private CompletionStage<Void> onWatermark(long watermark, MessageCollector collector, TaskCoordinator coordinator) {
    // Input operators take the source watermark as-is; for all others,
    // InputWatermark(op) = min { OutputWatermark(op') | op' is upstream of op }
    final long candidateWatermark = prevOperators.isEmpty()
        ? watermark
        : prevOperators.stream().map(op -> op.getOutputWatermark()).min(Long::compare).get();

    if (currentWatermark >= candidateWatermark) {
        // The watermark did not move forward, so there is nothing to handle or propagate.
        return CompletableFuture.completedFuture(null);
    }

    // Move this operator's watermark forward.
    currentWatermark = candidateWatermark;
    LOG.trace("Advance input watermark to {} in operator {}", currentWatermark, getOpImplId());

    final WatermarkFunction userWatermarkFn = getOperatorSpec().getWatermarkFn();
    final Collection<RM> emitted;
    final Long nextWatermark;
    if (userWatermarkFn == null) {
        // Samza-provided handling: by default the input watermark is passed through unchanged.
        emitted = handleWatermark(currentWatermark, collector, coordinator);
        nextWatermark = currentWatermark;
    } else {
        // User-overridden handling: the user function decides both the output and the watermark.
        emitted = (Collection<RM>) userWatermarkFn.processWatermark(currentWatermark);
        nextWatermark = userWatermarkFn.getOutputWatermark();
    }

    final CompletionStage<Void> downstreamDone;
    if (emitted.isEmpty()) {
        downstreamDone = CompletableFuture.completedFuture(null);
    } else {
        // Deliver every emitted message to every registered downstream operator.
        CompletableFuture[] deliveries = emitted.stream()
            .flatMap(rm -> this.registeredOperators.stream().map(op -> op.onMessageAsync(rm, collector, coordinator)))
            .toArray(CompletableFuture[]::new);
        downstreamDone = CompletableFuture.allOf(deliveries);
    }

    // Propagate the watermark only after all downstream deliveries have completed.
    return downstreamDone.thenCompose(res -> propagateWatermark(nextWatermark, collector, coordinator));
}
Also used : ScheduledFunction(org.apache.samza.operators.functions.ScheduledFunction) MetricsConfig(org.apache.samza.config.MetricsConfig) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) CompletableFuture(java.util.concurrent.CompletableFuture) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) TaskContext(org.apache.samza.context.TaskContext) WatermarkFunction(org.apache.samza.operators.functions.WatermarkFunction) Counter(org.apache.samza.metrics.Counter) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) CallbackScheduler(org.apache.samza.scheduler.CallbackScheduler) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) WatermarkMessage(org.apache.samza.system.WatermarkMessage) HighResolutionClock(org.apache.samza.util.HighResolutionClock) LinkedHashSet(java.util.LinkedHashSet) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) Timer(org.apache.samza.metrics.Timer) Collection(java.util.Collection) ContainerContext(org.apache.samza.context.ContainerContext) Set(java.util.Set) Scheduler(org.apache.samza.operators.Scheduler) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) SamzaException(org.apache.samza.SamzaException) TaskCoordinator(org.apache.samza.task.TaskCoordinator) Context(org.apache.samza.context.Context) CompletionStage(java.util.concurrent.CompletionStage) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Config(org.apache.samza.config.Config) Collections(java.util.Collections) InternalTaskContext(org.apache.samza.context.InternalTaskContext) WatermarkFunction(org.apache.samza.operators.functions.WatermarkFunction)

Aggregations

VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 Collection (java.util.Collection)2 Collections (java.util.Collections)2 LinkedHashSet (java.util.LinkedHashSet)2 Set (java.util.Set)2 CompletableFuture (java.util.concurrent.CompletableFuture)2 CompletionStage (java.util.concurrent.CompletionStage)2 SamzaException (org.apache.samza.SamzaException)2 Config (org.apache.samza.config.Config)2 JobConfig (org.apache.samza.config.JobConfig)2 MetricsConfig (org.apache.samza.config.MetricsConfig)2 TaskName (org.apache.samza.container.TaskName)2 ContainerContext (org.apache.samza.context.ContainerContext)2 Context (org.apache.samza.context.Context)2 InternalTaskContext (org.apache.samza.context.InternalTaskContext)2 TaskContext (org.apache.samza.context.TaskContext)2 TaskModel (org.apache.samza.job.model.TaskModel)2 Counter (org.apache.samza.metrics.Counter)2 MetricsRegistry (org.apache.samza.metrics.MetricsRegistry)2 Timer (org.apache.samza.metrics.Timer)2