
Example 86 with StreamsException

use of org.apache.kafka.streams.errors.StreamsException in project kafka by apache.

the class TaskExecutor method commitOffsetsOrTransaction.

/**
 * Caution: do not invoke this directly if it's possible a rebalance is occurring, as the commit will fail. If
 * this is a possibility, prefer {@link #commitTasksAndMaybeUpdateCommittableOffsets} instead.
 *
 * @throws TaskMigratedException   if committing offsets failed due to CommitFailedException (non-EOS)
 * @throws TimeoutException        if committing offsets failed due to TimeoutException (non-EOS)
 * @throws TaskCorruptedException  if committing offsets failed due to TimeoutException (EOS)
 */
void commitOffsetsOrTransaction(final Map<Task, Map<TopicPartition, OffsetAndMetadata>> offsetsPerTask) {
    // avoid logging actual Task objects
    log.debug("Committing task offsets {}", offsetsPerTask.entrySet().stream().collect(Collectors.toMap(t -> t.getKey().id(), Entry::getValue)));
    final Set<TaskId> corruptedTasks = new HashSet<>();
    if (!offsetsPerTask.isEmpty()) {
        if (processingMode == EXACTLY_ONCE_ALPHA) {
            for (final Map.Entry<Task, Map<TopicPartition, OffsetAndMetadata>> taskToCommit : offsetsPerTask.entrySet()) {
                final Task task = taskToCommit.getKey();
                try {
                    tasks.streamsProducerForTask(task.id()).commitTransaction(taskToCommit.getValue(), tasks.mainConsumer().groupMetadata());
                    updateTaskCommitMetadata(taskToCommit.getValue());
                } catch (final TimeoutException timeoutException) {
                    log.error(String.format("Committing task %s failed.", task.id()), timeoutException);
                    corruptedTasks.add(task.id());
                }
            }
        } else {
            final Map<TopicPartition, OffsetAndMetadata> allOffsets = offsetsPerTask.values().stream()
                .flatMap(e -> e.entrySet().stream())
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
            if (processingMode == EXACTLY_ONCE_V2) {
                try {
                    tasks.threadProducer().commitTransaction(allOffsets, tasks.mainConsumer().groupMetadata());
                    updateTaskCommitMetadata(allOffsets);
                } catch (final TimeoutException timeoutException) {
                    log.error(String.format("Committing task(s) %s failed.", offsetsPerTask.keySet().stream().map(t -> t.id().toString()).collect(Collectors.joining(", "))), timeoutException);
                    offsetsPerTask.keySet().forEach(task -> corruptedTasks.add(task.id()));
                }
            } else {
                try {
                    tasks.mainConsumer().commitSync(allOffsets);
                    updateTaskCommitMetadata(allOffsets);
                } catch (final CommitFailedException error) {
                    throw new TaskMigratedException("Consumer committing offsets failed, " +
                        "indicating the corresponding thread is no longer part of the group", error);
                } catch (final TimeoutException timeoutException) {
                    log.error(String.format("Committing task(s) %s failed.", offsetsPerTask.keySet().stream().map(t -> t.id().toString()).collect(Collectors.joining(", "))), timeoutException);
                    throw timeoutException;
                } catch (final KafkaException error) {
                    throw new StreamsException("Error encountered committing offsets via consumer", error);
                }
            }
        }
        if (!corruptedTasks.isEmpty()) {
            throw new TaskCorruptedException(corruptedTasks);
        }
    }
}
Also used :
TopicPartition(org.apache.kafka.common.TopicPartition)
TimeoutException(org.apache.kafka.common.errors.TimeoutException)
Logger(org.slf4j.Logger)
Time(org.apache.kafka.common.utils.Time)
TaskId(org.apache.kafka.streams.processor.TaskId)
Collection(java.util.Collection)
KafkaException(org.apache.kafka.common.KafkaException)
Set(java.util.Set)
HashMap(java.util.HashMap)
StreamsException(org.apache.kafka.streams.errors.StreamsException)
ProcessingMode(org.apache.kafka.streams.internals.StreamsConfigUtils.ProcessingMode)
Collectors(java.util.stream.Collectors)
HashSet(java.util.HashSet)
EXACTLY_ONCE_V2(org.apache.kafka.streams.internals.StreamsConfigUtils.ProcessingMode.EXACTLY_ONCE_V2)
EXACTLY_ONCE_ALPHA(org.apache.kafka.streams.internals.StreamsConfigUtils.ProcessingMode.EXACTLY_ONCE_ALPHA)
TaskMigratedException(org.apache.kafka.streams.errors.TaskMigratedException)
LogContext(org.apache.kafka.common.utils.LogContext)
Map(java.util.Map)
OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata)
Entry(java.util.Map.Entry)
CommitFailedException(org.apache.kafka.clients.consumer.CommitFailedException)
TaskCorruptedException(org.apache.kafka.streams.errors.TaskCorruptedException)
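
The three @throws clauses above matter because each failure calls for a different recovery: TaskMigratedException means the thread lost its tasks to a rebalance, TaskCorruptedException means local state must be wiped and restored, and a plain TimeoutException can simply be retried later. Below is a minimal sketch of a caller acting on that contract; the TaskManagerLike interface and its handleLostAll/handleCorruption methods are hypothetical stand-ins for the surrounding task-management code, not Kafka API:

import java.util.Set;
import org.apache.kafka.common.errors.TimeoutException;
import org.apache.kafka.streams.errors.TaskCorruptedException;
import org.apache.kafka.streams.errors.TaskMigratedException;
import org.apache.kafka.streams.processor.TaskId;

public class CommitRecoverySketch {

    // Hypothetical hooks standing in for the surrounding task-management code.
    interface TaskManagerLike {
        void handleLostAll();                     // hand off all tasks after a rebalance
        void handleCorruption(Set<TaskId> tasks); // wipe state and restore from changelogs
    }

    void commitWithRecovery(final Runnable commit, final TaskManagerLike taskManager) {
        try {
            commit.run(); // e.g. a call into commitOffsetsOrTransaction(...)
        } catch (final TaskMigratedException e) {
            // the thread was kicked out of the consumer group: release all tasks
            taskManager.handleLostAll();
        } catch (final TaskCorruptedException e) {
            // an EOS commit timed out: local state can no longer be trusted
            taskManager.handleCorruption(e.corruptedTasks());
        } catch (final TimeoutException e) {
            // a non-EOS commit timed out: safe to retry on the next commit interval
        }
    }
}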

Example 87 with StreamsException

use of org.apache.kafka.streams.errors.StreamsException in project kafka by apache.

the class StoreChangelogReader method restore.

// 1. if there are any registered changelogs that need initialization, try to initialize them first;
// 2. if all changelogs have finished, return early;
// 3. if there are any restoring changelogs, try to read from the restore consumer and process them.
@Override
public void restore(final Map<TaskId, Task> tasks) {
    initializeChangelogs(tasks, registeredChangelogs());
    if (!activeRestoringChangelogs().isEmpty() && state == ChangelogReaderState.STANDBY_UPDATING) {
        throw new IllegalStateException("Should not be in standby updating state if there are still un-completed active changelogs");
    }
    if (allChangelogsCompleted()) {
        log.debug("Finished restoring all changelogs {}", changelogs.keySet());
        return;
    }
    final Set<TopicPartition> restoringChangelogs = restoringChangelogs();
    if (!restoringChangelogs.isEmpty()) {
        final ConsumerRecords<byte[], byte[]> polledRecords;
        try {
            // for restoring active tasks and updating standby tasks we may prefer different
            // poll times, in order to make sure we call the main consumer#poll in time.
            // TODO: once we move ChangelogReader to a separate thread this may no longer be a concern
            polledRecords = restoreConsumer.poll(state == ChangelogReaderState.STANDBY_UPDATING ? Duration.ZERO : pollTime);
        // TODO (?) If we cannot fetch records during restore, should we trigger `task.timeout.ms` ?
        // TODO (?) If we cannot fetch records for standby task, should we trigger `task.timeout.ms` ?
        } catch (final InvalidOffsetException e) {
            log.warn("Encountered " + e.getClass().getName() + " fetching records from restore consumer for partitions " + e.partitions() + ", it is likely that " + "the consumer's position has fallen out of the topic partition offset range because the topic was " + "truncated or compacted on the broker, marking the corresponding tasks as corrupted and re-initializing" + " it later.", e);
            final Set<TaskId> corruptedTasks = new HashSet<>();
            e.partitions().forEach(partition -> corruptedTasks.add(changelogs.get(partition).stateManager.taskId()));
            throw new TaskCorruptedException(corruptedTasks, e);
        } catch (final KafkaException e) {
            throw new StreamsException("Restore consumer get unexpected error polling records.", e);
        }
        for (final TopicPartition partition : polledRecords.partitions()) {
            bufferChangelogRecords(restoringChangelogByPartition(partition), polledRecords.records(partition));
        }
        for (final TopicPartition partition : restoringChangelogs) {
            // even if some partitions do not have any accumulated data, we still trigger
            // restoring, since some changelogs may not need to restore anything at all, and the
            // restored-to-end check still needs to be executed.
            // TODO: we always try to restore as a batch when some records are accumulated, which may result in
            // small batches; this can be optimized in the future, e.g. wait longer for larger batches.
            final TaskId taskId = changelogs.get(partition).stateManager.taskId();
            try {
                if (restoreChangelog(changelogs.get(partition))) {
                    tasks.get(taskId).clearTaskTimeout();
                }
            } catch (final TimeoutException timeoutException) {
                tasks.get(taskId).maybeInitTaskTimeoutOrThrow(time.milliseconds(), timeoutException);
            }
        }
        maybeUpdateLimitOffsetsForStandbyChangelogs(tasks);
        maybeLogRestorationProgress();
    }
}
Also used :
StreamsConfig(org.apache.kafka.streams.StreamsConfig)
Arrays(java.util.Arrays)
TaskId(org.apache.kafka.streams.processor.TaskId)
KafkaException(org.apache.kafka.common.KafkaException)
HashMap(java.util.HashMap)
StreamsException(org.apache.kafka.streams.errors.StreamsException)
ClientUtils.fetchCommittedOffsets(org.apache.kafka.streams.processor.internals.ClientUtils.fetchCommittedOffsets)
ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords)
Function(java.util.function.Function)
ArrayList(java.util.ArrayList)
HashSet(java.util.HashSet)
ListOffsetsResult(org.apache.kafka.clients.admin.ListOffsetsResult)
LogContext(org.apache.kafka.common.utils.LogContext)
Duration(java.time.Duration)
Map(java.util.Map)
Admin(org.apache.kafka.clients.admin.Admin)
TaskCorruptedException(org.apache.kafka.streams.errors.TaskCorruptedException)
Consumer(org.apache.kafka.clients.consumer.Consumer)
TopicPartition(org.apache.kafka.common.TopicPartition)
StateStoreMetadata(org.apache.kafka.streams.processor.internals.ProcessorStateManager.StateStoreMetadata)
TimeoutException(org.apache.kafka.common.errors.TimeoutException)
Logger(org.slf4j.Logger)
Time(org.apache.kafka.common.utils.Time)
Collection(java.util.Collection)
Set(java.util.Set)
StateRestoreListener(org.apache.kafka.streams.processor.StateRestoreListener)
Collectors(java.util.stream.Collectors)
OffsetSpec(org.apache.kafka.clients.admin.OffsetSpec)
ExecutionException(java.util.concurrent.ExecutionException)
IsolationLevel(org.apache.kafka.common.IsolationLevel)
List(java.util.List)
ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord)
InvalidOffsetException(org.apache.kafka.clients.consumer.InvalidOffsetException)
ListOffsetsOptions(org.apache.kafka.clients.admin.ListOffsetsOptions)
Collections(java.util.Collections)
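
Two failure modes are folded into restore()'s contract: an InvalidOffsetException from the restore consumer becomes a TaskCorruptedException carrying the affected task ids, while any other KafkaException surfaces as a fatal StreamsException. Below is a minimal sketch of a driver loop reacting to that contract; ChangelogReaderLike, StateCleanerLike, and wipeAndReregister are hypothetical names, not Kafka API:

import java.util.Map;
import java.util.Set;
import org.apache.kafka.streams.errors.StreamsException;
import org.apache.kafka.streams.errors.TaskCorruptedException;
import org.apache.kafka.streams.processor.TaskId;

public class RestoreLoopSketch {

    // Hypothetical stand-ins for the real ChangelogReader and state-management types.
    interface ChangelogReaderLike { void restore(Map<TaskId, Object> tasks); }
    interface StateCleanerLike { void wipeAndReregister(Set<TaskId> corrupted); }

    void restoreOnce(final ChangelogReaderLike reader,
                     final Map<TaskId, Object> tasks,
                     final StateCleanerLike cleaner) {
        try {
            reader.restore(tasks);
        } catch (final TaskCorruptedException e) {
            // changelog positions fell out of range (topic truncated or compacted):
            // discard the local state of the named tasks and restore them from scratch
            cleaner.wipeAndReregister(e.corruptedTasks());
        } catch (final StreamsException fatal) {
            // unexpected consumer failure: there is nothing to recover, shut down
            throw fatal;
        }
    }
}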

Example 88 with StreamsException

use of org.apache.kafka.streams.errors.StreamsException in project kafka by apache.

the class StreamTask method commitNeeded.

@Override
public boolean commitNeeded() {
    // there may be non-data records, such as control markers, that were bypassed
    if (commitNeeded) {
        return true;
    } else {
        for (final Map.Entry<TopicPartition, Long> entry : consumedOffsets.entrySet()) {
            final TopicPartition partition = entry.getKey();
            try {
                final long offset = mainConsumer.position(partition);
                // the consumer position is the next record to fetch; if it is ahead of the
                // consumed offset by more than 1, there are control records, which the consumer
                // skips over silently
                if (offset > entry.getValue() + 1) {
                    commitNeeded = true;
                    entry.setValue(offset - 1);
                }
            } catch (final TimeoutException error) {
                // position() should have returned immediately, since we did process data for this
                // partition; hence, a `TimeoutException` indicates a bug and thus we rethrow it as
                // a fatal `IllegalStateException`
                throw new IllegalStateException(error);
            } catch (final KafkaException fatal) {
                throw new StreamsException(fatal);
            }
        }
        return commitNeeded;
    }
}
Also used :
TopicPartition(org.apache.kafka.common.TopicPartition)
StreamsException(org.apache.kafka.streams.errors.StreamsException)
KafkaException(org.apache.kafka.common.KafkaException)
HashMap(java.util.HashMap)
Map(java.util.Map)
TimeoutException(org.apache.kafka.common.errors.TimeoutException)
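
commitNeeded() lets the commit path skip tasks that made no progress, and the position() probe above additionally detects progress made through control records alone. Here is a sketch of how a commit pass might consult it; TaskLike is a hypothetical slice of Kafka's internal Task interface, with a prepareCommit() shape assumed for illustration:

import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

public class CommitNeededSketch {

    // Hypothetical slice of Kafka's internal Task interface.
    interface TaskLike {
        boolean commitNeeded();
        Map<TopicPartition, OffsetAndMetadata> prepareCommit();
    }

    // Collect offsets only from tasks that actually made progress;
    // idle tasks are skipped entirely, keeping commits cheap.
    Map<TaskLike, Map<TopicPartition, OffsetAndMetadata>> offsetsToCommit(final Iterable<TaskLike> tasks) {
        final Map<TaskLike, Map<TopicPartition, OffsetAndMetadata>> offsetsPerTask = new HashMap<>();
        for (final TaskLike task : tasks) {
            if (task.commitNeeded()) {
                offsetsPerTask.put(task, task.prepareCommit());
            }
        }
        return offsetsPerTask;
    }
}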

Example 89 with StreamsException

use of org.apache.kafka.streams.errors.StreamsException in project kafka by apache.

the class StreamTask method punctuate.

/**
 * @throws IllegalStateException if the current node is not null
 * @throws TaskMigratedException if the task producer got fenced (EOS only)
 */
@SuppressWarnings("unchecked")
@Override
public void punctuate(final ProcessorNode<?, ?, ?, ?> node, final long timestamp, final PunctuationType type, final Punctuator punctuator) {
    if (processorContext.currentNode() != null) {
        throw new IllegalStateException(String.format("%sCurrent node is not null", logPrefix));
    }
    // when punctuating, we need to preserve the timestamp (this can be either system time or event time)
    // while the other record context fields are set to dummy values: null topic, -1 partition, -1 offset and empty headers
    final ProcessorRecordContext recordContext = new ProcessorRecordContext(timestamp, -1L, -1, null, new RecordHeaders());
    updateProcessorContext(node, time.milliseconds(), recordContext);
    if (log.isTraceEnabled()) {
        log.trace("Punctuating processor {} with timestamp {} and punctuation type {}", node.name(), timestamp, type);
    }
    try {
        maybeMeasureLatency(() -> node.punctuate(timestamp, punctuator), time, punctuateLatencySensor);
    } catch (final StreamsException e) {
        throw e;
    } catch (final RuntimeException e) {
        throw new StreamsException(String.format("%sException caught while punctuating processor '%s'", logPrefix, node.name()), e);
    } finally {
        processorContext.setCurrentNode(null);
    }
}
Also used :
RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders)
StreamsException(org.apache.kafka.streams.errors.StreamsException)
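
From the application's side, this wrapping means any exception escaping a Punctuator callback reaches the uncaught-exception handler as a StreamsException that names the processor node. A minimal Processor scheduling such a callback is sketched below; the ten-second interval and the empty callback body are illustrative only:

import java.time.Duration;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.processor.api.Processor;
import org.apache.kafka.streams.processor.api.ProcessorContext;
import org.apache.kafka.streams.processor.api.Record;

public class PunctuatingProcessor implements Processor<String, String, String, String> {

    @Override
    public void init(final ProcessorContext<String, String> context) {
        // schedule a wall-clock punctuation; if the lambda below throws, StreamTask#punctuate
        // rethrows it wrapped in a StreamsException that names this processor node
        context.schedule(Duration.ofSeconds(10), PunctuationType.WALL_CLOCK_TIME, timestamp -> {
            // periodic work goes here; any RuntimeException escaping this
            // callback surfaces as a StreamsException
        });
    }

    @Override
    public void process(final Record<String, String> record) {
        // per-record work goes here
    }
}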

Example 90 with StreamsException

use of org.apache.kafka.streams.errors.StreamsException in project kafka by apache.

the class StreamTask method process.

/**
 * Process one record.
 *
 * @return true if this method processed a record, false if it did not.
 * @throws TaskMigratedException if the task producer got fenced (EOS only)
 */
@SuppressWarnings("unchecked")
public boolean process(final long wallClockTime) {
    if (record == null) {
        if (!isProcessable(wallClockTime)) {
            return false;
        }
        // get the next record to process
        record = partitionGroup.nextRecord(recordInfo, wallClockTime);
        // if there is no record to process, return immediately
        if (record == null) {
            return false;
        }
    }
    try {
        final TopicPartition partition = recordInfo.partition();
        if (!(record instanceof CorruptedRecord)) {
            doProcess(wallClockTime);
        }
        // update the consumed offset map after processing is done
        consumedOffsets.put(partition, record.offset());
        commitNeeded = true;
        // if the buffered size of this partition's queue has decreased to the threshold
        // after processing this record, we can then resume the consumption on this partition
        if (recordInfo.queue().size() == maxBufferedSize) {
            mainConsumer.resume(singleton(partition));
        }
        record = null;
    } catch (final TimeoutException timeoutException) {
        if (!eosEnabled) {
            throw timeoutException;
        } else {
            record = null;
            throw new TaskCorruptedException(Collections.singleton(id));
        }
    } catch (final StreamsException exception) {
        record = null;
        throw exception;
    } catch (final RuntimeException e) {
        final StreamsException error = new StreamsException(String.format(
            "Exception caught in process. taskId=%s, processor=%s, topic=%s, partition=%d, offset=%d, stacktrace=%s",
            id(),
            processorContext.currentNode().name(),
            record.topic(),
            record.partition(),
            record.offset(),
            getStacktraceString(e)), e);
        record = null;
        throw error;
    } finally {
        processorContext.setCurrentNode(null);
    }
    return true;
}
Also used :
TaskCorruptedException(org.apache.kafka.streams.errors.TaskCorruptedException)
TopicPartition(org.apache.kafka.common.TopicPartition)
StreamsException(org.apache.kafka.streams.errors.StreamsException)
TimeoutException(org.apache.kafka.common.errors.TimeoutException)
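
The return value and exception contract shape the surrounding loop: false signals the task has nothing processable right now, and under EOS a TimeoutException is converted into a TaskCorruptedException rather than propagated. Here is a sketch of a per-task drain loop built on that contract; TaskLike and the maxRecords cap are hypothetical:

public class ProcessLoopSketch {

    // Hypothetical slice of the StreamTask surface used by the processing loop.
    interface TaskLike {
        boolean process(long wallClockTime);
    }

    // Drain up to maxRecords from one task; stop early once process()
    // reports that nothing is processable at this wall-clock time.
    int processBatch(final TaskLike task, final long wallClockTime, final int maxRecords) {
        int processed = 0;
        while (processed < maxRecords && task.process(wallClockTime)) {
            processed++;
        }
        return processed;
    }
}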

Aggregations

StreamsException (org.apache.kafka.streams.errors.StreamsException): 186
Test (org.junit.Test): 90
KafkaException (org.apache.kafka.common.KafkaException): 41
TopicPartition (org.apache.kafka.common.TopicPartition): 38
TimeoutException (org.apache.kafka.common.errors.TimeoutException): 36
HashMap (java.util.HashMap): 27
Map (java.util.Map): 25
HashSet (java.util.HashSet): 18
Properties (java.util.Properties): 17
TaskId (org.apache.kafka.streams.processor.TaskId): 14
ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 13
StreamsConfig (org.apache.kafka.streams.StreamsConfig): 12
ArrayList (java.util.ArrayList): 11
ExecutionException (java.util.concurrent.ExecutionException): 11
TaskMigratedException (org.apache.kafka.streams.errors.TaskMigratedException): 11
IOException (java.io.IOException): 10
Set (java.util.Set): 10
LogContext (org.apache.kafka.common.utils.LogContext): 10
MockTime (org.apache.kafka.common.utils.MockTime): 10
StateStore (org.apache.kafka.streams.processor.StateStore): 10