Use of org.apache.kafka.streams.errors.StreamsException in project kafka by apache.
The class TaskExecutor, method commitOffsetsOrTransaction.
/**
 * Caution: do not invoke this directly if it's possible a rebalance is occurring, as the commit will fail. If
 * this is a possibility, prefer {@link #commitTasksAndMaybeUpdateCommittableOffsets} instead.
 *
 * @throws TaskMigratedException if committing offsets failed due to CommitFailedException (non-EOS)
 * @throws TimeoutException if committing offsets failed due to TimeoutException (non-EOS)
 * @throws TaskCorruptedException if committing offsets failed due to TimeoutException (EOS)
 */
void commitOffsetsOrTransaction(final Map<Task, Map<TopicPartition, OffsetAndMetadata>> offsetsPerTask) {
    // avoid logging actual Task objects
    log.debug("Committing task offsets {}",
        offsetsPerTask.entrySet().stream()
            .collect(Collectors.toMap(t -> t.getKey().id(), Entry::getValue)));

    final Set<TaskId> corruptedTasks = new HashSet<>();

    if (!offsetsPerTask.isEmpty()) {
        if (processingMode == EXACTLY_ONCE_ALPHA) {
            for (final Map.Entry<Task, Map<TopicPartition, OffsetAndMetadata>> taskToCommit : offsetsPerTask.entrySet()) {
                final Task task = taskToCommit.getKey();
                try {
                    tasks.streamsProducerForTask(task.id())
                        .commitTransaction(taskToCommit.getValue(), tasks.mainConsumer().groupMetadata());
                    updateTaskCommitMetadata(taskToCommit.getValue());
                } catch (final TimeoutException timeoutException) {
                    log.error(String.format("Committing task %s failed.", task.id()), timeoutException);
                    corruptedTasks.add(task.id());
                }
            }
        } else {
            final Map<TopicPartition, OffsetAndMetadata> allOffsets = offsetsPerTask.values().stream()
                .flatMap(e -> e.entrySet().stream())
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

            if (processingMode == EXACTLY_ONCE_V2) {
                try {
                    tasks.threadProducer().commitTransaction(allOffsets, tasks.mainConsumer().groupMetadata());
                    updateTaskCommitMetadata(allOffsets);
                } catch (final TimeoutException timeoutException) {
                    log.error(String.format("Committing task(s) %s failed.",
                        offsetsPerTask.keySet().stream()
                            .map(t -> t.id().toString())
                            .collect(Collectors.joining(", "))),
                        timeoutException);
                    offsetsPerTask.keySet().forEach(task -> corruptedTasks.add(task.id()));
                }
            } else {
                try {
                    tasks.mainConsumer().commitSync(allOffsets);
                    updateTaskCommitMetadata(allOffsets);
                } catch (final CommitFailedException error) {
                    throw new TaskMigratedException("Consumer committing offsets failed, " +
                        "indicating the corresponding thread is no longer part of the group", error);
                } catch (final TimeoutException timeoutException) {
                    log.error(String.format("Committing task(s) %s failed.",
                        offsetsPerTask.keySet().stream()
                            .map(t -> t.id().toString())
                            .collect(Collectors.joining(", "))),
                        timeoutException);
                    throw timeoutException;
                } catch (final KafkaException error) {
                    throw new StreamsException("Error encountered committing offsets via consumer", error);
                }
            }
        }

        if (!corruptedTasks.isEmpty()) {
            throw new TaskCorruptedException(corruptedTasks);
        }
    }
}
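For context, here is how a caller might react to the three failure modes documented in the Javadoc above. This is a minimal sketch: the onTaskMigrated, onCommitTimeout, and onCorruptedTasks handlers are hypothetical names introduced for illustration, not Kafka APIs, while TaskCorruptedException#corruptedTasks() is the real accessor on that exception.

import java.util.Map;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.TimeoutException;
import org.apache.kafka.streams.errors.TaskCorruptedException;
import org.apache.kafka.streams.errors.TaskMigratedException;

void commitAndHandleFailures(final TaskExecutor taskExecutor,
                             final Map<Task, Map<TopicPartition, OffsetAndMetadata>> offsetsPerTask) {
    try {
        taskExecutor.commitOffsetsOrTransaction(offsetsPerTask);
    } catch (final TaskMigratedException e) {
        onTaskMigrated(e);                    // non-EOS: this thread is no longer in the group; rejoin
    } catch (final TimeoutException e) {
        onCommitTimeout(e);                   // non-EOS: retry the commit on the next interval
    } catch (final TaskCorruptedException e) {
        onCorruptedTasks(e.corruptedTasks()); // EOS: wipe local state of the named tasks and re-initialize
    }
}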
Use of org.apache.kafka.streams.errors.StreamsException in project kafka by apache.
The class StoreChangelogReader, method restore.
// 1. if there are any registered changelogs that need initialization, try to initialize them first;
// 2. if all changelogs have finished, return early;
// 3. if there are any restoring changelogs, try to read from the restore consumer and process them.
@Override
public void restore(final Map<TaskId, Task> tasks) {
    initializeChangelogs(tasks, registeredChangelogs());

    if (!activeRestoringChangelogs().isEmpty() && state == ChangelogReaderState.STANDBY_UPDATING) {
        throw new IllegalStateException("Should not be in standby updating state if there are still un-completed active changelogs");
    }

    if (allChangelogsCompleted()) {
        log.debug("Finished restoring all changelogs {}", changelogs.keySet());
        return;
    }

    final Set<TopicPartition> restoringChangelogs = restoringChangelogs();
    if (!restoringChangelogs.isEmpty()) {
        final ConsumerRecords<byte[], byte[]> polledRecords;

        try {
            // for restoring active tasks and updating standby tasks we may prefer different poll times
            // in order to make sure we call the main consumer#poll in time.
            // TODO: once we move ChangelogReader to a separate thread this may no longer be a concern
            polledRecords = restoreConsumer.poll(state == ChangelogReaderState.STANDBY_UPDATING ? Duration.ZERO : pollTime);

            // TODO (?) If we cannot fetch records during restore, should we trigger `task.timeout.ms`?
            // TODO (?) If we cannot fetch records for a standby task, should we trigger `task.timeout.ms`?
        } catch (final InvalidOffsetException e) {
            log.warn("Encountered " + e.getClass().getName() +
                " fetching records from restore consumer for partitions " + e.partitions() +
                "; it is likely that the consumer's position has fallen out of the topic partition's offset range " +
                "because the topic was truncated or compacted on the broker. Marking the corresponding tasks " +
                "as corrupted and re-initializing them later.", e);

            final Set<TaskId> corruptedTasks = new HashSet<>();
            e.partitions().forEach(partition -> corruptedTasks.add(changelogs.get(partition).stateManager.taskId()));
            throw new TaskCorruptedException(corruptedTasks, e);
        } catch (final KafkaException e) {
            throw new StreamsException("Restore consumer hit an unexpected error while polling records.", e);
        }

        for (final TopicPartition partition : polledRecords.partitions()) {
            bufferChangelogRecords(restoringChangelogByPartition(partition), polledRecords.records(partition));
        }

        for (final TopicPartition partition : restoringChangelogs) {
            // even if some partitions do not have any accumulated data, we still trigger
            // restoring, since some changelogs may not need to restore anything at all
            // and the restored-to-end check still needs to be executed.
            // TODO: we always try to restore as a batch when some records are accumulated, which may result in
            //       small batches; this can be optimized in the future, e.g. wait longer for larger batches.
            final TaskId taskId = changelogs.get(partition).stateManager.taskId();
            try {
                if (restoreChangelog(changelogs.get(partition))) {
                    tasks.get(taskId).clearTaskTimeout();
                }
            } catch (final TimeoutException timeoutException) {
                tasks.get(taskId).maybeInitTaskTimeoutOrThrow(time.milliseconds(), timeoutException);
            }
        }

        maybeUpdateLimitOffsetsForStandbyChangelogs(tasks);
        maybeLogRestorationProgress();
    }
}
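The TaskCorruptedException thrown above is meant to be caught by the stream thread, which wipes the corrupted tasks' local state before restoration starts over. A minimal sketch of that handling, assuming changelogReader and the tasks map are in scope and onCorruptedTasks is a hypothetical handler; in recent Kafka versions restore() is invoked repeatedly from the thread's main loop until all changelogs have caught up.

try {
    changelogReader.restore(tasks);
} catch (final TaskCorruptedException e) {
    // the restore consumer's position fell outside the retained offset range
    // (topic truncated or compacted on the broker): wipe the corrupted tasks'
    // local state and re-register their changelogs so restoration starts over
    onCorruptedTasks(e.corruptedTasks());
}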
Use of org.apache.kafka.streams.errors.StreamsException in project kafka by apache.
The class StreamTask, method commitNeeded.
@Override
public boolean commitNeeded() {
    // there may be non-data records, such as control markers, that were bypassed
    if (commitNeeded) {
        return true;
    } else {
        for (final Map.Entry<TopicPartition, Long> entry : consumedOffsets.entrySet()) {
            final TopicPartition partition = entry.getKey();
            try {
                final long offset = mainConsumer.position(partition);

                // if the position advanced by more than 1, there were control records
                // in between, which the consumer skips over silently
                if (offset > entry.getValue() + 1) {
                    commitNeeded = true;
                    entry.setValue(offset - 1);
                }
            } catch (final TimeoutException error) {
                // position() should not block here, because we have already consumed records
                // from this partition and the consumer thus has a valid local position;
                // hence, a `TimeoutException` indicates a bug and thus we rethrow it as fatal `IllegalStateException`
                throw new IllegalStateException(error);
            } catch (final KafkaException fatal) {
                throw new StreamsException(fatal);
            }
        }
        return commitNeeded;
    }
}
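The `offset > entry.getValue() + 1` check exists because control records (for example transaction commit markers) occupy offsets but are never returned by poll(). A worked example with hypothetical offsets:

// a transactional producer writes data records at offsets 0 and 1,
// then the broker appends a commit marker at offset 2
final long lastConsumed = 1L;          // highest offset returned by poll()
final long position = 3L;              // position() skipped the marker; next fetch is offset 3
assert position > lastConsumed + 1;    // the gap of 1 reveals the control record
final long newConsumed = position - 1; // track offset 2 as consumed, so the next commit
                                       // covers the marker and it is not re-read after a restart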
Use of org.apache.kafka.streams.errors.StreamsException in project kafka by apache.
The class StreamTask, method punctuate.
/**
 * @throws IllegalStateException if the current node is not null
 * @throws TaskMigratedException if the task producer got fenced (EOS only)
 */
@SuppressWarnings("unchecked")
@Override
public void punctuate(final ProcessorNode<?, ?, ?, ?> node,
                      final long timestamp,
                      final PunctuationType type,
                      final Punctuator punctuator) {
    if (processorContext.currentNode() != null) {
        throw new IllegalStateException(String.format("%sCurrent node is not null", logPrefix));
    }

    // when punctuating, we need to preserve the timestamp (which can be either system time or event time),
    // while the rest of the record context is set to dummy values: null topic, -1 partition, -1 offset and empty headers
    final ProcessorRecordContext recordContext = new ProcessorRecordContext(timestamp, -1L, -1, null, new RecordHeaders());
    updateProcessorContext(node, time.milliseconds(), recordContext);

    if (log.isTraceEnabled()) {
        log.trace("Punctuating processor {} with timestamp {} and punctuation type {}", node.name(), timestamp, type);
    }

    try {
        maybeMeasureLatency(() -> node.punctuate(timestamp, punctuator), time, punctuateLatencySensor);
    } catch (final StreamsException e) {
        throw e;
    } catch (final RuntimeException e) {
        throw new StreamsException(String.format("%sException caught while punctuating processor '%s'", logPrefix, node.name()), e);
    } finally {
        processorContext.setCurrentNode(null);
    }
}
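For reference, user code never calls this method directly; it registers a Punctuator through the public ProcessorContext#schedule API, and the stream thread later invokes StreamTask#punctuate with the scheduled type and timestamp. A self-contained wall-clock example against the public API (the processor name and the forwarded record are illustrative):

import java.time.Duration;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.processor.api.Processor;
import org.apache.kafka.streams.processor.api.ProcessorContext;
import org.apache.kafka.streams.processor.api.Record;

public class TickingProcessor implements Processor<String, String, String, String> {
    private ProcessorContext<String, String> context;

    @Override
    public void init(final ProcessorContext<String, String> context) {
        this.context = context;
        // the stream thread routes this callback through StreamTask#punctuate
        context.schedule(Duration.ofSeconds(30), PunctuationType.WALL_CLOCK_TIME,
            timestamp -> context.forward(new Record<>("tick", "fired", timestamp)));
    }

    @Override
    public void process(final Record<String, String> record) {
        context.forward(record);
    }
}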
Use of org.apache.kafka.streams.errors.StreamsException in project kafka by apache.
The class StreamTask, method process.
/**
 * Process one record.
 *
 * @return true if this method processed a record, false otherwise
 * @throws TaskMigratedException if the task producer got fenced (EOS only)
 */
@SuppressWarnings("unchecked")
public boolean process(final long wallClockTime) {
    if (record == null) {
        if (!isProcessable(wallClockTime)) {
            return false;
        }

        // get the next record to process
        record = partitionGroup.nextRecord(recordInfo, wallClockTime);

        // if there is no record to process, return immediately
        if (record == null) {
            return false;
        }
    }

    try {
        final TopicPartition partition = recordInfo.partition();

        if (!(record instanceof CorruptedRecord)) {
            doProcess(wallClockTime);
        }

        // update the consumed offset map after processing is done
        consumedOffsets.put(partition, record.offset());
        commitNeeded = true;

        // after processing this record, if the partition queue's buffered size has
        // decreased to the threshold, we can then resume the consumption on this partition
        if (recordInfo.queue().size() == maxBufferedSize) {
            mainConsumer.resume(singleton(partition));
        }

        record = null;
    } catch (final TimeoutException timeoutException) {
        if (!eosEnabled) {
            throw timeoutException;
        } else {
            record = null;
            throw new TaskCorruptedException(Collections.singleton(id));
        }
    } catch (final StreamsException exception) {
        record = null;
        throw exception;
    } catch (final RuntimeException e) {
        final StreamsException error = new StreamsException(
            String.format("Exception caught in process. taskId=%s, processor=%s, topic=%s, partition=%d, offset=%d, stacktrace=%s",
                id(),
                processorContext.currentNode().name(),
                record.topic(),
                record.partition(),
                record.offset(),
                getStacktraceString(e)),
            e);
        record = null;
        throw error;
    } finally {
        processorContext.setCurrentNode(null);
    }

    return true;
}
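A hedged sketch of how a caller might drive this method: process records in small batches per task and translate the exceptions above into recovery actions. The names time, maxIterations, and the two handlers are assumptions for illustration, not the actual Kafka names.

int processed = 0;
final long now = time.milliseconds();
try {
    // drain up to maxIterations buffered records from this task
    while (processed < maxIterations && task.process(now)) {
        processed++;
    }
} catch (final TaskCorruptedException e) {
    onCorruptedTasks(e.corruptedTasks()); // EOS: wipe and re-initialize the task's local state
} catch (final TaskMigratedException e) {
    onTaskMigrated(e);                    // the task was reassigned elsewhere; rejoin the group
}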