Use of org.apache.kafka.clients.consumer.Consumer in project kafka by apache.
The class StreamsPartitionAssignor, method populateClientStatesMap.
/**
* Builds a map from client to state, and readies each ClientState for assignment by adding any missing prev tasks
* and computing the per-task overall lag based on the fetched end offsets for each changelog.
*
* @param clientStates a map from each client to its state, including offset lags. Populated by this method.
* @param clientMetadataMap a map from each client to its full metadata
* @param taskForPartition map from topic partition to its corresponding task
* @param changelogTopics object that manages changelog topics
*
* @return whether we were able to successfully fetch the changelog end offsets and compute each client's lag
*/
private boolean populateClientStatesMap(final Map<UUID, ClientState> clientStates,
                                        final Map<UUID, ClientMetadata> clientMetadataMap,
                                        final Map<TopicPartition, TaskId> taskForPartition,
                                        final ChangelogTopics changelogTopics) {
    boolean fetchEndOffsetsSuccessful;
    Map<TaskId, Long> allTaskEndOffsetSums;
    try {
        // Make the listOffsets request first so it can fetch the offsets for non-source changelogs
        // asynchronously while we use the blocking Consumer#committed call to fetch source-changelog offsets
        final KafkaFuture<Map<TopicPartition, ListOffsetsResultInfo>> endOffsetsFuture =
            fetchEndOffsetsFuture(changelogTopics.preExistingNonSourceTopicBasedPartitions(), adminClient);
        final Map<TopicPartition, Long> sourceChangelogEndOffsets =
            fetchCommittedOffsets(changelogTopics.preExistingSourceTopicBasedPartitions(), mainConsumerSupplier.get());
        final Map<TopicPartition, ListOffsetsResultInfo> endOffsets = ClientUtils.getEndOffsets(endOffsetsFuture);
        allTaskEndOffsetSums = computeEndOffsetSumsByTask(endOffsets, sourceChangelogEndOffsets, changelogTopics);
        fetchEndOffsetsSuccessful = true;
    } catch (final StreamsException | TimeoutException e) {
        allTaskEndOffsetSums = changelogTopics.statefulTaskIds().stream()
            .collect(Collectors.toMap(t -> t, t -> UNKNOWN_OFFSET_SUM));
        fetchEndOffsetsSuccessful = false;
    }
    for (final Map.Entry<UUID, ClientMetadata> entry : clientMetadataMap.entrySet()) {
        final UUID uuid = entry.getKey();
        final ClientState state = entry.getValue().state;
        state.initializePrevTasks(taskForPartition, taskManager.topologyMetadata().hasNamedTopologies());
        state.computeTaskLags(uuid, allTaskEndOffsetSums);
        clientStates.put(uuid, state);
    }
    return fetchEndOffsetsSuccessful;
}
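The fetchCommittedOffsets call above is where the plain Consumer API comes in: the assignor asks the main consumer for the committed offsets of the source-topic-based changelog partitions while the admin client resolves the remaining end offsets asynchronously. Below is a minimal sketch of such a helper built directly on Consumer#committed; the helper name mirrors the call above, but the fallback to 0L for partitions without a committed offset is an assumption, not necessarily the exact ClientUtils implementation.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Set;
    import org.apache.kafka.clients.consumer.Consumer;
    import org.apache.kafka.clients.consumer.OffsetAndMetadata;
    import org.apache.kafka.common.TopicPartition;

    public final class CommittedOffsetsSketch {
        // Sketch only: look up committed offsets for the given partitions with the blocking
        // Consumer#committed call; partitions with no committed offset fall back to 0L (assumption).
        static Map<TopicPartition, Long> fetchCommittedOffsets(final Set<TopicPartition> partitions,
                                                               final Consumer<byte[], byte[]> consumer) {
            final Map<TopicPartition, Long> committedOffsets = new HashMap<>();
            final Map<TopicPartition, OffsetAndMetadata> committed = consumer.committed(partitions);
            for (final TopicPartition partition : partitions) {
                final OffsetAndMetadata metadata = committed.get(partition);
                committedOffsets.put(partition, metadata == null ? 0L : metadata.offset());
            }
            return committedOffsets;
        }
    }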
Use of org.apache.kafka.clients.consumer.Consumer in project kafka by apache.
The class StoreChangelogReader, method restore.
// 1. if there are any registered changelogs that need initialization, try to initialize them first;
// 2. if all changelogs have finished, return early;
// 3. if there are any restoring changelogs, try to read from the restore consumer and process them.
@Override
public void restore(final Map<TaskId, Task> tasks) {
    initializeChangelogs(tasks, registeredChangelogs());
    if (!activeRestoringChangelogs().isEmpty() && state == ChangelogReaderState.STANDBY_UPDATING) {
        throw new IllegalStateException("Should not be in standby updating state if there are still un-completed active changelogs");
    }
    if (allChangelogsCompleted()) {
        log.debug("Finished restoring all changelogs {}", changelogs.keySet());
        return;
    }
    final Set<TopicPartition> restoringChangelogs = restoringChangelogs();
    if (!restoringChangelogs.isEmpty()) {
        final ConsumerRecords<byte[], byte[]> polledRecords;
        try {
            // for restoring active tasks and updating standby tasks we may prefer different poll times
            // in order to make sure we call the main consumer#poll in time.
            // TODO: once we move ChangelogReader to a separate thread this may no longer be a concern
            polledRecords = restoreConsumer.poll(state == ChangelogReaderState.STANDBY_UPDATING ? Duration.ZERO : pollTime);

            // TODO (?) If we cannot fetch records during restore, should we trigger `task.timeout.ms`?
            // TODO (?) If we cannot fetch records for a standby task, should we trigger `task.timeout.ms`?
        } catch (final InvalidOffsetException e) {
            log.warn("Encountered " + e.getClass().getName() + " fetching records from restore consumer for partitions " + e.partitions()
                + ", it is likely that the consumer's position has fallen out of the topic partition offset range because the topic was"
                + " truncated or compacted on the broker, marking the corresponding tasks as corrupted and re-initializing them later.", e);
            final Set<TaskId> corruptedTasks = new HashSet<>();
            e.partitions().forEach(partition -> corruptedTasks.add(changelogs.get(partition).stateManager.taskId()));
            throw new TaskCorruptedException(corruptedTasks, e);
        } catch (final KafkaException e) {
            throw new StreamsException("Restore consumer got an unexpected error polling records.", e);
        }
        for (final TopicPartition partition : polledRecords.partitions()) {
            bufferChangelogRecords(restoringChangelogByPartition(partition), polledRecords.records(partition));
        }
        for (final TopicPartition partition : restoringChangelogs) {
            // even if some partitions do not have any accumulated data, we still trigger
            // restoring, since some changelogs may not need to restore anything at all, and the
            // restored-to-end check still needs to be executed.
            // TODO: we always try to restore as a batch when some records are accumulated, which may result in
            // small batches; this can be optimized in the future, e.g. wait longer for larger batches.
            final TaskId taskId = changelogs.get(partition).stateManager.taskId();
            try {
                if (restoreChangelog(changelogs.get(partition))) {
                    tasks.get(taskId).clearTaskTimeout();
                }
            } catch (final TimeoutException timeoutException) {
                tasks.get(taskId).maybeInitTaskTimeoutOrThrow(time.milliseconds(), timeoutException);
            }
        }
        maybeUpdateLimitOffsetsForStandbyChangelogs(tasks);
        maybeLogRestorationProgress();
    }
}
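Under the hood the restore consumer is an ordinary Consumer that is manually assigned the changelog partitions and polled outside of any consumer group. The following stand-alone sketch shows that assign/seek/poll pattern in isolation; it is not the StoreChangelogReader logic itself, and the helper name and fixed 100 ms poll interval are illustrative assumptions.

    import java.time.Duration;
    import java.util.Map;
    import java.util.Set;
    import org.apache.kafka.clients.consumer.Consumer;
    import org.apache.kafka.clients.consumer.ConsumerRecord;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.common.TopicPartition;

    public final class RestoreLoopSketch {
        // Sketch only: assign the changelog partitions, rewind to the beginning, and poll until
        // every partition has caught up to its known end offset.
        static void restoreToEnd(final Consumer<byte[], byte[]> restoreConsumer,
                                 final Set<TopicPartition> changelogPartitions,
                                 final Map<TopicPartition, Long> endOffsets) {
            restoreConsumer.assign(changelogPartitions);
            restoreConsumer.seekToBeginning(changelogPartitions);
            boolean caughtUp = false;
            while (!caughtUp) {
                final ConsumerRecords<byte[], byte[]> records = restoreConsumer.poll(Duration.ofMillis(100));
                for (final ConsumerRecord<byte[], byte[]> record : records) {
                    // apply the record to the state store here
                }
                caughtUp = changelogPartitions.stream()
                    .allMatch(tp -> restoreConsumer.position(tp) >= endOffsets.getOrDefault(tp, 0L));
            }
        }
    }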
Use of org.apache.kafka.clients.consumer.Consumer in project kafka by apache.
The class SuppressionIntegrationTest, method waitForAnyRecord.
private static boolean waitForAnyRecord(final String topic) {
    final Properties properties = new Properties();
    properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
    try (final Consumer<Object, Object> consumer = new KafkaConsumer<>(properties)) {
        final List<TopicPartition> partitions = consumer.partitionsFor(topic)
            .stream()
            .map(pi -> new TopicPartition(pi.topic(), pi.partition()))
            .collect(Collectors.toList());
        consumer.assign(partitions);
        consumer.seekToBeginning(partitions);
        final long start = System.currentTimeMillis();
        while ((System.currentTimeMillis() - start) < DEFAULT_TIMEOUT) {
            final ConsumerRecords<Object, Object> records = consumer.poll(ofMillis(500));
            if (!records.isEmpty()) {
                return true;
            }
        }
        return false;
    }
}
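Because the consumer is manually assigned and seeks to the beginning, this helper reads from offset zero without joining a consumer group or committing offsets. A typical call site inside the test class would simply assert on the result; the topic name below is only an example:

    assertThat(waitForAnyRecord("output-topic"), is(true));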
Use of org.apache.kafka.clients.consumer.Consumer in project kafka by apache.
The class StreamsPartitionAssignorTest, method shouldThrowTaskAssignmentExceptionWhenUnableToResolvePartitionCount.
@Test
public void shouldThrowTaskAssignmentExceptionWhenUnableToResolvePartitionCount() {
    builder = new CorruptedInternalTopologyBuilder();
    topologyMetadata = new TopologyMetadata(builder, new StreamsConfig(configProps()));
    final InternalStreamsBuilder streamsBuilder = new InternalStreamsBuilder(builder);
    final KStream<String, String> inputTopic = streamsBuilder.stream(singleton("topic1"), new ConsumedInternal<>());
    final KTable<String, String> inputTable =
        streamsBuilder.table("topic2", new ConsumedInternal<>(), new MaterializedInternal<>(Materialized.as("store")));
    inputTopic.groupBy((k, v) -> k, Grouped.with("GroupName", Serdes.String(), Serdes.String()))
        .windowedBy(TimeWindows.of(Duration.ofMinutes(10)))
        .aggregate(() -> "", (k, v, a) -> a + k)
        .leftJoin(inputTable, v -> v, (x, y) -> x + y);
    streamsBuilder.buildAndOptimizeTopology();
    configureDefault();
    subscriptions.put("consumer", new Subscription(singletonList("topic"), defaultSubscriptionInfo.encode()));
    final Map<String, Assignment> assignments =
        partitionAssignor.assign(metadata, new GroupSubscription(subscriptions)).groupAssignment();
    assertThat(AssignmentInfo.decode(assignments.get("consumer").userData()).errCode(), equalTo(AssignorError.ASSIGNMENT_ERROR.code()));
}
Use of org.apache.kafka.clients.consumer.Consumer in project kafka by apache.
The class TaskManagerTest, method handleAssignment.
private Map<TaskId, StateMachineTask> handleAssignment(final Map<TaskId, Set<TopicPartition>> runningActiveAssignment,
                                                       final Map<TaskId, Set<TopicPartition>> standbyAssignment,
                                                       final Map<TaskId, Set<TopicPartition>> restoringActiveAssignment) {
    final Set<Task> runningTasks = runningActiveAssignment.entrySet().stream()
        .map(t -> new StateMachineTask(t.getKey(), t.getValue(), true))
        .collect(Collectors.toSet());
    final Set<Task> standbyTasks = standbyAssignment.entrySet().stream()
        .map(t -> new StateMachineTask(t.getKey(), t.getValue(), false))
        .collect(Collectors.toSet());
    final Set<Task> restoringTasks = restoringActiveAssignment.entrySet().stream()
        .map(t -> new StateMachineTask(t.getKey(), t.getValue(), true))
        .collect(Collectors.toSet());
    // give the restoring tasks some uncompleted changelog partitions so they'll stay in restoring
    restoringTasks.forEach(t -> ((StateMachineTask) t).setChangelogOffsets(singletonMap(new TopicPartition("changelog", 0), 0L)));

    // Initially assign only the active tasks we want to complete restoration
    final Map<TaskId, Set<TopicPartition>> allActiveTasksAssignment = new HashMap<>(runningActiveAssignment);
    allActiveTasksAssignment.putAll(restoringActiveAssignment);
    final Set<Task> allActiveTasks = new HashSet<>(runningTasks);
    allActiveTasks.addAll(restoringTasks);

    expect(standbyTaskCreator.createTasks(eq(standbyAssignment))).andStubReturn(standbyTasks);
    expect(activeTaskCreator.createTasks(anyObject(), eq(allActiveTasksAssignment))).andStubReturn(allActiveTasks);
    expectRestoreToBeCompleted(consumer, changeLogReader);
    replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);

    taskManager.handleAssignment(allActiveTasksAssignment, standbyAssignment);
    taskManager.tryToCompleteRestoration(time.milliseconds(), null);

    final Map<TaskId, StateMachineTask> allTasks = new HashMap<>();
    // Just make sure all tasks ended up in the expected state
    for (final Task task : runningTasks) {
        assertThat(task.state(), is(Task.State.RUNNING));
        allTasks.put(task.id(), (StateMachineTask) task);
    }
    for (final Task task : restoringTasks) {
        assertThat(task.state(), is(Task.State.RESTORING));
        allTasks.put(task.id(), (StateMachineTask) task);
    }
    for (final Task task : standbyTasks) {
        assertThat(task.state(), is(Task.State.RUNNING));
        allTasks.put(task.id(), (StateMachineTask) task);
    }
    return allTasks;
}
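An individual test could drive this helper along the following lines. The task-id and partition fixtures (taskId00, taskId00Partitions, and so on) are placeholders for whatever the test class defines, and mkMap/mkEntry are the usual Kafka test utilities assumed to be statically imported:

    final Map<TaskId, StateMachineTask> tasks = handleAssignment(
        mkMap(mkEntry(taskId00, taskId00Partitions)),   // running active tasks
        mkMap(mkEntry(taskId01, taskId01Partitions)),   // standby tasks
        mkMap(mkEntry(taskId02, taskId02Partitions))    // restoring active tasks
    );
    assertThat(tasks.get(taskId02).state(), is(Task.State.RESTORING));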