Use of org.apache.kafka.streams.errors.TaskAssignmentException in the Apache Kafka project.
Class StreamsPartitionAssignor, method assignTasksToThreads.
/**
* Generate an assignment that tries to preserve thread-level stickiness of stateful tasks without violating
* balance. The stateful and total task load are both balanced across threads. Tasks without previous owners
* will be interleaved by group id to spread subtopologies across threads and further balance the workload.
*
* <p>The assignment is built in phases:
* <ol>
* <li>each consumer keeps its previously owned stateful tasks, up to
* {@code floor(statefulTasksToAssign.size() / consumers.size())} tasks;</li>
* <li>consumers below that minimum are topped up, one task at a time, from the remaining stateful tasks;</li>
* <li>leftover stateful tasks are handed out at most one extra per consumer, preferring the previous owner;</li>
* <li>stateless tasks first go to the consumers that did not receive an extra stateful task, and the rest are
* distributed round-robin over all consumers.</li>
* </ol>
*
* @param statefulTasksToAssign the stateful tasks to distribute over the consumers
* @param statelessTasksToAssign the stateless tasks to distribute over the consumers
* @param consumers the consumers (threads) to assign to, in sorted order
* NOTE(review): presumably never empty; an empty set combined with stateless tasks
* would hit a null consumer in the final round-robin loop -- confirm against callers
* @param state the client state, queried via {@code prevTasksByLag(consumer)} for each consumer
* @return a map with one entry per consumer holding the tasks assigned to it (possibly an empty list)
* @throws TaskAssignmentException if some consumer is still below the minimum stateful task count but no
* unassigned stateful tasks remain
*/
static Map<String, List<TaskId>> assignTasksToThreads(final Collection<TaskId> statefulTasksToAssign, final Collection<TaskId> statelessTasksToAssign, final SortedSet<String> consumers, final ClientState state) {
// Every consumer gets an entry up front, so lookups by consumer below never miss
final Map<String, List<TaskId>> assignment = new HashMap<>();
for (final String consumer : consumers) {
assignment.put(consumer, new ArrayList<>());
}
// Work on a sorted copy of the stateless tasks; this single iterator is shared by the top-up loop and the
// final round-robin loop, so tasks consumed by the former are never seen again by the latter
final List<TaskId> unassignedStatelessTasks = new ArrayList<>(statelessTasksToAssign);
Collections.sort(unassignedStatelessTasks);
final Iterator<TaskId> unassignedStatelessTasksIter = unassignedStatelessTasks.iterator();
// The number of stateful tasks every consumer is guaranteed to receive
final int minStatefulTasksPerThread = (int) Math.floor(((double) statefulTasksToAssign.size()) / consumers.size());
// poll() hands out the least remaining task first
final PriorityQueue<TaskId> unassignedStatefulTasks = new PriorityQueue<>(statefulTasksToAssign);
// FIFO queue of consumers that can still take another task in the current phase
final Queue<String> consumersToFill = new LinkedList<>();
// keep track of tasks that we have to skip during the first pass in case we can reassign them later
// using tree-map to make sure the iteration ordering over keys are preserved
final Map<TaskId, String> unassignedTaskToPreviousOwner = new TreeMap<>();
if (!unassignedStatefulTasks.isEmpty()) {
// First assign stateful tasks to previous owner, up to the min expected tasks/thread
for (final String consumer : consumers) {
final List<TaskId> threadAssignment = assignment.get(consumer);
// presumably the consumer's previously owned tasks ordered by lag -- verify against ClientState
for (final TaskId task : state.prevTasksByLag(consumer)) {
if (unassignedStatefulTasks.contains(task)) {
if (threadAssignment.size() < minStatefulTasksPerThread) {
threadAssignment.add(task);
unassignedStatefulTasks.remove(task);
} else {
// The previous owner is already at the minimum; remember it for the "one extra task" phase
unassignedTaskToPreviousOwner.put(task, consumer);
}
}
}
// Only stateful tasks have been assigned so far, so the list size is the stateful task count
if (threadAssignment.size() < minStatefulTasksPerThread) {
consumersToFill.offer(consumer);
}
}
// Next interleave remaining unassigned tasks amongst unfilled consumers
while (!consumersToFill.isEmpty()) {
final TaskId task = unassignedStatefulTasks.poll();
if (task != null) {
final String consumer = consumersToFill.poll();
final List<TaskId> threadAssignment = assignment.get(consumer);
threadAssignment.add(task);
// Re-queue at the tail until this consumer reaches the minimum, so tasks are dealt out one at a time
if (threadAssignment.size() < minStatefulTasksPerThread) {
consumersToFill.offer(consumer);
}
} else {
throw new TaskAssignmentException("Ran out of unassigned stateful tasks but some members were not at capacity");
}
}
// stateful tasks still remaining that should now be distributed over the consumers
if (!unassignedStatefulTasks.isEmpty()) {
// consumersToFill is empty here (the loop above drained it), so it now holds every consumer exactly once
consumersToFill.addAll(consumers);
// Go over the tasks we skipped earlier and assign them to their previous owner when possible
for (final Map.Entry<TaskId, String> taskEntry : unassignedTaskToPreviousOwner.entrySet()) {
final TaskId task = taskEntry.getKey();
final String consumer = taskEntry.getValue();
if (consumersToFill.contains(consumer) && unassignedStatefulTasks.contains(task)) {
assignment.get(consumer).add(task);
unassignedStatefulTasks.remove(task);
// Remove this consumer since we know it is now at minCapacity + 1
consumersToFill.remove(consumer);
}
}
// Now just distribute the remaining unassigned stateful tasks over the consumers still at min capacity
// NOTE(review): this iterates the PriorityQueue without removing from it; PriorityQueue iteration is not
// guaranteed to follow priority order, and the queue is not read again after this loop
for (final TaskId task : unassignedStatefulTasks) {
final String consumer = consumersToFill.poll();
final List<TaskId> threadAssignment = assignment.get(consumer);
threadAssignment.add(task);
}
// Any consumer still in consumersToFill is at min capacity while all the others are at min
// capacity + 1, so start distributing stateless tasks to get all consumers back to the same count
while (unassignedStatelessTasksIter.hasNext()) {
final String consumer = consumersToFill.poll();
if (consumer != null) {
final TaskId task = unassignedStatelessTasksIter.next();
unassignedStatelessTasksIter.remove();
assignment.get(consumer).add(task);
} else {
// No consumer is left to top up; the remaining stateless tasks are handled by the round-robin below
break;
}
}
}
}
// Now just distribute tasks while circling through all the consumers
// (whenever stateless tasks remain at this point, consumersToFill has been fully drained above)
consumersToFill.addAll(consumers);
while (unassignedStatelessTasksIter.hasNext()) {
final TaskId task = unassignedStatelessTasksIter.next();
final String consumer = consumersToFill.poll();
assignment.get(consumer).add(task);
// Back to the tail of the queue to keep the rotation going
consumersToFill.offer(consumer);
}
return assignment;
}
Use of org.apache.kafka.streams.errors.TaskAssignmentException in the Apache Kafka project.
Class SubscriptionInfo, method setTaskOffsetSumDataFromTaskOffsetSumMap.
/**
 * Stores the given per-task offset sums in {@code data}, grouped by subtopology.
 * Applies to MIN_NAMED_TOPOLOGY_VERSION > version > MIN_VERSION_OFFSET_SUM_SUBSCRIPTION.
 *
 * @param taskOffsetSums the offset sum of each task, keyed by task id
 * @throws TaskAssignmentException if any task id carries a (non-null) topology name
 */
private void setTaskOffsetSumDataFromTaskOffsetSumMap(final Map<TaskId, Long> taskOffsetSums) {
    // Step 1: bucket the (partition, offset sum) pairs by the subtopology of their task
    final Map<Integer, List<SubscriptionInfoData.PartitionToOffsetSum>> offsetSumsBySubtopology = new HashMap<>();
    for (final Map.Entry<TaskId, Long> offsetSumEntry : taskOffsetSums.entrySet()) {
        final TaskId taskId = offsetSumEntry.getKey();
        if (taskId.topologyName() != null) {
            throw new TaskAssignmentException("Named topologies are not compatible with older protocol versions");
        }

        final List<SubscriptionInfoData.PartitionToOffsetSum> bucket =
            offsetSumsBySubtopology.computeIfAbsent(taskId.subtopology(), unused -> new ArrayList<>());
        bucket.add(
            new SubscriptionInfoData.PartitionToOffsetSum()
                .setPartition(taskId.partition())
                .setOffsetSum(offsetSumEntry.getValue())
        );
    }

    // Step 2: turn every bucket into one TaskOffsetSum element and hand the list to the subscription data
    final List<SubscriptionInfoData.TaskOffsetSum> encodedOffsetSums = new ArrayList<>();
    for (final Map.Entry<Integer, List<SubscriptionInfoData.PartitionToOffsetSum>> bucketEntry : offsetSumsBySubtopology.entrySet()) {
        final SubscriptionInfoData.TaskOffsetSum encoded = new SubscriptionInfoData.TaskOffsetSum();
        encoded.setTopicGroupId(bucketEntry.getKey());
        encoded.setPartitionToOffsetSum(bucketEntry.getValue());
        encodedOffsetSums.add(encoded);
    }
    data.setTaskOffsetSums(encodedOffsetSums);
}
Use of org.apache.kafka.streams.errors.TaskAssignmentException in the Apache Kafka project.
Class ConsumerProtocolUtils, method writeTaskIdTo.
/**
 * Writes a task id to the given stream: the subtopology and the partition as ints, followed -- for protocol
 * versions at or above {@code MIN_NAMED_TOPOLOGY_VERSION} -- by the length of the topology name and its
 * characters, or a single {@code 0} length when the task has no topology name.
 *
 * @param taskId  the task id to write
 * @param out     the stream to write to
 * @param version the protocol version to encode for
 * @throws IOException if cannot write to output stream
 * @throws TaskAssignmentException if the task id has a topology name but {@code version} is below
 *                                 {@code MIN_NAMED_TOPOLOGY_VERSION}; the subtopology and partition have
 *                                 already been written at that point
 */
public static void writeTaskIdTo(final TaskId taskId, final DataOutputStream out, final int version) throws IOException {
    out.writeInt(taskId.subtopology());
    out.writeInt(taskId.partition());

    final String topologyName = taskId.topologyName();

    // Older protocol versions have no slot for a topology name
    if (version < MIN_NAMED_TOPOLOGY_VERSION) {
        if (topologyName != null) {
            throw new TaskAssignmentException("Named topologies are not compatible with protocol version " + version);
        }
        return;
    }

    // A zero length marks the absence of a topology name
    if (topologyName == null) {
        out.writeInt(0);
        return;
    }

    out.writeInt(topologyName.length());
    out.writeChars(topologyName);
}
Use of org.apache.kafka.streams.errors.TaskAssignmentException in the Apache Kafka project.
Class StreamsRebalanceListenerTest, method shouldThrowTaskAssignmentExceptionOnUnrecognizedErrorCode.
// An assignment error code the listener does not recognize must surface as a TaskAssignmentException.
@Test
public void shouldThrowTaskAssignmentExceptionOnUnrecognizedErrorCode() {
    // Arrange: no interactions are recorded on the mocks, and the error code is set to Integer.MAX_VALUE
    replay(taskManager, streamThread);
    assignmentErrorCode.set(Integer.MAX_VALUE);

    // Act: trigger the callback with an empty partition list
    final TaskAssignmentException thrown = assertThrows(
        TaskAssignmentException.class,
        () -> streamsRebalanceListener.onPartitionsAssigned(Collections.emptyList())
    );

    // Assert: the message is the expected one and the mock expectations hold
    assertThat(thrown.getMessage(), is("Hit an unrecognized exception during rebalance"));
    verify(taskManager, streamThread);
}
Aggregations