use of org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.ConnectorsAndTasks in project kafka by apache.
the class IncrementalCooperativeAssignor method performTaskAssignment.
/**
 * Performs task assignment based on the incremental cooperative connect protocol.
 *
 * <p>The computation starts from three base sets (the previous assignment kept by this
 * assignor, the configured connectors-and-tasks from the coordinator's config snapshot, and
 * the active assignments reported by the members) and derives from them the deleted, lost and
 * newly submitted connectors-and-tasks. It then decides what to revoke and what to assign
 * incrementally to each worker.
 *
 * <p>Besides returning the serialized assignments, this method updates the assignor's
 * bookkeeping fields ({@code previousAssignment}, {@code previousGenerationId},
 * {@code previousMembers}, {@code previousRevocation} and {@code canRevoke}) and installs a new
 * leader state on the coordinator.
 *
 * @param leaderId the ID of the group leader
 * @param maxOffset the latest known offset of the configuration topic
 * @param memberConfigs the metadata of all the members of the group as gathered in the current
 * round of rebalancing
 * @param coordinator the worker coordinator instance that provides the configuration snapshot
 * and gets assigned the leader state during this assignment
 * @param protocolVersion the Connect subprotocol version
 * @return the serialized assignment of tasks to the whole group, including assigned or
 * revoked tasks
 * @see <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-415%3A+Incremental+Cooperative+Rebalancing+in+Kafka+Connect">KIP-415: Incremental Cooperative Rebalancing in Kafka Connect</a>
 */
protected Map<String, ByteBuffer> performTaskAssignment(String leaderId, long maxOffset,
                                                        Map<String, ExtendedWorkerState> memberConfigs,
                                                        WorkerCoordinator coordinator, short protocolVersion) {
    log.debug("Performing task assignment during generation: {} with memberId: {}",
            coordinator.generationId(), coordinator.memberId());

    // Base set: The previous assignment of connectors-and-tasks is a standalone snapshot that
    // can be used to calculate derived sets
    log.debug("Previous assignments: {}", previousAssignment);
    int lastCompletedGenerationId = coordinator.lastCompletedGenerationId();
    if (previousGenerationId != lastCompletedGenerationId) {
        log.debug("Clearing the view of previous assignments due to generation mismatch between "
                + "previous generation ID {} and last completed generation ID {}. This can "
                + "happen if the leader fails to sync the assignment within a rebalancing round. "
                + "The following view of previous assignments might be outdated and will be "
                + "ignored by the leader in the current computation of new assignments. "
                + "Possibly outdated previous assignments: {}",
                previousGenerationId, lastCompletedGenerationId, previousAssignment);
        this.previousAssignment = ConnectorsAndTasks.EMPTY;
    }

    ClusterConfigState snapshot = coordinator.configSnapshot();
    Set<String> configuredConnectors = new TreeSet<>(snapshot.connectors());
    Set<ConnectorTaskId> configuredTasks = configuredConnectors.stream()
            .flatMap(c -> snapshot.tasks(c).stream())
            .collect(Collectors.toSet());

    // Base set: The set of configured connectors-and-tasks is a standalone snapshot that can
    // be used to calculate derived sets
    ConnectorsAndTasks configured = new ConnectorsAndTasks.Builder()
            .with(configuredConnectors, configuredTasks).build();
    log.debug("Configured assignments: {}", configured);

    // Base set: The set of active connectors-and-tasks is a standalone snapshot that can be
    // used to calculate derived sets
    ConnectorsAndTasks activeAssignments = assignment(memberConfigs);
    log.debug("Active assignments: {}", activeAssignments);

    // If anything recorded as revoked in the previous round is still reported as active, that
    // revocation did not take effect. In this case, reset the previous assignment to the
    // active one and be ready to re-apply revocation of tasks. The record of the previous
    // revocation is cleared either way.
    if (!previousRevocation.isEmpty()) {
        if (previousRevocation.connectors().stream().anyMatch(c -> activeAssignments.connectors().contains(c))
                || previousRevocation.tasks().stream().anyMatch(t -> activeAssignments.tasks().contains(t))) {
            previousAssignment = activeAssignments;
            canRevoke = true;
        }
        previousRevocation.connectors().clear();
        previousRevocation.tasks().clear();
    }

    // Derived set: The set of deleted connectors-and-tasks is a derived set from the set
    // difference of previous - configured
    ConnectorsAndTasks deleted = diff(previousAssignment, configured);
    log.debug("Deleted assignments: {}", deleted);

    // Derived set: The set of remaining active connectors-and-tasks is a derived set from the
    // set difference of active - deleted (only used for logging here)
    ConnectorsAndTasks remainingActive = diff(activeAssignments, deleted);
    log.debug("Remaining (excluding deleted) active assignments: {}", remainingActive);

    // Derived set: The set of lost or unaccounted connectors-and-tasks is a derived set from
    // the set difference of previous - active - deleted
    ConnectorsAndTasks lostAssignments = diff(previousAssignment, activeAssignments, deleted);
    log.debug("Lost assignments: {}", lostAssignments);

    // Derived set: The set of new connectors-and-tasks is a derived set from the set
    // difference of configured - previous - active
    ConnectorsAndTasks newSubmissions = diff(configured, previousAssignment, activeAssignments);
    log.debug("New assignments: {}", newSubmissions);

    // A collection of the complete assignment
    List<WorkerLoad> completeWorkerAssignment = workerAssignment(memberConfigs, ConnectorsAndTasks.EMPTY);
    log.debug("Complete (ignoring deletions) worker assignments: {}", completeWorkerAssignment);

    // Per worker connector assignments without removing deleted connectors yet
    Map<String, Collection<String>> connectorAssignments = completeWorkerAssignment.stream()
            .collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::connectors));
    log.debug("Complete (ignoring deletions) connector assignments: {}", connectorAssignments);

    // Per worker task assignments without removing deleted connectors yet
    Map<String, Collection<ConnectorTaskId>> taskAssignments = completeWorkerAssignment.stream()
            .collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::tasks));
    log.debug("Complete (ignoring deletions) task assignments: {}", taskAssignments);

    // A collection of the current assignment excluding the connectors-and-tasks to be deleted
    List<WorkerLoad> currentWorkerAssignment = workerAssignment(memberConfigs, deleted);

    Map<String, ConnectorsAndTasks> toRevoke = computeDeleted(deleted, connectorAssignments, taskAssignments);
    log.debug("Connector and task to delete assignments: {}", toRevoke);

    // Revoking redundant connectors/tasks if the workers have duplicate assignments.
    // The duplicates are merged into each worker's existing entry rather than replacing it,
    // so that a worker holding both deleted and duplicated resources gets all of them revoked.
    Map<String, ConnectorsAndTasks> duplicatedToRevoke =
            computeDuplicatedAssignments(memberConfigs, connectorAssignments, taskAssignments);
    duplicatedToRevoke.forEach((worker, duplicated) -> {
        ConnectorsAndTasks existing = toRevoke.computeIfAbsent(worker, v -> new ConnectorsAndTasks.Builder().build());
        for (String connector : duplicated.connectors()) {
            // Skip entries already scheduled for revocation (e.g. both deleted and duplicated)
            if (!existing.connectors().contains(connector)) {
                existing.connectors().add(connector);
            }
        }
        for (ConnectorTaskId task : duplicated.tasks()) {
            if (!existing.tasks().contains(task)) {
                existing.tasks().add(task);
            }
        }
    });
    log.debug("Connector and task to revoke assignments (include duplicated assignments): {}", toRevoke);

    // Recompute the complete assignment excluding the deleted connectors-and-tasks
    completeWorkerAssignment = workerAssignment(memberConfigs, deleted);
    connectorAssignments = completeWorkerAssignment.stream()
            .collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::connectors));
    taskAssignments = completeWorkerAssignment.stream()
            .collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::tasks));

    handleLostAssignments(lostAssignments, newSubmissions, completeWorkerAssignment, memberConfigs);

    // Do not revoke resources for re-assignment while a delayed rebalance is active
    // Also we do not revoke in two consecutive rebalances by the same leader
    canRevoke = delay == 0 && canRevoke;

    // Compute the connectors-and-tasks to be revoked for load balancing without taking into
    // account the deleted ones.
    log.debug("Can leader revoke tasks in this assignment? {} (delay: {})", canRevoke, delay);
    if (canRevoke) {
        Map<String, ConnectorsAndTasks> toExplicitlyRevoke =
                performTaskRevocation(activeAssignments, currentWorkerAssignment);
        // Log the load-balancing revocations just computed; the deletions and duplicates in
        // toRevoke have already been logged above.
        log.debug("Connector and task to revoke assignments: {}", toExplicitlyRevoke);

        toExplicitlyRevoke.forEach((worker, assignment) -> {
            ConnectorsAndTasks existing = toRevoke.computeIfAbsent(worker, v -> new ConnectorsAndTasks.Builder().build());
            existing.connectors().addAll(assignment.connectors());
            existing.tasks().addAll(assignment.tasks());
        });
        // Revocation stays allowed for the next round only if nothing was revoked for load
        // balancing in this one
        canRevoke = toExplicitlyRevoke.isEmpty();
    } else {
        // Revocation becomes possible again once no delayed rebalance is pending
        canRevoke = delay == 0;
    }

    assignConnectors(completeWorkerAssignment, newSubmissions.connectors());
    assignTasks(completeWorkerAssignment, newSubmissions.tasks());
    log.debug("Current complete assignments: {}", currentWorkerAssignment);
    log.debug("New complete assignments: {}", completeWorkerAssignment);

    Map<String, Collection<String>> currentConnectorAssignments = currentWorkerAssignment.stream()
            .collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::connectors));
    Map<String, Collection<ConnectorTaskId>> currentTaskAssignments = currentWorkerAssignment.stream()
            .collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::tasks));

    // Only the difference between the new and the current per-worker assignments is sent out
    Map<String, Collection<String>> incrementalConnectorAssignments =
            diff(connectorAssignments, currentConnectorAssignments);
    Map<String, Collection<ConnectorTaskId>> incrementalTaskAssignments =
            diff(taskAssignments, currentTaskAssignments);
    log.debug("Incremental connector assignments: {}", incrementalConnectorAssignments);
    log.debug("Incremental task assignments: {}", incrementalTaskAssignments);

    coordinator.leaderState(new LeaderState(memberConfigs, connectorAssignments, taskAssignments));

    Map<String, ExtendedAssignment> assignments = fillAssignments(memberConfigs.keySet(), Assignment.NO_ERROR,
            leaderId, memberConfigs.get(leaderId).url(), maxOffset,
            incrementalConnectorAssignments, incrementalTaskAssignments, toRevoke, delay, protocolVersion);

    // Remember this round's outcome as the base view for the next rebalance
    previousAssignment = computePreviousAssignment(toRevoke, connectorAssignments, taskAssignments, lostAssignments);
    previousGenerationId = coordinator.generationId();
    previousMembers = memberConfigs.keySet();
    log.debug("Actual assignments: {}", assignments);
    return serializeAssignments(assignments);
}
use of org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.ConnectorsAndTasks in project kafka by apache.
the class IncrementalCooperativeAssignor method diff.
/**
 * Computes the set difference {@code base - toSubtract[0] - toSubtract[1] - ...} for both the
 * connectors and the tasks.
 *
 * @param base the connectors-and-tasks to start from; it is copied, not modified
 * @param toSubtract the connectors-and-tasks whose members are removed from the copy
 * @return a new {@code ConnectorsAndTasks} built from the sorted remaining connectors and tasks
 */
private static ConnectorsAndTasks diff(ConnectorsAndTasks base, ConnectorsAndTasks... toSubtract) {
    // Work on sorted copies so that the base is left untouched
    Collection<String> remainingConnectors = new TreeSet<>(base.connectors());
    for (int i = 0; i < toSubtract.length; i++) {
        remainingConnectors.removeAll(toSubtract[i].connectors());
    }

    Collection<ConnectorTaskId> remainingTasks = new TreeSet<>(base.tasks());
    for (int i = 0; i < toSubtract.length; i++) {
        remainingTasks.removeAll(toSubtract[i].tasks());
    }

    return new ConnectorsAndTasks.Builder()
            .with(remainingConnectors, remainingTasks)
            .build();
}
use of org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.ConnectorsAndTasks in project kafka by apache.
the class IncrementalCooperativeAssignor method performTaskRevocation.
/**
 * Task revocation is based on a rough estimation of the lower average number of tasks before
 * and after new workers join the group. If no new workers join, no revocation takes place.
 * Based on this estimation, connectors and tasks are revoked from each existing worker that
 * holds more than the new rounded-up (ceiling) average, until it is down to that ceiling.
 * The revoked tasks, once assigned to the new workers, will maintain a balanced load among
 * the group.
 *
 * <p>A worker counts as "existing" when its load size is greater than zero and as "new"
 * otherwise.
 *
 * @param activeAssignments the connectors and tasks currently active in the group; only their
 * counts are used, to compute the per-worker averages
 * @param completeWorkerAssignment the per-worker load of every worker in the group
 * @return the connectors and tasks to revoke, keyed by worker; empty if there are no new
 * workers, no existing workers, or no worker is above the ceiling
 */
private Map<String, ConnectorsAndTasks> performTaskRevocation(ConnectorsAndTasks activeAssignments,
                                                              Collection<WorkerLoad> completeWorkerAssignment) {
    int totalActiveConnectorsNum = activeAssignments.connectors().size();
    int totalActiveTasksNum = activeAssignments.tasks().size();
    Collection<WorkerLoad> existingWorkers = completeWorkerAssignment.stream()
            .filter(wl -> wl.size() > 0)
            .collect(Collectors.toList());
    int existingWorkersNum = existingWorkers.size();
    int totalWorkersNum = completeWorkerAssignment.size();
    int newWorkersNum = totalWorkersNum - existingWorkersNum;

    if (log.isDebugEnabled()) {
        completeWorkerAssignment.forEach(wl -> log.debug(
                "Per worker current load size; worker: {} connectors: {} tasks: {}",
                wl.worker(), wl.connectorsSize(), wl.tasksSize()));
    }

    Map<String, ConnectorsAndTasks> revoking = new HashMap<>();
    // If there are no new workers, or no existing workers to revoke from, exit early
    // after logging the status
    if (!(newWorkersNum > 0 && existingWorkersNum > 0)) {
        log.debug("No task revocation required; workers with existing load: {} workers with "
                + "no load {} total workers {}", existingWorkersNum, newWorkersNum, totalWorkersNum);
        return revoking;
    }

    log.debug("Task revocation is required; workers with existing load: {} workers with "
            + "no load {} total workers {}", existingWorkersNum, newWorkersNum, totalWorkersNum);

    // Both worker counts are positive past the guard above, so the divisions below are safe
    log.debug("Previous rounded down (floor) average number of connectors per worker {}",
            totalActiveConnectorsNum / existingWorkersNum);
    int floorConnectors = totalActiveConnectorsNum / totalWorkersNum;
    int ceilConnectors = floorConnectors + ((totalActiveConnectorsNum % totalWorkersNum == 0) ? 0 : 1);
    log.debug("New average number of connectors per worker rounded down (floor) {} and rounded up (ceil) {}",
            floorConnectors, ceilConnectors);

    log.debug("Previous rounded down (floor) average number of tasks per worker {}",
            totalActiveTasksNum / existingWorkersNum);
    int floorTasks = totalActiveTasksNum / totalWorkersNum;
    int ceilTasks = floorTasks + ((totalActiveTasksNum % totalWorkersNum == 0) ? 0 : 1);
    log.debug("New average number of tasks per worker rounded down (floor) {} and rounded up (ceil) {}",
            floorTasks, ceilTasks);

    // Revoke the connectors exceeding the ceiling from each existing worker, taking them in
    // the iteration order of the worker's connector collection
    for (WorkerLoad existing : existingWorkers) {
        Iterator<String> connectors = existing.connectors().iterator();
        for (int numToRevoke = existing.connectorsSize() - ceilConnectors; numToRevoke > 0; --numToRevoke) {
            ConnectorsAndTasks resources = revoking.computeIfAbsent(
                    existing.worker(), w -> new ConnectorsAndTasks.Builder().build());
            resources.connectors().add(connectors.next());
        }
    }

    // Same for the tasks
    for (WorkerLoad existing : existingWorkers) {
        Iterator<ConnectorTaskId> tasks = existing.tasks().iterator();
        int numToRevoke = existing.tasksSize() - ceilTasks;
        // Only report workers that really are above the ceiling
        if (numToRevoke > 0) {
            log.debug("Tasks on worker {} is higher than ceiling, so revoking {} tasks", existing, numToRevoke);
        }
        for (; numToRevoke > 0; --numToRevoke) {
            ConnectorsAndTasks resources = revoking.computeIfAbsent(
                    existing.worker(), w -> new ConnectorsAndTasks.Builder().build());
            resources.tasks().add(tasks.next());
        }
    }

    return revoking;
}
use of org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.ConnectorsAndTasks in project kafka by apache.
the class IncrementalCooperativeAssignorTest method testLostAssignmentHandlingWhenWorkerBounces.
/**
 * Walks {@code handleLostAssignments} through a worker bounce: no lost assignments, a worker
 * leaving with its load, the same worker re-joining empty-handed while the scheduled rebalance
 * delay is still pending, and finally the delay expiring with the lost load handed back to
 * the returning worker.
 */
@Test
public void testLostAssignmentHandlingWhenWorkerBounces() {
    // Customize assignor for this test case
    time = new MockTime();
    initAssignor();

    assertTrue(assignor.candidateWorkersForReassignment.isEmpty());
    assertEquals(0, assignor.scheduledRebalance);
    assertEquals(0, assignor.delay);

    Map<String, WorkerLoad> configuredAssignment = new HashMap<>();
    configuredAssignment.put("worker0", workerLoad("worker0", 0, 2, 0, 4));
    configuredAssignment.put("worker1", workerLoad("worker1", 2, 2, 4, 4));
    configuredAssignment.put("worker2", workerLoad("worker2", 4, 2, 8, 4));
    memberConfigs = memberConfigs(leader, offset, 0, 2);

    ConnectorsAndTasks newSubmissions = new ConnectorsAndTasks.Builder().build();

    // No lost assignments
    assignor.handleLostAssignments(new ConnectorsAndTasks.Builder().build(), newSubmissions,
            new ArrayList<>(configuredAssignment.values()), memberConfigs);

    assertThat("Wrong set of workers for reassignments",
            assignor.candidateWorkersForReassignment,
            is(Collections.emptySet()));
    assertEquals(0, assignor.scheduledRebalance);
    assertEquals(0, assignor.delay);

    assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
    String flakyWorker = "worker1";
    WorkerLoad lostLoad = workerLoad(flakyWorker, 2, 2, 4, 4);
    memberConfigs.remove(flakyWorker);

    ConnectorsAndTasks lostAssignments = new ConnectorsAndTasks.Builder()
            .withCopies(lostLoad.connectors(), lostLoad.tasks()).build();

    // Lost assignments detected - No candidate worker has appeared yet (worker with no assignments)
    assignor.handleLostAssignments(lostAssignments, newSubmissions,
            new ArrayList<>(configuredAssignment.values()), memberConfigs);

    assertThat("Wrong set of workers for reassignments",
            assignor.candidateWorkersForReassignment,
            is(Collections.emptySet()));
    assertEquals(time.milliseconds() + rebalanceDelay, assignor.scheduledRebalance);
    assertEquals(rebalanceDelay, assignor.delay);

    assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
    // Let half of the delay elapse; from here on rebalanceDelay holds the remaining half
    time.sleep(rebalanceDelay / 2);
    rebalanceDelay /= 2;

    // A new worker (probably returning worker) has joined
    configuredAssignment.put(flakyWorker, new WorkerLoad.Builder(flakyWorker).build());
    memberConfigs.put(flakyWorker, new ExtendedWorkerState(leaderUrl, offset, null));
    assignor.handleLostAssignments(lostAssignments, newSubmissions,
            new ArrayList<>(configuredAssignment.values()), memberConfigs);

    assertThat("Wrong set of workers for reassignments",
            assignor.candidateWorkersForReassignment,
            is(Collections.singleton(flakyWorker)));
    assertEquals(time.milliseconds() + rebalanceDelay, assignor.scheduledRebalance);
    assertEquals(rebalanceDelay, assignor.delay);

    assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
    // Let the remaining delay elapse
    time.sleep(rebalanceDelay);

    // The new worker has still no assignments
    assignor.handleLostAssignments(lostAssignments, newSubmissions,
            new ArrayList<>(configuredAssignment.values()), memberConfigs);

    WorkerLoad flakyWorkerLoad = configuredAssignment.getOrDefault(
            flakyWorker, new WorkerLoad.Builder(flakyWorker).build());
    assertTrue("Wrong assignment of lost connectors",
            flakyWorkerLoad.connectors().containsAll(lostAssignments.connectors()));
    assertTrue("Wrong assignment of lost tasks",
            flakyWorkerLoad.tasks().containsAll(lostAssignments.tasks()));
    assertThat("Wrong set of workers for reassignments",
            assignor.candidateWorkersForReassignment,
            is(Collections.emptySet()));
    assertEquals(0, assignor.scheduledRebalance);
    assertEquals(0, assignor.delay);
}
use of org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.ConnectorsAndTasks in project kafka by apache.
the class IncrementalCooperativeAssignor method computePreviousAssignment.
/**
 * Builds the view of assignments to remember for the next rebalance: everything assigned in
 * this round, minus what is being revoked, plus the lost assignments.
 *
 * <p>As a side effect, every revoked connector and task is also recorded in the
 * {@code previousRevocation} field.
 *
 * @param toRevoke the connectors and tasks being revoked in this round, keyed by worker
 * @param connectorAssignments the connectors assigned to each worker in this round
 * @param taskAssignments the tasks assigned to each worker in this round
 * @param lostAssignments the connectors and tasks considered lost in this round
 * @return the connectors-and-tasks to use as the previous assignment in the next round
 */
private ConnectorsAndTasks computePreviousAssignment(Map<String, ConnectorsAndTasks> toRevoke,
                                                     Map<String, Collection<String>> connectorAssignments,
                                                     Map<String, Collection<ConnectorTaskId>> taskAssignments,
                                                     ConnectorsAndTasks lostAssignments) {
    // Collect into explicitly mutable sets: the collections are modified below, and
    // Collectors.toSet() gives no guarantee about the mutability of the set it returns.
    Collection<String> assignedConnectors = connectorAssignments.values().stream()
            .flatMap(Collection::stream)
            .collect(Collectors.toCollection(TreeSet::new));
    Collection<ConnectorTaskId> assignedTasks = taskAssignments.values().stream()
            .flatMap(Collection::stream)
            .collect(Collectors.toCollection(TreeSet::new));
    ConnectorsAndTasks updatedAssignment = new ConnectorsAndTasks.Builder()
            .with(assignedConnectors, assignedTasks).build();

    for (ConnectorsAndTasks revoked : toRevoke.values()) {
        updatedAssignment.connectors().removeAll(revoked.connectors());
        updatedAssignment.tasks().removeAll(revoked.tasks());
        // Keep track of what was revoked so a later round can check whether it took effect
        previousRevocation.connectors().addAll(revoked.connectors());
        previousRevocation.tasks().addAll(revoked.tasks());
    }

    // Depends on the previous assignment's collections being sets at the moment, so that lost
    // assignments that are already present are not added twice.
    // NOTE(review): assumes Builder.with() keeps the given collections rather than copying
    // them into a different collection type -- confirm.
    // TODO: make it independent
    updatedAssignment.connectors().addAll(lostAssignments.connectors());
    updatedAssignment.tasks().addAll(lostAssignments.tasks());
    return updatedAssignment;
}
Aggregations