Use of org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.WorkerLoad in project kafka by apache.
The class IncrementalCooperativeAssignor, method performTaskAssignment.
/**
* Performs task assignment based on the incremental cooperative connect protocol.
* Read more on the design and implementation in
* <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-415%3A+Incremental+Cooperative+Rebalancing+in+Kafka+Connect">KIP-415: Incremental Cooperative Rebalancing in Kafka Connect</a>.
*
* @param leaderId the ID of the group leader
* @param maxOffset the latest known offset of the configuration topic
* @param memberConfigs the metadata of all the members of the group as gathered in the current
* round of rebalancing
* @param coordinator the worker coordinator instance that provides the configuration snapshot
* and is assigned the leader state during this assignment
* @param protocolVersion the Connect subprotocol version
* @return the serialized assignment of tasks to the whole group, including assigned or
* revoked tasks
*/
protected Map<String, ByteBuffer> performTaskAssignment(String leaderId, long maxOffset, Map<String, ExtendedWorkerState> memberConfigs, WorkerCoordinator coordinator, short protocolVersion) {
log.debug("Performing task assignment during generation: {} with memberId: {}", coordinator.generationId(), coordinator.memberId());
// Base set: The previous assignment of connectors-and-tasks is a standalone snapshot that
// can be used to calculate derived sets
log.debug("Previous assignments: {}", previousAssignment);
int lastCompletedGenerationId = coordinator.lastCompletedGenerationId();
if (previousGenerationId != lastCompletedGenerationId) {
log.debug("Clearing the view of previous assignments due to generation mismatch between " + "previous generation ID {} and last completed generation ID {}. This can " + "happen if the leader fails to sync the assignment within a rebalancing round. " + "The following view of previous assignments might be outdated and will be " + "ignored by the leader in the current computation of new assignments. " + "Possibly outdated previous assignments: {}", previousGenerationId, lastCompletedGenerationId, previousAssignment);
this.previousAssignment = ConnectorsAndTasks.EMPTY;
}
ClusterConfigState snapshot = coordinator.configSnapshot();
Set<String> configuredConnectors = new TreeSet<>(snapshot.connectors());
Set<ConnectorTaskId> configuredTasks = configuredConnectors.stream().flatMap(c -> snapshot.tasks(c).stream()).collect(Collectors.toSet());
// Base set: The set of configured connectors-and-tasks is a standalone snapshot that can
// be used to calculate derived sets
ConnectorsAndTasks configured = new ConnectorsAndTasks.Builder().with(configuredConnectors, configuredTasks).build();
log.debug("Configured assignments: {}", configured);
// Base set: The set of active connectors-and-tasks is a standalone snapshot that can be
// used to calculate derived sets
ConnectorsAndTasks activeAssignments = assignment(memberConfigs);
log.debug("Active assignments: {}", activeAssignments);
// If a previous revocation did not take effect (some of the revoked connectors or tasks are
// still active), reset the view of previous assignments appropriately and be ready to
// re-apply revocation of tasks
if (!previousRevocation.isEmpty()) {
if (previousRevocation.connectors().stream().anyMatch(c -> activeAssignments.connectors().contains(c)) || previousRevocation.tasks().stream().anyMatch(t -> activeAssignments.tasks().contains(t))) {
previousAssignment = activeAssignments;
canRevoke = true;
}
previousRevocation.connectors().clear();
previousRevocation.tasks().clear();
}
// Derived set: The set of deleted connectors-and-tasks is a derived set from the set
// difference of previous - configured
ConnectorsAndTasks deleted = diff(previousAssignment, configured);
log.debug("Deleted assignments: {}", deleted);
// Derived set: The set of remaining active connectors-and-tasks is a derived set from the
// set difference of active - deleted
ConnectorsAndTasks remainingActive = diff(activeAssignments, deleted);
log.debug("Remaining (excluding deleted) active assignments: {}", remainingActive);
// Derived set: The set of lost or unaccounted connectors-and-tasks is a derived set from
// the set difference of previous - active - deleted
ConnectorsAndTasks lostAssignments = diff(previousAssignment, activeAssignments, deleted);
log.debug("Lost assignments: {}", lostAssignments);
// Derived set: The set of new connectors-and-tasks is a derived set from the set
// difference of configured - previous - active
ConnectorsAndTasks newSubmissions = diff(configured, previousAssignment, activeAssignments);
log.debug("New assignments: {}", newSubmissions);
// A collection of the complete assignment
List<WorkerLoad> completeWorkerAssignment = workerAssignment(memberConfigs, ConnectorsAndTasks.EMPTY);
log.debug("Complete (ignoring deletions) worker assignments: {}", completeWorkerAssignment);
// Per worker connector assignments without removing deleted connectors yet
Map<String, Collection<String>> connectorAssignments = completeWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::connectors));
log.debug("Complete (ignoring deletions) connector assignments: {}", connectorAssignments);
// Per worker task assignments without removing deleted connectors yet
Map<String, Collection<ConnectorTaskId>> taskAssignments = completeWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::tasks));
log.debug("Complete (ignoring deletions) task assignments: {}", taskAssignments);
// A collection of the current assignment excluding the connectors-and-tasks to be deleted
List<WorkerLoad> currentWorkerAssignment = workerAssignment(memberConfigs, deleted);
Map<String, ConnectorsAndTasks> toRevoke = computeDeleted(deleted, connectorAssignments, taskAssignments);
log.debug("Connector and task to delete assignments: {}", toRevoke);
// Revoking redundant connectors/tasks if the workers have duplicate assignments
toRevoke.putAll(computeDuplicatedAssignments(memberConfigs, connectorAssignments, taskAssignments));
log.debug("Connector and task to revoke assignments (include duplicated assignments): {}", toRevoke);
// Recompute the complete assignment excluding the deleted connectors-and-tasks
completeWorkerAssignment = workerAssignment(memberConfigs, deleted);
connectorAssignments = completeWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::connectors));
taskAssignments = completeWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::tasks));
handleLostAssignments(lostAssignments, newSubmissions, completeWorkerAssignment, memberConfigs);
// Do not revoke resources for re-assignment while a delayed rebalance is active.
// Also, we do not revoke in two consecutive rebalances by the same leader
canRevoke = delay == 0 && canRevoke;
// Compute the connectors-and-tasks to be revoked for load balancing without taking into
// account the deleted ones.
log.debug("Can leader revoke tasks in this assignment? {} (delay: {})", canRevoke, delay);
if (canRevoke) {
Map<String, ConnectorsAndTasks> toExplicitlyRevoke = performTaskRevocation(activeAssignments, currentWorkerAssignment);
log.debug("Connector and task to revoke assignments: {}", toRevoke);
toExplicitlyRevoke.forEach((worker, assignment) -> {
ConnectorsAndTasks existing = toRevoke.computeIfAbsent(worker, v -> new ConnectorsAndTasks.Builder().build());
existing.connectors().addAll(assignment.connectors());
existing.tasks().addAll(assignment.tasks());
});
canRevoke = toExplicitlyRevoke.isEmpty();
} else {
canRevoke = delay == 0;
}
assignConnectors(completeWorkerAssignment, newSubmissions.connectors());
assignTasks(completeWorkerAssignment, newSubmissions.tasks());
log.debug("Current complete assignments: {}", currentWorkerAssignment);
log.debug("New complete assignments: {}", completeWorkerAssignment);
Map<String, Collection<String>> currentConnectorAssignments = currentWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::connectors));
Map<String, Collection<ConnectorTaskId>> currentTaskAssignments = currentWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::tasks));
Map<String, Collection<String>> incrementalConnectorAssignments = diff(connectorAssignments, currentConnectorAssignments);
Map<String, Collection<ConnectorTaskId>> incrementalTaskAssignments = diff(taskAssignments, currentTaskAssignments);
log.debug("Incremental connector assignments: {}", incrementalConnectorAssignments);
log.debug("Incremental task assignments: {}", incrementalTaskAssignments);
coordinator.leaderState(new LeaderState(memberConfigs, connectorAssignments, taskAssignments));
Map<String, ExtendedAssignment> assignments = fillAssignments(memberConfigs.keySet(), Assignment.NO_ERROR, leaderId, memberConfigs.get(leaderId).url(), maxOffset, incrementalConnectorAssignments, incrementalTaskAssignments, toRevoke, delay, protocolVersion);
previousAssignment = computePreviousAssignment(toRevoke, connectorAssignments, taskAssignments, lostAssignments);
previousGenerationId = coordinator.generationId();
previousMembers = memberConfigs.keySet();
log.debug("Actual assignments: {}", assignments);
return serializeAssignments(assignments);
}
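The heart of this method is the set algebra over ConnectorsAndTasks: deleted = previous − configured, lost = previous − active − deleted, and new = configured − previous − active. As a rough standalone illustration of that diff chain, here is a minimal sketch using plain java.util sets; the diff helper and class name below are hypothetical stand-ins, not the package-private Kafka helpers.

// Minimal sketch of the derived-set algebra in performTaskAssignment, on plain string sets.
// The diff helper here is hypothetical and stands in for the ConnectorsAndTasks diffs.
import java.util.Set;
import java.util.TreeSet;

public class AssignmentSetAlgebra {
    // diff(base, toRemove...) = base minus the union of the remaining sets
    @SafeVarargs
    static Set<String> diff(Set<String> base, Set<String>... toRemove) {
        Set<String> result = new TreeSet<>(base);
        for (Set<String> s : toRemove) {
            result.removeAll(s);
        }
        return result;
    }

    public static void main(String[] args) {
        Set<String> previous = Set.of("conn-a", "conn-b", "conn-c");
        Set<String> configured = Set.of("conn-a", "conn-b"); // conn-c was removed from the config
        Set<String> active = Set.of("conn-a");               // conn-b's worker dropped out

        Set<String> deleted = diff(previous, configured);                // [conn-c]
        Set<String> lost = diff(previous, active, deleted);              // [conn-b]
        Set<String> newSubmissions = diff(configured, previous, active); // [] (nothing new)
        System.out.println(deleted + " / " + lost + " / " + newSubmissions);
    }
}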
Use of org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.WorkerLoad in project kafka by apache.
The class IncrementalCooperativeAssignor, method performTaskRevocation.
/**
* Task revocation is based on a rough estimate of the lower average number of tasks before
* and after new workers join the group. If no new workers join, no revocation takes place.
* Based on this estimate, tasks are revoked until the new floor average is reached for
* each existing worker. The revoked tasks, once assigned to the new workers, will maintain
* a balanced load among the group.
*
* @param activeAssignments the connectors and tasks that are currently running across the group
* @param completeWorkerAssignment the complete current assignment of connectors and tasks per worker
* @return the connectors and tasks to revoke, keyed by worker name
*/
private Map<String, ConnectorsAndTasks> performTaskRevocation(ConnectorsAndTasks activeAssignments, Collection<WorkerLoad> completeWorkerAssignment) {
int totalActiveConnectorsNum = activeAssignments.connectors().size();
int totalActiveTasksNum = activeAssignments.tasks().size();
Collection<WorkerLoad> existingWorkers = completeWorkerAssignment.stream().filter(wl -> wl.size() > 0).collect(Collectors.toList());
int existingWorkersNum = existingWorkers.size();
int totalWorkersNum = completeWorkerAssignment.size();
int newWorkersNum = totalWorkersNum - existingWorkersNum;
if (log.isDebugEnabled()) {
completeWorkerAssignment.forEach(wl -> log.debug("Per worker current load size; worker: {} connectors: {} tasks: {}", wl.worker(), wl.connectorsSize(), wl.tasksSize()));
}
Map<String, ConnectorsAndTasks> revoking = new HashMap<>();
// If there are no new workers, or no existing workers to revoke tasks from, return early
// after logging the status
if (!(newWorkersNum > 0 && existingWorkersNum > 0)) {
log.debug("No task revocation required; workers with existing load: {} workers with " + "no load {} total workers {}", existingWorkersNum, newWorkersNum, totalWorkersNum);
// This map is intentionally empty but mutable, because it is later used to include deleted
// connectors and tasks as well
return revoking;
}
log.debug("Task revocation is required; workers with existing load: {} workers with " + "no load {} total workers {}", existingWorkersNum, newWorkersNum, totalWorkersNum);
// We have at least one worker assignment (the leader itself) so totalWorkersNum can't be 0
log.debug("Previous rounded down (floor) average number of connectors per worker {}", totalActiveConnectorsNum / existingWorkersNum);
int floorConnectors = totalActiveConnectorsNum / totalWorkersNum;
int ceilConnectors = floorConnectors + ((totalActiveConnectorsNum % totalWorkersNum == 0) ? 0 : 1);
log.debug("New average number of connectors per worker rounded down (floor) {} and rounded up (ceil) {}", floorConnectors, ceilConnectors);
log.debug("Previous rounded down (floor) average number of tasks per worker {}", totalActiveTasksNum / existingWorkersNum);
int floorTasks = totalActiveTasksNum / totalWorkersNum;
int ceilTasks = floorTasks + ((totalActiveTasksNum % totalWorkersNum == 0) ? 0 : 1);
log.debug("New average number of tasks per worker rounded down (floor) {} and rounded up (ceil) {}", floorTasks, ceilTasks);
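// Worked example: with 8 active tasks on 2 existing workers (4 each) and 1 new worker
// joining, totalWorkersNum = 3, so floorTasks = 8 / 3 = 2 and ceilTasks = 3. Each existing
// worker then revokes 4 - 3 = 1 task in the loops below, and the 2 revoked tasks can later
// be assigned to the new worker, yielding a 3/3/2 distribution.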
int numToRevoke;
for (WorkerLoad existing : existingWorkers) {
Iterator<String> connectors = existing.connectors().iterator();
numToRevoke = existing.connectorsSize() - ceilConnectors;
for (int i = existing.connectorsSize(); i > floorConnectors && numToRevoke > 0; --i, --numToRevoke) {
ConnectorsAndTasks resources = revoking.computeIfAbsent(existing.worker(), w -> new ConnectorsAndTasks.Builder().build());
resources.connectors().add(connectors.next());
}
}
for (WorkerLoad existing : existingWorkers) {
Iterator<ConnectorTaskId> tasks = existing.tasks().iterator();
numToRevoke = existing.tasksSize() - ceilTasks;
log.debug("Task count on worker {} is higher than the ceiling, so revoking {} tasks", existing, numToRevoke);
for (int i = existing.tasksSize(); i > floorTasks && numToRevoke > 0; --i, --numToRevoke) {
ConnectorsAndTasks resources = revoking.computeIfAbsent(existing.worker(), w -> new ConnectorsAndTasks.Builder().build());
resources.tasks().add(tasks.next());
}
}
return revoking;
}
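The revocation counts depend only on simple integer arithmetic over the task totals. The following standalone sketch (hypothetical names, not Kafka code) reproduces the count calculation for the worked example above.

// Standalone sketch of the revocation-count arithmetic (hypothetical, not Kafka code).
public class RevocationMath {
    // Number of tasks a worker should give up so that load can level out at the new ceiling.
    static int tasksToRevoke(int workerTasks, int totalTasks, int totalWorkers) {
        int floor = totalTasks / totalWorkers;
        int ceil = floor + (totalTasks % totalWorkers == 0 ? 0 : 1);
        return Math.max(0, workerTasks - ceil);
    }

    public static void main(String[] args) {
        // 2 existing workers with 4 tasks each, 1 new worker joins (3 workers, 8 tasks total)
        System.out.println(tasksToRevoke(4, 8, 3)); // prints 1: each loaded worker revokes one task
    }
}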
Use of org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.WorkerLoad in project kafka by apache.
The class IncrementalCooperativeAssignor, method assignConnectors.
/**
* Perform a round-robin assignment of connectors to workers with existing worker load. This
* assignment attempts to balance the load between workers by assigning connectors to groups
* of workers that have equal load, starting with the least loaded workers.
*
* @param workerAssignment the current worker assignment; assigned connectors are added to this list
* @param connectors the connectors to be assigned
*/
protected void assignConnectors(List<WorkerLoad> workerAssignment, Collection<String> connectors) {
workerAssignment.sort(WorkerLoad.connectorComparator());
WorkerLoad first = workerAssignment.get(0);
Iterator<String> load = connectors.iterator();
while (load.hasNext()) {
int firstLoad = first.connectorsSize();
int upTo = IntStream.range(0, workerAssignment.size()).filter(i -> workerAssignment.get(i).connectorsSize() > firstLoad).findFirst().orElse(workerAssignment.size());
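// upTo is the index of the first worker whose connector count exceeds that of the
// least-loaded worker; since the list is sorted, subList(0, upTo) is exactly the group of
// equally least-loaded workers, each of which receives one connector in this pass.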
for (WorkerLoad worker : workerAssignment.subList(0, upTo)) {
String connector = load.next();
log.debug("Assigning connector {} to {}", connector, worker.worker());
worker.assign(connector);
if (!load.hasNext()) {
break;
}
}
}
}
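To see the least-loaded round-robin in action, here is a rough self-contained sketch that mirrors the loop above with a toy Load class standing in for WorkerLoad (whose construction is not shown in this snippet); assignTasks below applies the identical scheme to ConnectorTaskId using WorkerLoad.taskComparator().

// Rough sketch of the least-loaded round-robin in assignConnectors.
// The Load class is a hypothetical stand-in for WorkerLoad.
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.stream.IntStream;

public class RoundRobinSketch {
    static class Load {
        final String worker;
        final List<String> connectors = new ArrayList<>();
        Load(String worker) { this.worker = worker; }
        int size() { return connectors.size(); }
    }

    static void assign(List<Load> workers, List<String> connectors) {
        workers.sort(Comparator.comparingInt(Load::size));
        Iterator<String> it = connectors.iterator();
        while (it.hasNext()) {
            int firstLoad = workers.get(0).size();
            // The prefix [0, upTo) is the set of equally least-loaded workers.
            int upTo = IntStream.range(0, workers.size())
                    .filter(i -> workers.get(i).size() > firstLoad)
                    .findFirst().orElse(workers.size());
            for (Load w : workers.subList(0, upTo)) {
                if (!it.hasNext()) break;
                w.connectors.add(it.next());
            }
        }
    }

    public static void main(String[] args) {
        List<Load> workers = new ArrayList<>(List.of(new Load("w1"), new Load("w2")));
        workers.get(0).connectors.add("existing"); // w1 starts with one connector
        assign(workers, new ArrayList<>(List.of("c1", "c2", "c3")));
        // w2 (least loaded) catches up first, then the remainder is spread round-robin,
        // ending with two connectors on each worker:
        workers.forEach(w -> System.out.println(w.worker + " -> " + w.connectors));
    }
}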
Use of org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.WorkerLoad in project kafka by apache.
The class IncrementalCooperativeAssignor, method assignTasks.
/**
* Perform a round-robin assignment of tasks to workers with existing worker load. This
* assignment attempts to balance the load between workers by assigning tasks to groups of
* workers that have equal load, starting with the least loaded workers.
*
* @param workerAssignment the current worker assignment; assigned tasks are added to this list
* @param tasks the tasks to be assigned
*/
protected void assignTasks(List<WorkerLoad> workerAssignment, Collection<ConnectorTaskId> tasks) {
workerAssignment.sort(WorkerLoad.taskComparator());
WorkerLoad first = workerAssignment.get(0);
Iterator<ConnectorTaskId> load = tasks.iterator();
while (load.hasNext()) {
int firstLoad = first.tasksSize();
int upTo = IntStream.range(0, workerAssignment.size()).filter(i -> workerAssignment.get(i).tasksSize() > firstLoad).findFirst().orElse(workerAssignment.size());
for (WorkerLoad worker : workerAssignment.subList(0, upTo)) {
ConnectorTaskId task = load.next();
log.debug("Assigning task {} to {}", task, worker.worker());
worker.assign(task);
if (!load.hasNext()) {
break;
}
}
}
}
Use of org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.WorkerLoad in project kafka by apache.
The class IncrementalCooperativeAssignor, method handleLostAssignments.
// visible for testing
protected void handleLostAssignments(ConnectorsAndTasks lostAssignments, ConnectorsAndTasks newSubmissions, List<WorkerLoad> completeWorkerAssignment, Map<String, ExtendedWorkerState> memberConfigs) {
if (lostAssignments.isEmpty()) {
resetDelay();
return;
}
final long now = time.milliseconds();
log.debug("Found the following connectors and tasks missing from previous assignments: " + lostAssignments);
if (scheduledRebalance <= 0 && memberConfigs.keySet().containsAll(previousMembers)) {
log.debug("No worker seems to have departed the group during the rebalance. The " + "missing assignments that the leader is detecting are probably due to some " + "workers failing to receive the new assignments in the previous rebalance. " + "Will reassign missing tasks as new tasks");
newSubmissions.connectors().addAll(lostAssignments.connectors());
newSubmissions.tasks().addAll(lostAssignments.tasks());
return;
}
if (scheduledRebalance > 0 && now >= scheduledRebalance) {
// delayed rebalance expired and it's time to assign resources
log.debug("Delayed rebalance expired. Reassigning lost tasks");
List<WorkerLoad> candidateWorkerLoad = Collections.emptyList();
if (!candidateWorkersForReassignment.isEmpty()) {
candidateWorkerLoad = pickCandidateWorkerForReassignment(completeWorkerAssignment);
}
if (!candidateWorkerLoad.isEmpty()) {
log.debug("Assigning lost tasks to {} candidate workers: {}", candidateWorkerLoad.size(), candidateWorkerLoad.stream().map(WorkerLoad::worker).collect(Collectors.joining(",")));
Iterator<WorkerLoad> candidateWorkerIterator = candidateWorkerLoad.iterator();
for (String connector : lostAssignments.connectors()) {
// Loop over the candidate workers as many times as it takes
if (!candidateWorkerIterator.hasNext()) {
candidateWorkerIterator = candidateWorkerLoad.iterator();
}
WorkerLoad worker = candidateWorkerIterator.next();
log.debug("Assigning connector id {} to member {}", connector, worker.worker());
worker.assign(connector);
}
candidateWorkerIterator = candidateWorkerLoad.iterator();
for (ConnectorTaskId task : lostAssignments.tasks()) {
if (!candidateWorkerIterator.hasNext()) {
candidateWorkerIterator = candidateWorkerLoad.iterator();
}
WorkerLoad worker = candidateWorkerIterator.next();
log.debug("Assigning task id {} to member {}", task, worker.worker());
worker.assign(task);
}
} else {
log.debug("No single candidate worker was found to assign lost tasks. Treating lost tasks as new tasks");
newSubmissions.connectors().addAll(lostAssignments.connectors());
newSubmissions.tasks().addAll(lostAssignments.tasks());
}
resetDelay();
} else {
candidateWorkersForReassignment.addAll(candidateWorkersForReassignment(completeWorkerAssignment));
if (now < scheduledRebalance) {
// a delayed rebalance is in progress, but it's not yet time to reassign
// unaccounted resources
delay = calculateDelay(now);
log.debug("Delayed rebalance in progress. Task reassignment is postponed. New computed rebalance delay: {}", delay);
} else {
// This means scheduledRebalance == 0
// We could also extract the current minimum delay from the group, to make the delay
// independent of consecutive leader failures, but this optimization is skipped
// at the moment
delay = maxDelay;
log.debug("Resetting rebalance delay to the max: {}. scheduledRebalance: {} now: {} diff scheduledRebalance - now: {}", delay, scheduledRebalance, now, scheduledRebalance - now);
}
scheduledRebalance = now + delay;
}
}
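The delayed-rebalance bookkeeping in the final else branch reduces to a small amount of state: an absolute deadline (scheduledRebalance) and the relative delay communicated to members. A rough sketch of that state machine follows (hypothetical names, not Kafka code; it assumes calculateDelay returns the time remaining to the existing deadline and that maxDelay corresponds to the scheduled.rebalance.max.delay.ms worker config).

// Rough sketch of the rebalance-delay bookkeeping in handleLostAssignments
// (hypothetical stand-in, not Kafka code).
public class DelaySketch {
    long scheduledRebalance = 0; // absolute deadline in ms; 0 means no delayed rebalance pending
    long delay = 0;              // relative delay handed out to the group members
    final long maxDelay;         // upper bound, e.g. scheduled.rebalance.max.delay.ms

    DelaySketch(long maxDelay) { this.maxDelay = maxDelay; }

    // Called when lost assignments are detected but cannot be reassigned yet.
    void onLostAssignments(long now) {
        if (scheduledRebalance > 0 && now < scheduledRebalance) {
            // A delayed rebalance is already in flight: keep the same deadline,
            // so the advertised delay counts down across rebalances.
            delay = scheduledRebalance - now;
        } else {
            // No deadline yet (scheduledRebalance == 0): start a fresh full delay.
            delay = maxDelay;
        }
        scheduledRebalance = now + delay;
    }

    // Called once lost assignments have been resolved or reassigned.
    void resetDelay() {
        scheduledRebalance = 0;
        delay = 0;
    }
}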