Search in sources :

Example 1 with Assignment

Use of org.apache.kafka.connect.runtime.distributed.ConnectProtocol.Assignment in the Apache Kafka project.

From the class IncrementalCooperativeAssignor, the method performTaskAssignment.

/**
 * Performs task assignment based on the incremental cooperative Connect protocol.
 * Read more on the design and implementation in:
 * <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-415%3A+Incremental+Cooperative+Rebalancing+in+Kafka+Connect">KIP-415</a>
 *
 * @param leaderId the ID of the group leader
 * @param maxOffset the latest known offset of the configuration topic
 * @param memberConfigs the metadata of all the members of the group as gathered in the current
 * round of rebalancing
 * @param coordinator the worker coordinator instance that provides the configuration snapshot
 * and gets assigned the leader state during this assignment
 * @param protocolVersion the Connect subprotocol version
 * @return the serialized assignment of tasks to the whole group, including assigned or
 * revoked tasks
 */
protected Map<String, ByteBuffer> performTaskAssignment(String leaderId, long maxOffset, Map<String, ExtendedWorkerState> memberConfigs, WorkerCoordinator coordinator, short protocolVersion) {
    log.debug("Performing task assignment during generation: {} with memberId: {}", coordinator.generationId(), coordinator.memberId());
    // Base set: The previous assignment of connectors-and-tasks is a standalone snapshot that
    // can be used to calculate derived sets
    log.debug("Previous assignments: {}", previousAssignment);
    int lastCompletedGenerationId = coordinator.lastCompletedGenerationId();
    // A generation mismatch means this leader's recorded view of the previous assignment may
    // be stale (e.g. the assignment was not synced within the last rebalancing round), so it
    // is discarded rather than used in the computation below
    if (previousGenerationId != lastCompletedGenerationId) {
        log.debug("Clearing the view of previous assignments due to generation mismatch between " + "previous generation ID {} and last completed generation ID {}. This can " + "happen if the leader fails to sync the assignment within a rebalancing round. " + "The following view of previous assignments might be outdated and will be " + "ignored by the leader in the current computation of new assignments. " + "Possibly outdated previous assignments: {}", previousGenerationId, lastCompletedGenerationId, previousAssignment);
        this.previousAssignment = ConnectorsAndTasks.EMPTY;
    }
    ClusterConfigState snapshot = coordinator.configSnapshot();
    Set<String> configuredConnectors = new TreeSet<>(snapshot.connectors());
    Set<ConnectorTaskId> configuredTasks = configuredConnectors.stream().flatMap(c -> snapshot.tasks(c).stream()).collect(Collectors.toSet());
    // Base set: The set of configured connectors-and-tasks is a standalone snapshot that can
    // be used to calculate derived sets
    ConnectorsAndTasks configured = new ConnectorsAndTasks.Builder().with(configuredConnectors, configuredTasks).build();
    log.debug("Configured assignments: {}", configured);
    // Base set: The set of active connectors-and-tasks is a standalone snapshot that can be
    // used to calculate derived sets
    ConnectorsAndTasks activeAssignments = assignment(memberConfigs);
    log.debug("Active assignments: {}", activeAssignments);
    // If connectors or tasks revoked in the previous round still show up in the active
    // assignments here, the revocation did not take effect; restore the view of previous
    // assignments from the active ones and mark the leader as able to revoke again so it is
    // appropriately and be ready to re-apply revocation of tasks
    if (!previousRevocation.isEmpty()) {
        if (previousRevocation.connectors().stream().anyMatch(c -> activeAssignments.connectors().contains(c)) || previousRevocation.tasks().stream().anyMatch(t -> activeAssignments.tasks().contains(t))) {
            previousAssignment = activeAssignments;
            canRevoke = true;
        }
        previousRevocation.connectors().clear();
        previousRevocation.tasks().clear();
    }
    // Derived set: The set of deleted connectors-and-tasks is a derived set from the set
    // difference of previous - configured
    ConnectorsAndTasks deleted = diff(previousAssignment, configured);
    log.debug("Deleted assignments: {}", deleted);
    // Derived set: The set of remaining active connectors-and-tasks is a derived set from the
    // set difference of active - deleted
    ConnectorsAndTasks remainingActive = diff(activeAssignments, deleted);
    log.debug("Remaining (excluding deleted) active assignments: {}", remainingActive);
    // Derived set: The set of lost or unaccounted connectors-and-tasks is a derived set from
    // the set difference of previous - active - deleted
    ConnectorsAndTasks lostAssignments = diff(previousAssignment, activeAssignments, deleted);
    log.debug("Lost assignments: {}", lostAssignments);
    // Derived set: The set of new connectors-and-tasks is a derived set from the set
    // difference of configured - previous - active
    ConnectorsAndTasks newSubmissions = diff(configured, previousAssignment, activeAssignments);
    log.debug("New assignments: {}", newSubmissions);
    // A collection of the complete assignment
    List<WorkerLoad> completeWorkerAssignment = workerAssignment(memberConfigs, ConnectorsAndTasks.EMPTY);
    log.debug("Complete (ignoring deletions) worker assignments: {}", completeWorkerAssignment);
    // Per worker connector assignments without removing deleted connectors yet
    Map<String, Collection<String>> connectorAssignments = completeWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::connectors));
    log.debug("Complete (ignoring deletions) connector assignments: {}", connectorAssignments);
    // Per worker task assignments without removing deleted connectors yet
    Map<String, Collection<ConnectorTaskId>> taskAssignments = completeWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::tasks));
    log.debug("Complete (ignoring deletions) task assignments: {}", taskAssignments);
    // A collection of the current assignment excluding the connectors-and-tasks to be deleted
    List<WorkerLoad> currentWorkerAssignment = workerAssignment(memberConfigs, deleted);
    Map<String, ConnectorsAndTasks> toRevoke = computeDeleted(deleted, connectorAssignments, taskAssignments);
    log.debug("Connector and task to delete assignments: {}", toRevoke);
    // Revoking redundant connectors/tasks if the workers have duplicate assignments
    toRevoke.putAll(computeDuplicatedAssignments(memberConfigs, connectorAssignments, taskAssignments));
    log.debug("Connector and task to revoke assignments (include duplicated assignments): {}", toRevoke);
    // Recompute the complete assignment excluding the deleted connectors-and-tasks
    completeWorkerAssignment = workerAssignment(memberConfigs, deleted);
    connectorAssignments = completeWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::connectors));
    taskAssignments = completeWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::tasks));
    handleLostAssignments(lostAssignments, newSubmissions, completeWorkerAssignment, memberConfigs);
    // Do not revoke resources for re-assignment while a delayed rebalance is active
    // Also we do not revoke in two consecutive rebalances by the same leader
    canRevoke = delay == 0 && canRevoke;
    // Compute the connectors-and-tasks to be revoked for load balancing without taking into
    // account the deleted ones.
    log.debug("Can leader revoke tasks in this assignment? {} (delay: {})", canRevoke, delay);
    if (canRevoke) {
        Map<String, ConnectorsAndTasks> toExplicitlyRevoke = performTaskRevocation(activeAssignments, currentWorkerAssignment);
        // NOTE(review): this logs toRevoke rather than toExplicitlyRevoke — presumably the
        // newly computed load-balancing revocations were intended; confirm
        log.debug("Connector and task to revoke assignments: {}", toRevoke);
        // Merge the load-balancing revocations into the deletion/duplication revocations
        toExplicitlyRevoke.forEach((worker, assignment) -> {
            ConnectorsAndTasks existing = toRevoke.computeIfAbsent(worker, v -> new ConnectorsAndTasks.Builder().build());
            existing.connectors().addAll(assignment.connectors());
            existing.tasks().addAll(assignment.tasks());
        });
        // Allow revocation in the next round only if nothing was revoked in this one (avoids
        // two consecutive revoking rebalances by the same leader)
        canRevoke = toExplicitlyRevoke.size() == 0;
    } else {
        canRevoke = delay == 0;
    }
    // Hand the outstanding new connectors and tasks to the least loaded workers
    assignConnectors(completeWorkerAssignment, newSubmissions.connectors());
    assignTasks(completeWorkerAssignment, newSubmissions.tasks());
    log.debug("Current complete assignments: {}", currentWorkerAssignment);
    log.debug("New complete assignments: {}", completeWorkerAssignment);
    Map<String, Collection<String>> currentConnectorAssignments = currentWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::connectors));
    Map<String, Collection<ConnectorTaskId>> currentTaskAssignments = currentWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::tasks));
    // Only the delta between the new complete assignment and the current one is sent out
    Map<String, Collection<String>> incrementalConnectorAssignments = diff(connectorAssignments, currentConnectorAssignments);
    Map<String, Collection<ConnectorTaskId>> incrementalTaskAssignments = diff(taskAssignments, currentTaskAssignments);
    log.debug("Incremental connector assignments: {}", incrementalConnectorAssignments);
    log.debug("Incremental task assignments: {}", incrementalTaskAssignments);
    // Record the complete assignment in the coordinator's leader state
    coordinator.leaderState(new LeaderState(memberConfigs, connectorAssignments, taskAssignments));
    Map<String, ExtendedAssignment> assignments = fillAssignments(memberConfigs.keySet(), Assignment.NO_ERROR, leaderId, memberConfigs.get(leaderId).url(), maxOffset, incrementalConnectorAssignments, incrementalTaskAssignments, toRevoke, delay, protocolVersion);
    // Remember this round's outcome so the next rebalance can compute its derived sets
    previousAssignment = computePreviousAssignment(toRevoke, connectorAssignments, taskAssignments, lostAssignments);
    previousGenerationId = coordinator.generationId();
    previousMembers = memberConfigs.keySet();
    log.debug("Actual assignments: {}", assignments);
    return serializeAssignments(assignments);
}
Also used : IntStream(java.util.stream.IntStream) ConnectorTaskId(org.apache.kafka.connect.util.ConnectorTaskId) HashMap(java.util.HashMap) Function(java.util.function.Function) ByteBuffer(java.nio.ByteBuffer) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) CONNECT_PROTOCOL_V2(org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V2) JoinGroupResponseMember(org.apache.kafka.common.message.JoinGroupResponseData.JoinGroupResponseMember) CONNECT_PROTOCOL_V1(org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V1) Assignment(org.apache.kafka.connect.runtime.distributed.ConnectProtocol.Assignment) LogContext(org.apache.kafka.common.utils.LogContext) Map(java.util.Map) LinkedHashSet(java.util.LinkedHashSet) Logger(org.slf4j.Logger) Time(org.apache.kafka.common.utils.Time) Iterator(java.util.Iterator) LeaderState(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.LeaderState) Collection(java.util.Collection) Set(java.util.Set) WorkerLoad(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.WorkerLoad) ConnectorsAndTasks(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.ConnectorsAndTasks) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) Entry(java.util.Map.Entry) Collections(java.util.Collections) ConnectorsAndTasks(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.ConnectorsAndTasks) WorkerLoad(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.WorkerLoad) ConnectorTaskId(org.apache.kafka.connect.util.ConnectorTaskId) LeaderState(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.LeaderState) TreeSet(java.util.TreeSet) Collection(java.util.Collection)

Example 2 with Assignment

Use of org.apache.kafka.connect.runtime.distributed.ConnectProtocol.Assignment in the Apache Kafka project.

From the class IncrementalCooperativeAssignor, the method performTaskRevocation.

/**
 * Task revocation is based on a rough estimation of the lower average number of tasks before
 * and after new workers join the group. If no new workers join, no revocation takes place.
 * Based on this estimation, tasks are revoked until the new floor average is reached for
 * each existing worker. The revoked tasks, once assigned to the new workers, will maintain
 * a balanced load among the group.
 *
 * @param activeAssignments the connectors and tasks currently active across the group
 * @param completeWorkerAssignment the current per-worker load, including workers that joined
 * with an empty assignment
 * @return the connectors and tasks to revoke, keyed by the worker they are revoked from;
 * empty if no revocation is required
 */
private Map<String, ConnectorsAndTasks> performTaskRevocation(ConnectorsAndTasks activeAssignments, Collection<WorkerLoad> completeWorkerAssignment) {
    int totalActiveConnectorsNum = activeAssignments.connectors().size();
    int totalActiveTasksNum = activeAssignments.tasks().size();
    // Workers with a non-empty load are "existing"; the rest are newly joined
    Collection<WorkerLoad> existingWorkers = completeWorkerAssignment.stream().filter(wl -> wl.size() > 0).collect(Collectors.toList());
    int existingWorkersNum = existingWorkers.size();
    int totalWorkersNum = completeWorkerAssignment.size();
    int newWorkersNum = totalWorkersNum - existingWorkersNum;
    if (log.isDebugEnabled()) {
        completeWorkerAssignment.forEach(wl -> log.debug("Per worker current load size; worker: {} connectors: {} tasks: {}", wl.worker(), wl.connectorsSize(), wl.tasksSize()));
    }
    Map<String, ConnectorsAndTasks> revoking = new HashMap<>();
    // Revocation is only needed when both new (empty) and existing (loaded) workers are
    // present; otherwise return the empty map early,
    // after logging the status
    if (!(newWorkersNum > 0 && existingWorkersNum > 0)) {
        log.debug("No task revocation required; workers with existing load: {} workers with " + "no load {} total workers {}", existingWorkersNum, newWorkersNum, totalWorkersNum);
        // an empty map here means that no revocation takes place for
        // connectors and tasks as well
        return revoking;
    }
    log.debug("Task revocation is required; workers with existing load: {} workers with " + "no load {} total workers {}", existingWorkersNum, newWorkersNum, totalWorkersNum);
    // We have at least one worker assignment (the leader itself) so totalWorkersNum can't be 0
    log.debug("Previous rounded down (floor) average number of connectors per worker {}", totalActiveConnectorsNum / existingWorkersNum);
    int floorConnectors = totalActiveConnectorsNum / totalWorkersNum;
    int ceilConnectors = floorConnectors + ((totalActiveConnectorsNum % totalWorkersNum == 0) ? 0 : 1);
    log.debug("New average number of connectors per worker rounded down (floor) {} and rounded up (ceil) {}", floorConnectors, ceilConnectors);
    log.debug("Previous rounded down (floor) average number of tasks per worker {}", totalActiveTasksNum / existingWorkersNum);
    int floorTasks = totalActiveTasksNum / totalWorkersNum;
    int ceilTasks = floorTasks + ((totalActiveTasksNum % totalWorkersNum == 0) ? 0 : 1);
    log.debug("New average number of tasks per worker rounded down (floor) {} and rounded up (ceil) {}", floorTasks, ceilTasks);
    int numToRevoke;
    // Revoke connectors above the new ceiling from each loaded worker, stopping at the floor
    for (WorkerLoad existing : existingWorkers) {
        Iterator<String> connectors = existing.connectors().iterator();
        numToRevoke = existing.connectorsSize() - ceilConnectors;
        for (int i = existing.connectorsSize(); i > floorConnectors && numToRevoke > 0; --i, --numToRevoke) {
            ConnectorsAndTasks resources = revoking.computeIfAbsent(existing.worker(), w -> new ConnectorsAndTasks.Builder().build());
            resources.connectors().add(connectors.next());
        }
    }
    // Same scheme for tasks: revoke down towards the new ceiling, never below the floor
    for (WorkerLoad existing : existingWorkers) {
        Iterator<ConnectorTaskId> tasks = existing.tasks().iterator();
        numToRevoke = existing.tasksSize() - ceilTasks;
        // NOTE(review): this logs before the numToRevoke > 0 check, so it may report a zero or
        // negative count for workers at or below the ceiling — confirm intent
        log.debug("Tasks on worker {} is higher than ceiling, so revoking {} tasks", existing, numToRevoke);
        for (int i = existing.tasksSize(); i > floorTasks && numToRevoke > 0; --i, --numToRevoke) {
            ConnectorsAndTasks resources = revoking.computeIfAbsent(existing.worker(), w -> new ConnectorsAndTasks.Builder().build());
            resources.tasks().add(tasks.next());
        }
    }
    return revoking;
}
Also used : IntStream(java.util.stream.IntStream) ConnectorTaskId(org.apache.kafka.connect.util.ConnectorTaskId) HashMap(java.util.HashMap) Function(java.util.function.Function) ByteBuffer(java.nio.ByteBuffer) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) CONNECT_PROTOCOL_V2(org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V2) JoinGroupResponseMember(org.apache.kafka.common.message.JoinGroupResponseData.JoinGroupResponseMember) CONNECT_PROTOCOL_V1(org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V1) Assignment(org.apache.kafka.connect.runtime.distributed.ConnectProtocol.Assignment) LogContext(org.apache.kafka.common.utils.LogContext) Map(java.util.Map) LinkedHashSet(java.util.LinkedHashSet) Logger(org.slf4j.Logger) Time(org.apache.kafka.common.utils.Time) Iterator(java.util.Iterator) LeaderState(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.LeaderState) Collection(java.util.Collection) Set(java.util.Set) WorkerLoad(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.WorkerLoad) ConnectorsAndTasks(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.ConnectorsAndTasks) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) Entry(java.util.Map.Entry) Collections(java.util.Collections) ConnectorsAndTasks(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.ConnectorsAndTasks) WorkerLoad(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.WorkerLoad) ConnectorTaskId(org.apache.kafka.connect.util.ConnectorTaskId) HashMap(java.util.HashMap)

Example 3 with Assignment

Use of org.apache.kafka.connect.runtime.distributed.ConnectProtocol.Assignment in the Apache Kafka project.

From the class EagerAssignor, the method fillAssignmentsAndSerialize.

/**
 * Builds an {@link Assignment} for every member of the group and serializes each one with
 * {@link ConnectProtocol#serializeAssignment}.
 *
 * @param members the IDs of all members of the group
 * @param error the error code to embed in every assignment
 * @param leaderId the ID of the group leader
 * @param leaderUrl the URL of the group leader
 * @param maxOffset the config offset to embed in every assignment
 * @param connectorAssignments per-member connector assignments; members absent from this map
 * receive no connectors
 * @param taskAssignments per-member task assignments; members absent from this map receive
 * no tasks
 * @return the serialized assignment for each member, keyed by member ID
 */
private Map<String, ByteBuffer> fillAssignmentsAndSerialize(Collection<String> members, short error, String leaderId, String leaderUrl, long maxOffset, Map<String, Collection<String>> connectorAssignments, Map<String, Collection<ConnectorTaskId>> taskAssignments) {
    Map<String, ByteBuffer> groupAssignment = new HashMap<>();
    for (String member : members) {
        // Members with nothing assigned get empty collections instead of null
        Collection<String> memberConnectors = connectorAssignments.get(member);
        Collection<ConnectorTaskId> memberTasks = taskAssignments.get(member);
        Assignment assignment = new Assignment(error, leaderId, leaderUrl, maxOffset,
                memberConnectors == null ? Collections.emptyList() : memberConnectors,
                memberTasks == null ? Collections.emptyList() : memberTasks);
        log.debug("Assignment: {} -> {}", member, assignment);
        groupAssignment.put(member, ConnectProtocol.serializeAssignment(assignment));
    }
    log.debug("Finished assignment");
    return groupAssignment;
}
Also used : Assignment(org.apache.kafka.connect.runtime.distributed.ConnectProtocol.Assignment) ConnectorTaskId(org.apache.kafka.connect.util.ConnectorTaskId) HashMap(java.util.HashMap) ByteBuffer(java.nio.ByteBuffer)

Example 4 with Assignment

Use of org.apache.kafka.connect.runtime.distributed.ConnectProtocol.Assignment in the Apache Kafka project.

From the class IncrementalCooperativeAssignor, the method performAssignment.

/**
 * Entry point for an assignment round: deserializes every member's metadata, picks the
 * highest config offset seen in the group, verifies the leader's config via
 * {@code ensureLeaderConfig}, and either responds with a CONFIG_MISMATCH error or delegates
 * to {@link #performTaskAssignment}.
 *
 * @param leaderId the ID of the group leader
 * @param protocol the name of the Connect subprotocol in use
 * @param allMemberMetadata the join-group metadata of all members of the group
 * @param coordinator the worker coordinator instance holding the configuration snapshot
 * @return the serialized assignment for each member, keyed by member ID
 */
@Override
public Map<String, ByteBuffer> performAssignment(String leaderId, String protocol, List<JoinGroupResponseMember> allMemberMetadata, WorkerCoordinator coordinator) {
    log.debug("Performing task assignment");
    Map<String, ExtendedWorkerState> memberConfigs = new HashMap<>();
    for (JoinGroupResponseMember member : allMemberMetadata) {
        memberConfigs.put(member.memberId(), IncrementalCooperativeConnectProtocol.deserializeMetadata(ByteBuffer.wrap(member.metadata())));
    }
    log.debug("Member configs: {}", memberConfigs);
    // The new config offset is the maximum seen by any member. We always perform assignment using this offset,
    // even if some members have fallen behind. The config offset used to generate the assignment is included in
    // the response so members that have fallen behind will not use the assignment until they have caught up.
    // NOTE(review): the unchecked get() assumes at least one member is present (presumably the
    // leader itself always joins) — would throw NoSuchElementException on an empty group; confirm
    long maxOffset = memberConfigs.values().stream().map(ExtendedWorkerState::offset).max(Long::compare).get();
    log.debug("Max config offset root: {}, local snapshot config offsets root: {}", maxOffset, coordinator.configSnapshot().offset());
    // Use protocol V2 only if every member supports it; otherwise fall back to V1
    short protocolVersion = memberConfigs.values().stream().allMatch(state -> state.assignment().version() == CONNECT_PROTOCOL_V2) ? CONNECT_PROTOCOL_V2 : CONNECT_PROTOCOL_V1;
    Long leaderOffset = ensureLeaderConfig(maxOffset, coordinator);
    // A null leader offset signals a config mismatch: respond with empty assignments and the
    // CONFIG_MISMATCH error so members can retry after the leader catches up
    if (leaderOffset == null) {
        Map<String, ExtendedAssignment> assignments = fillAssignments(memberConfigs.keySet(), Assignment.CONFIG_MISMATCH, leaderId, memberConfigs.get(leaderId).url(), maxOffset, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), 0, protocolVersion);
        return serializeAssignments(assignments);
    }
    return performTaskAssignment(leaderId, leaderOffset, memberConfigs, coordinator, protocolVersion);
}
Also used : IntStream(java.util.stream.IntStream) ConnectorTaskId(org.apache.kafka.connect.util.ConnectorTaskId) HashMap(java.util.HashMap) Function(java.util.function.Function) ByteBuffer(java.nio.ByteBuffer) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) CONNECT_PROTOCOL_V2(org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V2) JoinGroupResponseMember(org.apache.kafka.common.message.JoinGroupResponseData.JoinGroupResponseMember) CONNECT_PROTOCOL_V1(org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V1) Assignment(org.apache.kafka.connect.runtime.distributed.ConnectProtocol.Assignment) LogContext(org.apache.kafka.common.utils.LogContext) Map(java.util.Map) LinkedHashSet(java.util.LinkedHashSet) Logger(org.slf4j.Logger) Time(org.apache.kafka.common.utils.Time) Iterator(java.util.Iterator) LeaderState(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.LeaderState) Collection(java.util.Collection) Set(java.util.Set) WorkerLoad(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.WorkerLoad) ConnectorsAndTasks(org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.ConnectorsAndTasks) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) Entry(java.util.Map.Entry) Collections(java.util.Collections) HashMap(java.util.HashMap) JoinGroupResponseMember(org.apache.kafka.common.message.JoinGroupResponseData.JoinGroupResponseMember)

Aggregations

ByteBuffer (java.nio.ByteBuffer)4 HashMap (java.util.HashMap)4 Assignment (org.apache.kafka.connect.runtime.distributed.ConnectProtocol.Assignment)4 ConnectorTaskId (org.apache.kafka.connect.util.ConnectorTaskId)4 ArrayList (java.util.ArrayList)3 Collection (java.util.Collection)3 Collections (java.util.Collections)3 HashSet (java.util.HashSet)3 Iterator (java.util.Iterator)3 LinkedHashSet (java.util.LinkedHashSet)3 List (java.util.List)3 Map (java.util.Map)3 Entry (java.util.Map.Entry)3 Objects (java.util.Objects)3 Set (java.util.Set)3 TreeSet (java.util.TreeSet)3 Function (java.util.function.Function)3 Collectors (java.util.stream.Collectors)3 IntStream (java.util.stream.IntStream)3 JoinGroupResponseMember (org.apache.kafka.common.message.JoinGroupResponseData.JoinGroupResponseMember)3