Search in sources :

Example 31 with Replica

use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.

the class KafkaAssignerDiskUsageDistributionGoal method swapReplicas.

/**
 * Swap one replica between two brokers so that both brokers end up closer to the mean disk usage.
 * More specifically, a successful swap is meant to reduce the combined distance from the mean:
 * <pre>
 * |UsageOfBroker1 - MeanUsage| + |UsageOfBroker2 - MeanUsage|
 * </pre>
 *
 * <p>At most one swap is performed per call: the method returns as soon as the first eligible pair of
 * replicas has been relocated. A leader on {@code toSwap} is only swapped with a leader on
 * {@code toSwapWith}, and a follower only with a follower.
 *
 * @param toSwap the broker that needs to swap a replica with the other broker.
 * @param toSwapWith the broker that provides a replica to swap with the broker {@code toSwap}
 * @param meanDiskUsage the average usage of the cluster.
 * @param clusterModel the cluster model.
 * @param excludedTopics the topics to exclude from swapping.
 * @return true if a swap has been done, false otherwise.
 */
boolean swapReplicas(Broker toSwap, Broker toSwapWith, double meanDiskUsage, ClusterModel clusterModel, Set<String> excludedTopics) {
    LOG.trace("Swapping replicas between broker {}({}) and broker {}({})", toSwap.id(), brokerSize(toSwap), toSwapWith.id(), brokerSize(toSwapWith));
    // Positive: toSwap is below its mean-usage size and must grow. Negative: it must shrink.
    double sizeToChange = toSwap.capacityFor(DISK) * meanDiskUsage - brokerSize(toSwap);
    List<ReplicaWrapper> sortedReplicasToSwap = sortReplicasAscend(toSwap.replicas(), excludedTopics);
    List<ReplicaWrapper> sortedLeadersToSwapWith = sortReplicasAscend(toSwapWith.leaderReplicas(), excludedTopics);
    List<ReplicaWrapper> sortedFollowersToSwapWith = sortReplicasAscend(followerReplicas(toSwapWith), excludedTopics);
    int startPos;
    int delta;
    if (sizeToChange > 0) {
        // iterate from small replicas to large replicas.
        startPos = 0;
        delta = 1;
    } else {
        // iterate from large replicas to small replicas.
        startPos = sortedReplicasToSwap.size() - 1;
        delta = -1;
    }
    for (int i = startPos; i >= 0 && i < sortedReplicasToSwap.size(); i += delta) {
        Replica replicaToSwap = sortedReplicasToSwap.get(i).replica();
        if (excludedTopics.contains(replicaToSwap.topicPartition().topic())) {
            continue;
        }
        // If the replica cannot be moved to the other broker at all, don't bother to search for a replica
        // to swap with.
        if (!possibleToMove(replicaToSwap, toSwapWith, clusterModel)) {
            continue;
        }
        List<ReplicaWrapper> sortedReplicasToSwapWith = replicaToSwap.isLeader() ? sortedLeadersToSwapWith : sortedFollowersToSwapWith;
        double sizeToSwap = replicaSize(replicaToSwap);
        // No need to continue if we are trying to reduce the size and the replicas to swap out is of size 0.
        if (sizeToChange < 0 && sizeToSwap == 0) {
            break;
        }
        // when sizeToChange > 0, the broker toSwap needs more disk utilization, the replicaToSwapWith should meet the
        // following requirements:
        // 1. replicaToSwapWith.size() > replicaToSwap.size()
        // 2. After the swap, the disk usage of broker toSwap should not be more than the disk usage of broker
        // toSwapWith before the swap.
        // 3. After the swap, the disk usage of broker toSwapWith should not be less than the disk usage of broker
        // toSwap before the swap.
        //
        // When sizeToChange < 0, the broker toSwap needs less disk utilization, the replicaToSwapWith should meet the
        // following requirements:
        // 4. replicaToSwapWith.size < replicaToSwap.size()
        // 5. After the swap, the disk usage of broker toSwap should not be less than the disk usage of broker
        // toSwapWith before the swap.
        // 6. After the swap, the disk usage of broker toSwapWith should not be more than the disk usage of broker
        // toSwap before the swap.
        //
        // We do not require the swap to be under the balance upper limit or lower limit. Instead, we just ensure
        // that after the swap, the two replicas are closer to the mean usage.
        double maxSize = Double.MAX_VALUE;
        // "No lower bound yet". Note that Double.MIN_VALUE is the smallest POSITIVE double, so using it here
        // would silently clamp a negative lower bound from requirements 5 and 6 to (just above) zero.
        double minSize = -Double.MAX_VALUE;
        if (sizeToChange > 0) {
            // requirement 1
            minSize = sizeToSwap;
            // requirement 2
            double maxSizeOfBrokerToSwap = diskUsage(toSwapWith) * toSwap.capacityFor(DISK);
            double currentSizeOfBrokerToSwap = brokerSize(toSwap);
            // after given out the sizeToSwap, the maximum size the broker toSwap can take in.
            maxSize = Math.min(maxSize, maxSizeOfBrokerToSwap - (currentSizeOfBrokerToSwap - sizeToSwap));
            // requirement 3
            double minSizeOfBrokerToSwapWith = diskUsage(toSwap) * toSwapWith.capacityFor(DISK);
            double currentSizeOfBrokerToSwapWith = brokerSize(toSwapWith);
            // after take in the sizeToSwap, the maximum size the broker toSwapWith can give out.
            maxSize = Math.min(maxSize, (currentSizeOfBrokerToSwapWith + sizeToSwap) - minSizeOfBrokerToSwapWith);
        } else {
            // requirement 4
            maxSize = sizeToSwap;
            // requirement 5
            double minSizeOfBrokerToSwap = diskUsage(toSwapWith) * toSwap.capacityFor(DISK);
            double currentSizeOfBrokerToSwap = brokerSize(toSwap);
            // After give out the sizeToSwap, the minimum size the broker toSwap should take in.
            minSize = Math.max(minSize, minSizeOfBrokerToSwap - (currentSizeOfBrokerToSwap - sizeToSwap));
            // requirement 6
            double maxSizeOfBrokerToSwapWith = diskUsage(toSwap) * toSwapWith.capacityFor(DISK);
            double currentSizeOfBrokerToSwapWith = brokerSize(toSwapWith);
            // after take in the sizeToSwap, the minimum size the broker toSwapWith should give out.
            minSize = Math.max(minSize, (currentSizeOfBrokerToSwapWith + sizeToSwap) - maxSizeOfBrokerToSwapWith);
        }
        // The target size might be negative here. It would still work for our binary search purpose.
        double targetSize = sizeToSwap + sizeToChange;
        // Find a replica that is eligible for swap.
        LOG.trace("replicaToSwap: {}(size={}), targetSize={}, minSize={}, maxSize={}", replicaToSwap, replicaSize(replicaToSwap), targetSize, minSize, maxSize);
        Replica replicaToSwapWith = sortedReplicasToSwapWith.isEmpty() ? null : findReplicaToSwapWith(replicaToSwap, sortedReplicasToSwapWith, targetSize, minSize, maxSize, clusterModel);
        if (replicaToSwapWith != null) {
            LOG.debug("Found replica to swap. Swapping {}({}) on broker {}({}) and {}({}) on broker {}({})", replicaToSwap.topicPartition(), replicaSize(replicaToSwap), toSwap.id(), brokerSize(toSwap), replicaToSwapWith.topicPartition(), replicaSize(replicaToSwapWith), toSwapWith.id(), brokerSize(toSwapWith));
            clusterModel.relocateReplica(replicaToSwapWith.topicPartition(), toSwapWith.id(), toSwap.id());
            clusterModel.relocateReplica(replicaToSwap.topicPartition(), toSwap.id(), toSwapWith.id());
            return true;
        }
    }
    LOG.trace("Nothing to swap between broker {} and broker {}", toSwap.id(), toSwapWith.id());
    return false;
}
Also used : Replica(com.linkedin.kafka.cruisecontrol.model.Replica) BalancingConstraint(com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)

Example 32 with Replica

use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.

the class KafkaAssignerDiskUsageDistributionGoal method findReplicaPos.

/**
 * Find the position of the target size in the sorted replica list.
 *
 * <p>Replica sizes within {@code REPLICA_CONVERGENCE_DELTA} of the target are treated as equal to the target
 * when {@code shiftOnExactMatch} is 1 or -1.
 *
 * @param sortedReplicas the replica list sorted in ascending order based on the size. May be empty.
 * @param targetSize the replica size to find.
 * @param shiftOnExactMatch the position to shift if there is an exact match. The value should be 1 or 0 or -1.
 *                          When the value is 1, the method returns the position of the first replica whose size is
 *                          greater than the target size (by more than the convergence delta), or {@code n} if
 *                          there is no such replica. The result is in [0, n].
 *                          When the value is 0, the method returns the position of an exactly matching replica
 *                          if there is one, otherwise the position of the neighbor whose size is closest to the
 *                          target size (the smaller neighbor wins a tie). The result is in [0, n-1], or -1 if
 *                          the list is empty.
 *                          When the value is -1, the method returns the position of the last replica whose size is
 *                          less than the target size (by more than the convergence delta), or -1 if there is
 *                          no such replica. The result is in [-1, n-1].
 *
 * @return the index selected according to {@code shiftOnExactMatch} as described above, where {@code n} is the
 * size of {@code sortedReplicas}.
 * @throws IllegalArgumentException if {@code shiftOnExactMatch} is not one of -1, 0, 1.
 */
private int findReplicaPos(List<ReplicaWrapper> sortedReplicas, double targetSize, int shiftOnExactMatch) {
    if (shiftOnExactMatch != 1 && shiftOnExactMatch != -1 && shiftOnExactMatch != 0) {
        throw new IllegalArgumentException("The shiftOnExactMatch value must be in {-1, 0, 1}");
    }
    // A non-negative result is the index of an exact match; a negative result encodes -(insertionPoint) - 1.
    int index = Collections.binarySearch(sortedReplicas, new ReplicaWrapper(null, targetSize), Comparator.comparingDouble(ReplicaWrapper::size));
    int checkIndex;
    switch(shiftOnExactMatch) {
        case -1:
            checkIndex = index >= 0 ? index : Math.min(-(index + 1), sortedReplicas.size() - 1);
            // Walk left past every replica that is not clearly smaller than the target.
            // The returned index is in [-1, (n-1)].
            while (checkIndex >= 0) {
                Replica r = sortedReplicas.get(checkIndex).replica();
                if (replicaSize(r) < targetSize - REPLICA_CONVERGENCE_DELTA) {
                    break;
                }
                checkIndex--;
            }
            return checkIndex;
        case 1:
            // Clamp to 0 so that an empty list (where n - 1 == -1) does not start the scan at index -1 and
            // trigger an IndexOutOfBoundsException on get(-1).
            checkIndex = index >= 0 ? index : Math.max(0, Math.min(-(index + 1), sortedReplicas.size() - 1));
            // Walk right past every replica that is not clearly larger than the target.
            // The returned index is in [0, n].
            while (checkIndex < sortedReplicas.size()) {
                Replica r = sortedReplicas.get(checkIndex).replica();
                if (replicaSize(r) > targetSize + REPLICA_CONVERGENCE_DELTA) {
                    break;
                }
                checkIndex++;
            }
            return checkIndex;
        case 0:
            if (index >= 0) {
                // Found an exact match. No action needed. The returned index is in [0, (n-1)].
                return index;
            } else {
                // If cannot find the exact match, use the neighbor closest to the target size.
                // The returned index is in [0, (n-1)] for a non-empty list, and -1 for an empty list.
                int rightIndex = -(index + 1);
                if (rightIndex == sortedReplicas.size()) {
                    return sortedReplicas.size() - 1;
                } else if (rightIndex == 0) {
                    return 0;
                } else {
                    double leftSizeDiff = Math.abs(replicaSize(sortedReplicas.get(rightIndex - 1).replica()) - targetSize);
                    double rightSizeDiff = Math.abs(replicaSize(sortedReplicas.get(rightIndex).replica()) - targetSize);
                    return leftSizeDiff <= rightSizeDiff ? rightIndex - 1 : rightIndex;
                }
            }
        default:
            // Unreachable: the argument was validated at the top of the method.
            throw new IllegalStateException("Invalid shift on exact match value " + shiftOnExactMatch);
    }
}
Also used : Replica(com.linkedin.kafka.cruisecontrol.model.Replica) BalancingConstraint(com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)

Example 33 with Replica

use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.

the class KafkaAssignerEvenRackAwareGoal method isReplicaMoveViolateRackAwareness.

/**
 * Check whether moving a replica to a destination broker would place it in a rack that contains a broker
 * already associated with the same partition (other than the replica's current broker).
 *
 * @param clusterModel the cluster model both functions are applied to.
 * @param sourceReplicaFunction resolves the replica to be moved from the cluster model.
 * @param destinationBrokerFunction resolves the destination broker from the cluster model.
 * @return true if the rack of any other partition broker contains the destination broker, false otherwise.
 */
private boolean isReplicaMoveViolateRackAwareness(ClusterModel clusterModel, Function<ClusterModel, Replica> sourceReplicaFunction, Function<ClusterModel, Broker> destinationBrokerFunction) {
    Replica replicaToMove = sourceReplicaFunction.apply(clusterModel);
    Broker target = destinationBrokerFunction.apply(clusterModel);
    // Brokers of the partition, minus the broker the replica currently lives on: the replica leaves that
    // broker, so its rack must not count against the move.
    Set<Broker> otherPartitionBrokers = clusterModel.partition(replicaToMove.topicPartition()).partitionBrokers();
    otherPartitionBrokers.remove(replicaToMove.broker());
    // The move violates rack awareness as soon as one remaining broker shares a rack with the destination.
    return otherPartitionBrokers.stream()
                                .anyMatch(other -> other.rack().brokers().contains(target));
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Example 34 with Replica

use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.

the class CapacityGoal method rebalanceForBroker.

/**
 * Bring the given broker (and/or its host) back under the capacity limit for this goal's resource.
 *
 * <p>
 * (1) REBALANCE BY LEADERSHIP MOVEMENT:
 * Only attempted for the outbound network and CPU resources. Leadership is moved away from the broker, one
 * leader at a time, until the load on the broker and/or host (see {@link Resource#isHostResource()} and
 * {@link Resource#isBrokerResource()}) is under the capacity limit.
 *
 * <p>
 * (2) REBALANCE BY REPLICA MOVEMENT:
 * If the broker is still over the limit, replicas are moved away from it, one at a time, until the load on
 * the broker and/or host for the given resource is under the capacity limit.
 *
 * @param broker         Broker to be balanced.
 * @param clusterModel   The state of the cluster.
 * @param optimizedGoals Optimized goals.
 * @param excludedTopics The topics that should be excluded from the optimization action.
 * @throws OptimizationFailureException if the utilization is still over the capacity limit after both phases.
 */
@Override
protected void rebalanceForBroker(Broker broker, ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) throws OptimizationFailureException {
    LOG.debug("balancing broker {}, optimized goals = {}", broker, optimizedGoals);
    Resource res = resource();
    double threshold = _balancingConstraint.capacityThreshold(res);
    double brokerLimit = broker.capacityFor(res) * threshold;
    double hostLimit = broker.host().capacityFor(res) * threshold;
    if (!isUtilizationOverLimit(broker, res, brokerLimit, hostLimit)) {
        // Nothing to do: the broker and/or host is already under the capacity limit for this resource.
        return;
    }

    // Phase 1 -- leadership movement. Only attempted for NW_OUT and CPU.
    if (res == Resource.NW_OUT || res == Resource.CPU) {
        // Leaders of the source broker, in the order of preference to relocate.
        for (Replica leader : broker.sortedLeadersFor(res)) {
            if (shouldExclude(leader, excludedTopics)) {
                continue;
            }
            // Candidate destinations are the brokers of this leader's followers, ordered by ascending
            // broker utilization of the resource.
            List<Replica> followers = clusterModel.partition(leader.topicPartition()).followers();
            clusterModel.sortReplicasInAscendingOrderByBrokerResourceUtilization(followers, res);
            List<Broker> followerBrokers = followers.stream().map(Replica::broker).collect(Collectors.toList());
            if (maybeApplyBalancingAction(clusterModel, leader, followerBrokers, ActionType.LEADERSHIP_MOVEMENT, optimizedGoals) == null) {
                LOG.debug("Failed to move leader replica {} to any other brokers in {}", leader, followerBrokers);
            }
            if (!isUtilizationOverLimit(broker, res, brokerLimit, hostLimit)) {
                // Leadership movement alone brought the utilization under the limit.
                return;
            }
        }
    }

    // Phase 2 -- replica movement. Reaching this point means the broker is still over the limit.
    // Candidate destinations: healthy brokers under the capacity threshold for this resource.
    List<Broker> candidateBrokers = clusterModel.sortedHealthyBrokersUnderThreshold(res, threshold);
    for (Replica replica : broker.sortedReplicas(res)) {
        if (shouldExclude(replica, excludedTopics)) {
            continue;
        }
        if (maybeApplyBalancingAction(clusterModel, replica, candidateBrokers, ActionType.REPLICA_MOVEMENT, optimizedGoals) == null) {
            LOG.debug("Failed to move replica {} to any broker in {}", replica, candidateBrokers);
        }
        // Re-check after every attempt and stop as soon as the limit is satisfied.
        if (!isUtilizationOverLimit(broker, res, brokerLimit, hostLimit)) {
            return;
        }
    }

    // Every replica on the source broker was considered and the utilization is still above the limit.
    if (res.isHostResource()) {
        throw new OptimizationFailureException("Violated capacity limit of " + hostLimit + " via host " + "utilization of " + broker.host().load().expectedUtilizationFor(res) + " with hostname " + broker.host().name() + " for resource " + res);
    } else {
        throw new OptimizationFailureException("Violated capacity limit of " + brokerLimit + " via broker " + "utilization of " + broker.load().expectedUtilizationFor(res) + " with broker id " + broker.id() + " for resource " + res);
    }
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Resource(com.linkedin.kafka.cruisecontrol.common.Resource) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Example 35 with Replica

use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.

the class CapacityGoal method actionAcceptance.

/**
 * Decide whether this (hard) capacity goal accepts the given balancing action.
 *
 * ## Leadership Movement: impacts only (1) network outbound and (2) CPU resources (See
 * {@link DiskCapacityGoal#actionAcceptance(BalancingAction, ClusterModel)} and
 * {@link NetworkInboundCapacityGoal#actionAcceptance(BalancingAction, ClusterModel)}).
 *   (1) Leadership NW_OUT movement: carries all of the leader's NW_OUT load.
 *   (2) Leadership CPU movement: in reality carries only a fraction of the leader's CPU load.
 * To optimize CC performance, the expected leadership CPU utilization is not calculated; instead, when
 * (action.balancingAction() == ActionType.LEADERSHIP_MOVEMENT && resource() == Resource.CPU),
 * the full CPU utilization of the leader is assumed to move.
 * <p>
 * ## Replica Movement: impacts any resource.
 * ## Replica Swap: impacts any resource.
 *
 * @param action Action to be checked for acceptance.
 * @param clusterModel The state of the cluster.
 * @return {@link ActionAcceptance#ACCEPT} if the action is acceptable by this goal,
 * {@link ActionAcceptance#REPLICA_REJECT} otherwise.
 * @throws IllegalArgumentException if the action type is not a replica swap, replica movement or
 * leadership movement.
 */
@Override
public ActionAcceptance actionAcceptance(BalancingAction action, ClusterModel clusterModel) {
    Replica replicaToMove = clusterModel.broker(action.sourceBrokerId()).replica(action.topicPartition());
    Broker target = clusterModel.broker(action.destinationBrokerId());
    boolean withinCapacity;
    switch (action.balancingAction()) {
        case REPLICA_MOVEMENT:
        case LEADERSHIP_MOVEMENT:
            // Both kinds of movement are evaluated with the same capacity check.
            withinCapacity = isMovementAcceptableForCapacity(replicaToMove, target);
            break;
        case REPLICA_SWAP:
            // A swap additionally involves the replica coming back from the destination broker.
            withinCapacity = isSwapAcceptableForCapacity(replicaToMove, target.replica(action.destinationTopicPartition()));
            break;
        default:
            throw new IllegalArgumentException("Unsupported balancing action " + action.balancingAction() + " is provided.");
    }
    if (withinCapacity) {
        return ACCEPT;
    }
    return REPLICA_REJECT;
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Aggregations

Replica (com.linkedin.kafka.cruisecontrol.model.Replica) 40, Broker (com.linkedin.kafka.cruisecontrol.model.Broker) 26, BalancingConstraint (com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint) 13, OptimizationFailureException (com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) 12, ClusterModel (com.linkedin.kafka.cruisecontrol.model.ClusterModel) 9, HashSet (java.util.HashSet) 9, TreeSet (java.util.TreeSet) 8, Resource (com.linkedin.kafka.cruisecontrol.common.Resource) 7, ActionAcceptance (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance) 6, ActionType (com.linkedin.kafka.cruisecontrol.analyzer.ActionType) 6, BalancingAction (com.linkedin.kafka.cruisecontrol.analyzer.BalancingAction) 6, ArrayList (java.util.ArrayList) 6, List (java.util.List) 6, ACCEPT (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.ACCEPT) 5, REPLICA_REJECT (com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.REPLICA_REJECT) 5, ClusterModelStats (com.linkedin.kafka.cruisecontrol.model.ClusterModelStats) 5, ModelCompletenessRequirements (com.linkedin.kafka.cruisecontrol.monitor.ModelCompletenessRequirements) 5, Set (java.util.Set) 5, SortedSet (java.util.SortedSet) 5, Collectors (java.util.stream.Collectors) 5