use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.
the class KafkaAssignerDiskUsageDistributionGoal method swapReplicas.
/**
 * Swap a replica between two brokers. The method should achieve the result that the overall usage of the two
 * brokers is improved. More specifically, the following result is reduced.
 * <pre>
 * (<tt>UsageOfBroker1</tt> - <tt>MeanUsage</tt>) + (<tt>UsageOfBroker2</tt> - <tt>MeanUsage</tt>)
 * </pre>
 *
 * <p>Replicas on <tt>toSwap</tt> are visited from small to large when <tt>toSwap</tt> is below its mean-usage
 * target and from large to small otherwise. A leader is only swapped with a leader of <tt>toSwapWith</tt> and a
 * follower only with a follower. The method returns as soon as the first swap has been applied to the cluster
 * model.
 *
 * @param toSwap the broker that needs to swap a replica with the other broker.
 * @param toSwapWith the broker that provides a replica to swap with the broker <tt>toSwap</tt>
 * @param meanDiskUsage the average usage of the cluster.
 * @param clusterModel the cluster model.
 * @param excludedTopics the topics to exclude from swapping.
 * @return true if a swap has been done, false otherwise.
 */
boolean swapReplicas(Broker toSwap, Broker toSwapWith, double meanDiskUsage, ClusterModel clusterModel, Set<String> excludedTopics) {
  LOG.trace("Swapping replicas between broker {}({}) and broker {}({})", toSwap.id(), brokerSize(toSwap), toSwapWith.id(), brokerSize(toSwapWith));
  // Positive: toSwap is below its mean-usage target and should take in data. Otherwise it should give data out.
  double sizeToChange = toSwap.capacityFor(DISK) * meanDiskUsage - brokerSize(toSwap);
  List<ReplicaWrapper> sortedReplicasToSwap = sortReplicasAscend(toSwap.replicas(), excludedTopics);
  List<ReplicaWrapper> sortedLeadersToSwapWith = sortReplicasAscend(toSwapWith.leaderReplicas(), excludedTopics);
  List<ReplicaWrapper> sortedFollowersToSwapWith = sortReplicasAscend(followerReplicas(toSwapWith), excludedTopics);
  int startPos;
  int delta;
  if (sizeToChange > 0) {
    // iterate from small replicas to large replicas.
    startPos = 0;
    delta = 1;
  } else {
    // iterate from large replicas to small replicas.
    startPos = sortedReplicasToSwap.size() - 1;
    delta = -1;
  }
  for (int i = startPos; i >= 0 && i < sortedReplicasToSwap.size(); i += delta) {
    Replica replicaToSwap = sortedReplicasToSwap.get(i).replica();
    if (excludedTopics.contains(replicaToSwap.topicPartition().topic())) {
      continue;
    }
    // If the replica cannot be moved to the other broker at all, don't bother to search for a replica to swap with.
    if (!possibleToMove(replicaToSwap, toSwapWith, clusterModel)) {
      continue;
    }
    // Leaders are swapped with leaders and followers with followers.
    List<ReplicaWrapper> sortedReplicasToSwapWith = replicaToSwap.isLeader() ? sortedLeadersToSwapWith : sortedFollowersToSwapWith;
    double sizeToSwap = replicaSize(replicaToSwap);
    // No need to continue if we are trying to reduce the size and the replicas to swap out is of size 0.
    if (sizeToChange < 0 && sizeToSwap == 0) {
      break;
    }
    // when sizeToChange > 0, the broker toSwap needs more disk utilization, the replicaToSwapWith should meet the
    // following requirements:
    // 1. replicaToSwapWith.size() > replicaToSwap.size()
    // 2. After the swap, the disk usage of broker toSwap should not be more than the disk usage of broker
    //    toSwapWith before the swap.
    // 3. After the swap, the disk usage of broker toSwapWith should not be less than the disk usage of broker
    //    toSwap before the swap.
    //
    // When sizeToChange < 0, the broker toSwap needs less disk utilization, the replicaToSwapWith should meet the
    // following requirements:
    // 4. replicaToSwapWith.size < replicaToSwap.size()
    // 5. After the swap, the disk usage of broker toSwap should not be less than the disk usage of broker
    //    toSwapWith before the swap.
    // 6. After the swap, the disk usage of broker toSwapWith should not be more than the disk usage of broker
    //    toSwap before the swap.
    //
    // We do not require the swap to be under the balance upper limit or lower limit. Instead, we just ensure
    // that after the swap, the two replicas are closer to the mean usage.
    double maxSize = Double.MAX_VALUE;
    // Start from the most negative finite double. Double.MIN_VALUE is the smallest POSITIVE double, so using it
    // here would turn a negative lower bound from requirements 5 and 6 into a tiny positive one.
    double minSize = -Double.MAX_VALUE;
    if (sizeToChange > 0) {
      // requirement 1
      minSize = sizeToSwap;
      // requirement 2
      double maxSizeOfBrokerToSwap = diskUsage(toSwapWith) * toSwap.capacityFor(DISK);
      double currentSizeOfBrokerToSwap = brokerSize(toSwap);
      // after given out the sizeToSwap, the maximum size the broker toSwap can take in.
      maxSize = Math.min(maxSize, maxSizeOfBrokerToSwap - (currentSizeOfBrokerToSwap - sizeToSwap));
      // requirement 3
      double minSizeOfBrokerToSwapWith = diskUsage(toSwap) * toSwapWith.capacityFor(DISK);
      double currentSizeOfBrokerToSwapWith = brokerSize(toSwapWith);
      // after take in the sizeToSwap, the maximum size the broker toSwapWith can give out.
      maxSize = Math.min(maxSize, (currentSizeOfBrokerToSwapWith + sizeToSwap) - minSizeOfBrokerToSwapWith);
    } else {
      // requirement 4
      maxSize = sizeToSwap;
      // requirement 5
      double minSizeOfBrokerToSwap = diskUsage(toSwapWith) * toSwap.capacityFor(DISK);
      double currentSizeOfBrokerToSwap = brokerSize(toSwap);
      // After give out the sizeToSwap, the minimum size the broker toSwap should take in.
      minSize = Math.max(minSize, minSizeOfBrokerToSwap - (currentSizeOfBrokerToSwap - sizeToSwap));
      // requirement 6
      double maxSizeOfBrokerToSwapWith = diskUsage(toSwap) * toSwapWith.capacityFor(DISK);
      double currentSizeOfBrokerToSwapWith = brokerSize(toSwapWith);
      // after take in the sizeToSwap, the minimum size the broker toSwapWith should give out.
      minSize = Math.max(minSize, (currentSizeOfBrokerToSwapWith + sizeToSwap) - maxSizeOfBrokerToSwapWith);
    }
    // The target size might be negative here. It would still work for our binary search purpose.
    double targetSize = sizeToSwap + sizeToChange;
    // Find a replica that is eligible for swap.
    LOG.trace("replicaToSwap: {}(size={}), targetSize={}, minSize={}, maxSize={}", replicaToSwap, sizeToSwap, targetSize, minSize, maxSize);
    Replica replicaToSwapWith = sortedReplicasToSwapWith.isEmpty() ? null : findReplicaToSwapWith(replicaToSwap, sortedReplicasToSwapWith, targetSize, minSize, maxSize, clusterModel);
    if (replicaToSwapWith != null) {
      LOG.debug("Found replica to swap. Swapping {}({}) on broker {}({}) and {}({}) on broker {}({})", replicaToSwap.topicPartition(), replicaSize(replicaToSwap), toSwap.id(), brokerSize(toSwap), replicaToSwapWith.topicPartition(), replicaSize(replicaToSwapWith), toSwapWith.id(), brokerSize(toSwapWith));
      // Apply both halves of the swap to the cluster model.
      clusterModel.relocateReplica(replicaToSwapWith.topicPartition(), toSwapWith.id(), toSwap.id());
      clusterModel.relocateReplica(replicaToSwap.topicPartition(), toSwap.id(), toSwapWith.id());
      return true;
    }
  }
  LOG.trace("Nothing to swap between broker {} and broker {}", toSwap.id(), toSwapWith.id());
  return false;
}
use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.
the class KafkaAssignerDiskUsageDistributionGoal method findReplicaPos.
/**
 * Find the position of the target size in the sorted replica list.
 *
 * @param sortedReplicas the replica list sorted in ascending order based on the size.
 * @param targetSize the replica size to find.
 * @param shiftOnExactMatch selects which neighbor of the target size is returned. The value should be 1 or 0 or -1.
 *                          When the value is 1, the method returns the position of the first replica whose size is
 *                          greater than <tt>targetSize + REPLICA_CONVERGENCE_DELTA</tt>, or <tt>n</tt> (the list
 *                          size) if there is no such replica. The result is in [0, n].
 *                          When the value is 0, the method returns the position of an exact match if the binary
 *                          search finds one, otherwise the position of the neighbor whose size is closest to the
 *                          target size (the left neighbor wins a tie). The result is in [0, (n-1)] for a
 *                          non-empty list and -1 for an empty list.
 *                          When the value is -1, the method returns the position of the last replica whose size is
 *                          less than <tt>targetSize - REPLICA_CONVERGENCE_DELTA</tt>, or -1 if there is no such
 *                          replica. The result is in [-1, (n-1)].
 *
 * @return the index selected according to <tt>shiftOnExactMatch</tt> as described above.
 * @throws IllegalArgumentException if <tt>shiftOnExactMatch</tt> is not one of -1, 0 or 1.
 */
private int findReplicaPos(List<ReplicaWrapper> sortedReplicas, double targetSize, int shiftOnExactMatch) {
  if (shiftOnExactMatch != 1 && shiftOnExactMatch != -1 && shiftOnExactMatch != 0) {
    throw new IllegalArgumentException("The shiftOnExactMatch value must be in {-1, 0, 1}");
  }
  // A non-negative index is an exact match; a negative index encodes the insertion point as -(insertionPoint + 1).
  int index = Collections.binarySearch(sortedReplicas, new ReplicaWrapper(null, targetSize), Comparator.comparingDouble(ReplicaWrapper::size));
  int checkIndex;
  switch (shiftOnExactMatch) {
    case -1:
      checkIndex = index >= 0 ? index : Math.min(-(index + 1), sortedReplicas.size() - 1);
      // Walk left until the replica is smaller than the target by more than the convergence delta.
      // The returned index is in [-1, (n-1)].
      while (checkIndex >= 0) {
        Replica r = sortedReplicas.get(checkIndex).replica();
        if (replicaSize(r) < targetSize - REPLICA_CONVERGENCE_DELTA) {
          break;
        }
        checkIndex--;
      }
      return checkIndex;
    case 1:
      // Clamp the start position at 0: for an empty list the inner expression evaluates to -1, which would make
      // the loop below read index -1. With the clamp an empty list yields 0 (== n).
      checkIndex = index >= 0 ? index : Math.max(0, Math.min(-(index + 1), sortedReplicas.size() - 1));
      // Walk right until the replica is larger than the target by more than the convergence delta.
      // The returned index is in [0, n].
      while (checkIndex < sortedReplicas.size()) {
        Replica r = sortedReplicas.get(checkIndex).replica();
        if (replicaSize(r) > targetSize + REPLICA_CONVERGENCE_DELTA) {
          break;
        }
        checkIndex++;
      }
      return checkIndex;
    case 0:
      if (index >= 0) {
        // Found an exact match. No action needed. The returned index is in [0, (n-1)].
        return index;
      } else {
        // If cannot find the exact match, use the neighbor closest to the target size.
        // The returned index is in [0, (n-1)] for a non-empty list.
        int rightIndex = -(index + 1);
        if (rightIndex == sortedReplicas.size()) {
          // The target is larger than every replica (for an empty list this returns -1).
          return sortedReplicas.size() - 1;
        } else if (rightIndex == 0) {
          // The target is smaller than every replica.
          return 0;
        } else {
          double leftSizeDiff = Math.abs(replicaSize(sortedReplicas.get(rightIndex - 1).replica()) - targetSize);
          double rightSizeDiff = Math.abs(replicaSize(sortedReplicas.get(rightIndex).replica()) - targetSize);
          return leftSizeDiff <= rightSizeDiff ? rightIndex - 1 : rightIndex;
        }
      }
    default:
      // Not reachable because of the argument validation above; kept as a safeguard.
      throw new IllegalStateException("Invalid shift on exact match value " + shiftOnExactMatch);
  }
}
use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.
the class KafkaAssignerEvenRackAwareGoal method isReplicaMoveViolateRackAwareness.
/**
 * Check whether moving a replica to a destination broker would put it into a rack that already contains a broker
 * hosting another replica of the same partition.
 *
 * @param clusterModel the cluster model the two functions are evaluated against.
 * @param sourceReplicaFunction resolves the replica to be moved from the cluster model.
 * @param destinationBrokerFunction resolves the broker the replica would be moved to from the cluster model.
 * @return true if the rack of any other broker of the partition contains the destination broker, false otherwise.
 */
private boolean isReplicaMoveViolateRackAwareness(ClusterModel clusterModel, Function<ClusterModel, Replica> sourceReplicaFunction, Function<ClusterModel, Broker> destinationBrokerFunction) {
  Replica replicaToMove = sourceReplicaFunction.apply(clusterModel);
  Broker destinationBroker = destinationBrokerFunction.apply(clusterModel);
  // Brokers of the partition other than the one currently hosting the replica to move.
  // NOTE(review): this modifies the set returned by partitionBrokers() -- presumably a fresh copy; confirm.
  Set<Broker> otherPartitionBrokers = clusterModel.partition(replicaToMove.topicPartition()).partitionBrokers();
  otherPartitionBrokers.remove(replicaToMove.broker());
  // The move violates rack awareness if the destination shares a rack with any of the remaining brokers.
  return otherPartitionBrokers.stream()
                              .anyMatch(other -> other.rack().brokers().contains(destinationBroker));
}
use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.
the class CapacityGoal method rebalanceForBroker.
/**
 * Bring the given broker (and/or its host) under the capacity limit of this goal's resource.
 *
 * <p>
 * (1) REBALANCE BY LEADERSHIP MOVEMENT:
 * Only attempted for the outbound network and CPU resources. Leaders on the broker are offered, one by one, to
 * the brokers hosting their followers until the utilization is no longer over the limit.
 *
 * <p>
 * (2) REBALANCE BY REPLICA MOVEMENT:
 * If the utilization is still over the limit, replicas on the broker are offered, one by one, to the healthy
 * brokers that are under the capacity threshold until the utilization is no longer over the limit.
 *
 * <p>
 * The limits checked are the broker capacity and the host capacity for the resource, each multiplied by the
 * configured capacity threshold.
 *
 * @param broker Broker to be balanced.
 * @param clusterModel The state of the cluster.
 * @param optimizedGoals Optimized goals.
 * @param excludedTopics The topics that should be excluded from the optimization action.
 * @throws OptimizationFailureException if the utilization is still over the limit after both steps.
 */
@Override
protected void rebalanceForBroker(Broker broker, ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) throws OptimizationFailureException {
  LOG.debug("balancing broker {}, optimized goals = {}", broker, optimizedGoals);
  Resource resource = resource();
  double threshold = _balancingConstraint.capacityThreshold(resource);
  double brokerLimit = broker.capacityFor(resource) * threshold;
  double hostLimit = broker.host().capacityFor(resource) * threshold;
  if (!isUtilizationOverLimit(broker, resource, brokerLimit, hostLimit)) {
    // Nothing to do: the source broker and/or host is already under the capacity limit for this resource.
    return;
  }
  boolean overLimit = true;

  // Step 1: leadership movement, which is only attempted for NW_OUT and CPU.
  if (resource == Resource.NW_OUT || resource == Resource.CPU) {
    // Leaders of the source broker, in the order returned by sortedLeadersFor for this resource.
    for (Replica leader : broker.sortedLeadersFor(resource)) {
      if (shouldExclude(leader, excludedTopics)) {
        continue;
      }
      // Candidate destinations are the brokers of this leader's followers, ordered by ascending broker
      // utilization of the resource.
      List<Replica> followers = clusterModel.partition(leader.topicPartition()).followers();
      clusterModel.sortReplicasInAscendingOrderByBrokerResourceUtilization(followers, resource);
      List<Broker> candidates = followers.stream().map(Replica::broker).collect(Collectors.toList());
      if (maybeApplyBalancingAction(clusterModel, leader, candidates, ActionType.LEADERSHIP_MOVEMENT, optimizedGoals) == null) {
        LOG.debug("Failed to move leader replica {} to any other brokers in {}", leader, candidates);
      }
      overLimit = isUtilizationOverLimit(broker, resource, brokerLimit, hostLimit);
      if (!overLimit) {
        // Utilization is now under the capacity limit for the current resource.
        break;
      }
    }
  }

  // Step 2: replica movement, if leadership movement was not applicable or not sufficient.
  if (overLimit) {
    // Healthy brokers under the host and/or broker capacity threshold for the current resource.
    List<Broker> destinations = clusterModel.sortedHealthyBrokersUnderThreshold(resource, threshold);
    // Offer the replicas of the source broker one by one until the utilization gets under the capacity limit.
    // If the limit cannot be satisfied, an exception is thrown below.
    for (Replica replica : broker.sortedReplicas(resource)) {
      if (shouldExclude(replica, excludedTopics)) {
        continue;
      }
      if (maybeApplyBalancingAction(clusterModel, replica, destinations, ActionType.REPLICA_MOVEMENT, optimizedGoals) == null) {
        LOG.debug("Failed to move replica {} to any broker in {}", replica, destinations);
      }
      // Re-check the limit after every attempt.
      overLimit = isUtilizationOverLimit(broker, resource, brokerLimit, hostLimit);
      if (!overLimit) {
        break;
      }
    }
  }

  if (!overLimit) {
    return;
  }
  // Utilization is still above the capacity limit after all replicas in the source broker were checked.
  if (resource.isHostResource()) {
    throw new OptimizationFailureException("Violated capacity limit of " + hostLimit + " via host utilization of "
                                           + broker.host().load().expectedUtilizationFor(resource) + " with hostname "
                                           + broker.host().name() + " for resource " + resource);
  }
  throw new OptimizationFailureException("Violated capacity limit of " + brokerLimit + " via broker utilization of "
                                         + broker.load().expectedUtilizationFor(resource) + " with broker id "
                                         + broker.id() + " for resource " + resource);
}
use of com.linkedin.kafka.cruisecontrol.model.Replica in project cruise-control by linkedin.
the class CapacityGoal method actionAcceptance.
/**
 * Decide whether this goal accepts the given balancing action. This is a hard goal whose requirement is capacity:
 * an action is acceptable only if it passes the capacity check for its action type.
 *
 * ## Leadership Movement: impacts only (1) network outbound and (2) CPU resources (See
 * {@link DiskCapacityGoal#actionAcceptance(BalancingAction, ClusterModel)} and
 * {@link NetworkInboundCapacityGoal#actionAcceptance(BalancingAction, ClusterModel)}).
 *   (1) Leadership NW_OUT movement: the movement carries all of the leader's NW_OUT load.
 *   (2) Leadership CPU movement: in reality, the movement carries only a fraction of the leader's CPU load.
 *       To optimize CC performance, the expected leadership CPU utilization is not calculated; instead, if
 *       (action.balancingAction() == ActionType.LEADERSHIP_MOVEMENT && resource() == Resource.CPU), the expected
 *       leadership CPU utilization is assumed to be the full CPU utilization of the leader.
 * <p>
 * ## Replica Movement: impacts any resource.
 * ## Replica Swap: impacts any resource.
 *
 * <p>
 * Replica and leadership movements are checked with the same movement check against the destination broker;
 * a swap is checked against the replica on the destination broker identified by the action's destination
 * topic partition.
 *
 * @param action Action to be checked for acceptance.
 * @param clusterModel The state of the cluster.
 * @return {@link ActionAcceptance#ACCEPT} if the action is acceptable by this goal,
 *         {@link ActionAcceptance#REPLICA_REJECT} otherwise.
 * @throws IllegalArgumentException if the balancing action type is not one of the supported types.
 */
@Override
public ActionAcceptance actionAcceptance(BalancingAction action, ClusterModel clusterModel) {
  Replica sourceReplica = clusterModel.broker(action.sourceBrokerId()).replica(action.topicPartition());
  Broker destinationBroker = clusterModel.broker(action.destinationBrokerId());

  boolean acceptable;
  switch (action.balancingAction()) {
    case REPLICA_MOVEMENT:
    case LEADERSHIP_MOVEMENT:
      acceptable = isMovementAcceptableForCapacity(sourceReplica, destinationBroker);
      break;
    case REPLICA_SWAP:
      // The swap partner is the replica of the destination topic partition on the destination broker.
      acceptable = isSwapAcceptableForCapacity(sourceReplica,
                                               destinationBroker.replica(action.destinationTopicPartition()));
      break;
    default:
      throw new IllegalArgumentException("Unsupported balancing action " + action.balancingAction() + " is provided.");
  }
  if (acceptable) {
    return ACCEPT;
  }
  return REPLICA_REJECT;
}
Aggregations