use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.
the class CapacityGoal method rebalanceForBroker.
/**
 * Tries to bring the given broker and/or its host (see {@link Resource#isHostResource()} and
 * {@link Resource#isBrokerResource()}) back under the capacity limit for this goal's resource.
 *
 * <p>
 * (1) LEADERSHIP MOVEMENT: Attempted first, and only when the resource is outbound network load or CPU.
 * For each non-excluded leader on the broker, leadership is offered to the brokers hosting its followers,
 * after the followers have been sorted in ascending order of broker resource utilization.
 *
 * <p>
 * (2) REPLICA MOVEMENT: Attempted when the utilization is still over the limit after step (1). Each
 * non-excluded replica on the broker is offered to the healthy brokers that are under the capacity threshold.
 *
 * <p>
 * Both steps stop as soon as the utilization drops under the limit. The method returns immediately when the
 * utilization is already under the limit on entry.
 *
 * @param broker Broker to be balanced.
 * @param clusterModel The state of the cluster.
 * @param optimizedGoals Optimized goals.
 * @param excludedTopics The topics that should be excluded from the optimization action.
 * @throws OptimizationFailureException If the utilization is still over the capacity limit after both steps.
 */
@Override
protected void rebalanceForBroker(Broker broker, ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) throws OptimizationFailureException {
  LOG.debug("balancing broker {}, optimized goals = {}", broker, optimizedGoals);
  Resource res = resource();
  double threshold = _balancingConstraint.capacityThreshold(res);
  double brokerLimit = broker.capacityFor(res) * threshold;
  double hostLimit = broker.host().capacityFor(res) * threshold;

  boolean overLimit = isUtilizationOverLimit(broker, res, brokerLimit, hostLimit);
  if (!overLimit) {
    // Nothing to do: the source broker and/or host is already under the capacity limit.
    return;
  }

  // Step (1): leadership movement, applicable to outbound network and CPU only.
  if (res == Resource.NW_OUT || res == Resource.CPU) {
    // Only the leaders hosted by the source broker are candidates.
    for (Replica leader : broker.sortedLeadersFor(res)) {
      if (!shouldExclude(leader, excludedTopics)) {
        // Candidate destinations are the brokers of this leader's followers, least utilized first.
        List<Replica> followerReplicas = clusterModel.partition(leader.topicPartition()).followers();
        clusterModel.sortReplicasInAscendingOrderByBrokerResourceUtilization(followerReplicas, res);
        List<Broker> candidates = followerReplicas.stream().map(Replica::broker).collect(Collectors.toList());
        Broker newLeaderBroker =
            maybeApplyBalancingAction(clusterModel, leader, candidates, ActionType.LEADERSHIP_MOVEMENT, optimizedGoals);
        if (newLeaderBroker == null) {
          LOG.debug("Failed to move leader replica {} to any other brokers in {}", leader, candidates);
        }
        overLimit = isUtilizationOverLimit(broker, res, brokerLimit, hostLimit);
        if (!overLimit) {
          // Leadership movement alone was enough.
          break;
        }
      }
    }
  }
  if (!overLimit) {
    return;
  }

  // Step (2): replica movement towards healthy brokers that are under the capacity threshold.
  List<Broker> destinations = clusterModel.sortedHealthyBrokersUnderThreshold(res, threshold);
  for (Replica replica : broker.sortedReplicas(res)) {
    if (!shouldExclude(replica, excludedTopics)) {
      Broker destination =
          maybeApplyBalancingAction(clusterModel, replica, destinations, ActionType.REPLICA_MOVEMENT, optimizedGoals);
      if (destination == null) {
        LOG.debug("Failed to move replica {} to any broker in {}", replica, destinations);
      }
      // Re-evaluate after every attempt and stop once the limit is satisfied.
      overLimit = isUtilizationOverLimit(broker, res, brokerLimit, hostLimit);
      if (!overLimit) {
        break;
      }
    }
  }
  if (!overLimit) {
    return;
  }

  // Still over the limit after every candidate was tried: report against the host or the broker limit.
  if (res.isHostResource()) {
    throw new OptimizationFailureException("Violated capacity limit of " + hostLimit + " via host " + "utilization of "
                                           + broker.host().load().expectedUtilizationFor(res) + " with hostname "
                                           + broker.host().name() + " for resource " + res);
  }
  throw new OptimizationFailureException("Violated capacity limit of " + brokerLimit + " via broker " + "utilization of "
                                         + broker.load().expectedUtilizationFor(res) + " with broker id "
                                         + broker.id() + " for resource " + res);
}
use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.
the class PotentialNwOutGoal method updateGoalState.
/**
 * Updates the goal state after one round of self-healing / rebalance.
 *
 * <p>
 * If a self-healing eligible replica is found on a broker that is not alive, the first such round switches
 * the goal to "dead brokers only" mode, logs a warning and returns without finishing. If that mode was
 * already active, the optimization fails. When no such replica exists, the goal is finished.
 *
 * @param clusterModel The state of the cluster.
 * @param excludedTopics The topics that should be excluded from the optimization action (not read by this method).
 * @throws OptimizationFailureException If a self-healing eligible replica remains on a dead broker while the
 *                                      goal is already in "dead brokers only" mode.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
  // Sanity check: no self-healing eligible replica should remain at a decommissioned broker.
  for (Replica candidate : clusterModel.selfHealingEligibleReplicas()) {
    if (!candidate.broker().isAlive()) {
      if (_selfHealingDeadBrokersOnly) {
        throw new OptimizationFailureException("Self healing failed to move the replica away from decommissioned brokers.");
      }
      // First failure: relax the goal and leave it unfinished so that another round can run.
      _selfHealingDeadBrokersOnly = true;
      LOG.warn("Ignoring potential network outbound limit to relocate remaining replicas from dead brokers to healthy ones.");
      return;
    }
  }
  finish();
}
use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.
the class RackAwareGoal method ensureRackAware.
/**
 * Sanity check confirming that the final replica distribution is rack aware.
 *
 * <p>
 * For every leader whose topic is not excluded, the rack ids of the leader's broker and of the distinct
 * follower brokers are collected. The partition is rack aware only when the number of distinct rack ids
 * equals the number of distinct follower brokers plus one (for the leader).
 *
 * @param clusterModel The state of the cluster.
 * @param excludedTopics The topics whose partitions are skipped by this check.
 * @throws OptimizationFailureException If a non-excluded partition has replicas sharing a rack.
 */
private void ensureRackAware(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
  for (Replica leader : clusterModel.leaderReplicas()) {
    if (!excludedTopics.contains(leader.topicPartition().topic())) {
      Set<String> rackIds = new HashSet<>();
      Set<Broker> distinctFollowerBrokers = new HashSet<>(clusterModel.partition(leader.topicPartition()).followerBrokers());
      // Collect the rack ids of the followers first, then the leader's.
      for (Broker followerBroker : distinctFollowerBrokers) {
        rackIds.add(followerBroker.rack().id());
      }
      rackIds.add(leader.broker().rack().id());

      int expectedDistinctRacks = distinctFollowerBrokers.size() + 1;
      if (rackIds.size() != expectedDistinctRacks) {
        throw new OptimizationFailureException("Optimization for goal " + name() + " failed for rack-awareness of "
                                               + "partition " + leader.topicPartition());
      }
    }
  }
}
use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.
the class RackAwareGoal method rebalanceForBroker.
/**
 * Resolves rack-awareness violations on the given broker through replica movements.
 *
 * <p>
 * A replica is left in place when its topic is excluded, or when the broker is alive and the replica already
 * satisfies rack awareness. Every other replica must be moved to one of its rack-aware eligible brokers.
 *
 * @param broker Broker to be balanced.
 * @param clusterModel The state of the cluster.
 * @param optimizedGoals Optimized goals.
 * @param excludedTopics The topics that should be excluded from the optimization action.
 * @throws OptimizationFailureException If a replica that has to move cannot be moved to any eligible broker.
 */
@Override
protected void rebalanceForBroker(Broker broker, ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) throws OptimizationFailureException {
  LOG.debug("balancing broker {}, optimized goals = {}", broker, optimizedGoals);
  // Iterate over a sorted copy of the broker's replicas.
  SortedSet<Replica> orderedReplicas = new TreeSet<>(broker.replicas());
  for (Replica replica : orderedReplicas) {
    boolean alreadyRackAware = broker.isAlive() && satisfiedRackAwareness(replica, clusterModel);
    if (!alreadyRackAware && !shouldExclude(replica, excludedTopics)) {
      // Rack awareness is violated (or the broker is dead): the replica has to go to a broker in another rack.
      if (maybeApplyBalancingAction(clusterModel, replica, rackAwareEligibleBrokers(replica, clusterModel),
                                    ActionType.REPLICA_MOVEMENT, optimizedGoals) == null) {
        throw new OptimizationFailureException("Violated rack-awareness requirement for broker with id " + broker.id() + ".");
      }
    }
  }
}
use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.
the class ReplicaCapacityGoal method initGoalState.
/**
 * Initializes the goal state and runs the sanity checks of the replica capacity goal.
 *
 * <p>
 * This is a hard goal; hence, the proposals are not limited to dead broker replicas in case of self-healing.
 * Two checks are performed:
 * <ul>
 *   <li>On each alive broker, the replicas of excluded topics must not exceed the maximum number of replicas
 *       allowed per broker.</li>
 *   <li>The total number of replicas counted on alive brokers must not exceed the maximum number of replicas
 *       per broker multiplied by the number of healthy brokers.</li>
 * </ul>
 * A warning is logged when every topic is excluded, and self-healing mode is enabled when any broker to
 * balance is not alive.
 *
 * @param clusterModel The state of the cluster.
 * @param excludedTopics The topics that should be excluded from the optimization proposals.
 * @throws OptimizationFailureException If either sanity check fails.
 */
@Override
protected void initGoalState(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
  List<String> rebalanceableTopics = new ArrayList<>(clusterModel.topics());
  rebalanceableTopics.removeAll(excludedTopics);
  if (rebalanceableTopics.isEmpty()) {
    LOG.warn("All topics are excluded from {}.", name());
  }

  // Sanity check: excluded topic replicas in a broker cannot exceed the max number of allowed replicas per broker.
  int replicasOnAliveBrokers = 0;
  for (Broker broker : brokersToBalance(clusterModel)) {
    if (broker.isAlive()) {
      int excludedReplicaCount = 0;
      for (String excludedTopic : excludedTopics) {
        excludedReplicaCount += broker.replicasOfTopicInBroker(excludedTopic).size();
      }
      if (excludedReplicaCount > _balancingConstraint.maxReplicasPerBroker()) {
        throw new OptimizationFailureException(
            String.format("Replicas of excluded topics in broker: %d exceeds the maximum " + "allowed number of replicas: %d.",
                          excludedReplicaCount, _balancingConstraint.maxReplicasPerBroker()));
      }
      // Accumulate for the cluster-wide check below; dead brokers do not contribute to this total.
      replicasOnAliveBrokers += broker.replicas().size();
    } else {
      // A dead broker among the brokers to balance switches the goal to self-healing mode.
      _isSelfHealingMode = true;
    }
  }

  // Sanity check: total replicas in the cluster cannot be more than the allowed replicas in the cluster.
  long clusterReplicaCapacity = _balancingConstraint.maxReplicasPerBroker() * clusterModel.healthyBrokers().size();
  if (replicasOnAliveBrokers > clusterReplicaCapacity) {
    throw new OptimizationFailureException(
        String.format("Total replicas in cluster: %d exceeds the maximum allowed " + "replicas in cluster: %d.",
                      replicasOnAliveBrokers, clusterReplicaCapacity));
  }
}
Aggregations