use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.
the class ReplicaCapacityGoal method rebalanceForBroker.
/**
 * Moves replicas off the given broker, one at a time, without violating the constraints of the current goal
 * and of the already optimized goals.
 *
 * <p>An alive broker is considered done as soon as its replica count is at or below
 * {@code _balancingConstraint.maxReplicasPerBroker()}. A broker that is not alive never satisfies that early
 * exit, so a move is attempted for every non-excluded replica it hosts.
 *
 * @param broker Broker to be balanced.
 * @param clusterModel The state of the cluster.
 * @param optimizedGoals Optimized goals.
 * @param excludedTopics The topics that should be excluded from the optimization proposals.
 * @throws OptimizationFailureException If a replica residing on a broker that is not alive could not be moved
 *                                      to any of its eligible brokers.
 */
@Override
protected void rebalanceForBroker(Broker broker,
                                  ClusterModel clusterModel,
                                  Set<Goal> optimizedGoals,
                                  Set<String> excludedTopics) throws OptimizationFailureException {
  LOG.debug("balancing broker {}, optimized goals = {}", broker, optimizedGoals);

  // Walk a copy of the replica collection; presumably a successful move alters broker.replicas() underneath us.
  List<Replica> replicaSnapshot = new ArrayList<>(broker.replicas());
  for (Replica candidate : replicaSnapshot) {
    boolean brokerSatisfied = broker.isAlive()
        && broker.replicas().size() <= _balancingConstraint.maxReplicasPerBroker();
    if (brokerSatisfied) {
      break;
    }
    if (shouldExclude(candidate, excludedTopics)) {
      continue;
    }

    // The goal requirements are violated. Try to relocate this replica to one of its eligible brokers.
    List<Broker> destinations = eligibleBrokers(candidate, clusterModel)
        .stream()
        .map(BrokerReplicaCount::broker)
        .collect(Collectors.toList());
    Broker chosen = maybeApplyBalancingAction(clusterModel, candidate, destinations, ActionType.REPLICA_MOVEMENT, optimizedGoals);
    if (chosen != null) {
      continue;
    }

    if (broker.isAlive()) {
      // An alive broker may keep the replica; just record the failed attempt.
      LOG.debug("Failed to move replica {} to any broker in {}.", candidate, destinations);
      continue;
    }

    // The replica resides in a dead broker and could not be moved anywhere: give up.
    throw new OptimizationFailureException(
        String.format("Failed to move dead broker replica %s of partition %s to a broker in %s. Limit: %d for brokers: %s",
                      candidate,
                      clusterModel.partition(candidate.topicPartition()),
                      destinations,
                      _balancingConstraint.maxReplicasPerBroker(),
                      clusterModel.brokers()));
  }
}
use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.
the class ReplicaDistributionGoal method updateGoalState.
/**
 * Update goal state after one round of self-healing / rebalance.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>If any broker ids were recorded as above the upper / under the lower balance limit, a warning is
 *       logged, the corresponding id collection is cleared and {@code _succeeded} is set to {@code false}.</li>
 *   <li>If a self-healing eligible replica still resides on a broker that is not alive, then on the first such
 *       round {@code _selfHealingDeadBrokersOnly} is switched on and the method returns without calling
 *       {@code finish()}; if the flag was already on, the optimization fails.</li>
 *   <li>Otherwise {@code _selfHealingDeadBrokersOnly} is reset and {@code finish()} is called.</li>
 * </ol>
 *
 * @param clusterModel The state of the cluster.
 * @param excludedTopics The topics that should be excluded from the optimization proposal (not read by this method).
 * @throws OptimizationFailureException If a self-healing eligible replica remains on a broker that is not alive
 *                                      even after a round run with {@code _selfHealingDeadBrokersOnly} enabled.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
  // While proposals exclude the excludedTopics, the balance still considers utilization of the excludedTopic replicas.
  if (!_brokerIdsAboveBalanceUpperLimit.isEmpty()) {
    LOG.warn("Replicas count on broker ids:{} {} above the balance limit of {} after {}.", _brokerIdsAboveBalanceUpperLimit, (_brokerIdsAboveBalanceUpperLimit.size() > 1) ? "are" : "is", _balanceUpperLimit, (clusterModel.selfHealingEligibleReplicas().isEmpty()) ? "rebalance" : "self-healing");
    _brokerIdsAboveBalanceUpperLimit.clear();
    _succeeded = false;
  }
  if (!_brokerIdsUnderBalanceLowerLimit.isEmpty()) {
    LOG.warn("Replica count on broker ids:{} {} under the balance limit of {} after {}.", _brokerIdsUnderBalanceLowerLimit, (_brokerIdsUnderBalanceLowerLimit.size() > 1) ? "are" : "is", _balanceLowerLimit, (clusterModel.selfHealingEligibleReplicas().isEmpty()) ? "rebalance" : "self-healing");
    _brokerIdsUnderBalanceLowerLimit.clear();
    _succeeded = false;
  }
  // Sanity check: No self-healing eligible replica should remain at a decommissioned broker.
  for (Replica replica : clusterModel.selfHealingEligibleReplicas()) {
    if (replica.broker().isAlive()) {
      continue;
    }
    // A replica is still on a dead broker. If the previous round already ran in dead-brokers-only mode, give up.
    if (_selfHealingDeadBrokersOnly) {
      throw new OptimizationFailureException("Self healing failed to move the replica away from decommissioned brokers.");
    }
    // Otherwise switch to dead-brokers-only mode and return without finishing; presumably the caller then runs
    // another round.
    _selfHealingDeadBrokersOnly = true;
    LOG.warn("Omitting resource balance limit to relocate remaining replicas from dead brokers to healthy ones.");
    return;
  }
  // No dead broker contains replica. The loop above either throws or returns on the first offending replica,
  // so reaching this point already proves the sanity check holds; no second scan is needed.
  _selfHealingDeadBrokersOnly = false;
  finish();
}
use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.
the class KafkaAssignerEvenRackAwareGoal method ensureRackAwareSatisfiable.
/**
 * Sanity check: verifies that the number of healthy racks is large enough to achieve rack-awareness.
 *
 * <p>With no excluded topics, the cluster-wide maximum replication factor is compared against the number of
 * healthy racks. Otherwise only topics that are not excluded are considered, each with a replication factor
 * floor of 1.
 *
 * @param clusterModel The state of the cluster.
 * @param excludedTopics The topics that should be excluded from the optimization proposals.
 * @throws OptimizationFailureException If a considered replication factor exceeds the number of healthy racks.
 */
private void ensureRackAwareSatisfiable(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
  int healthyRackCount = clusterModel.numHealthyRacks();

  // Nothing excluded: the cluster-wide maximum replication factor is the one that matters.
  if (excludedTopics.isEmpty()) {
    if (clusterModel.maxReplicationFactor() > healthyRackCount) {
      throw new OptimizationFailureException("Insufficient number of racks to distribute each replica.");
    }
    return;
  }

  // Some topics are excluded: fail on the first included topic that needs more racks than are healthy.
  for (Map.Entry<String, Integer> topicAndFactor : clusterModel.replicationFactorByTopic().entrySet()) {
    if (excludedTopics.contains(topicAndFactor.getKey())) {
      continue;
    }
    // A replication factor below 1 is treated as 1 for this check.
    int requiredRacks = Math.max(1, topicAndFactor.getValue());
    if (requiredRacks > healthyRackCount) {
      throw new OptimizationFailureException("Insufficient number of racks to distribute included replicas.");
    }
  }
}
use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.
the class KafkaAssignerEvenRackAwareGoal method optimize.
/**
 * Optimize the given cluster model as needed for this goal.
 *
 * <p>This goal must run first: a non-empty set of already optimized goals is rejected. The optimization puts
 * the leader at position 0 of every partition's replica list, then goes position by position over all
 * partitions and applies a move where needed, and finally runs the rack-awareness and dead-broker sanity checks.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizedGoals Goals that have already been optimized. These goals cannot be violated.
 * @param excludedTopics The topics that should be excluded from the optimization action.
 * @return true if the goal is met after the optimization, throws an exception if the goal is not met.
 * @throws IllegalArgumentException If {@code optimizedGoals} is not empty.
 */
@Override
public boolean optimize(ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) throws KafkaCruiseControlException {
  LOG.debug("Starting {} with excluded topics = {}", name(), excludedTopics);
  if (!optimizedGoals.isEmpty()) {
    throw new IllegalArgumentException(String.format("Goals %s cannot be optimized before %s.", optimizedGoals, name()));
  }
  initGoalState(clusterModel, excludedTopics);

  // STEP1: Move leader to the first position in partition replica list.
  for (List<Partition> topicPartitions : _partitionsByTopic.values()) {
    for (Partition partition : topicPartitions) {
      if (partition.replicas().get(0) == partition.leader()) {
        // The leader already occupies the first position.
        continue;
      }
      int leaderIndex = partition.replicas().indexOf(partition.leader());
      partition.swapReplicaPositions(0, leaderIndex);
    }
  }

  // STEP2: Perform optimization, one replica position at a time across all partitions.
  int positionCount = clusterModel.maxReplicationFactor();
  for (int position = 0; position < positionCount; position++) {
    for (List<Partition> topicPartitions : _partitionsByTopic.values()) {
      for (Partition partition : topicPartitions) {
        boolean hasReplicaAtPosition = partition.replicas().size() > position;
        if (!hasReplicaAtPosition || shouldExclude(partition, position, excludedTopics)) {
          continue;
        }
        // Apply the necessary move (if needed).
        if (maybeApplyMove(clusterModel, partition, position)) {
          continue;
        }
        throw new OptimizationFailureException(String.format("Unable to apply move for replica %s.", replicaAtPosition(partition, position)));
      }
    }
  }

  ensureRackAware(clusterModel, excludedTopics);
  // Sanity check: No self-healing eligible replica should remain at a decommissioned broker.
  AnalyzerUtils.ensureNoReplicaOnDeadBrokers(clusterModel);
  return true;
}
use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.
the class AbstractGoal method optimize.
/**
 * Optimize the given cluster model for this goal.
 *
 * <p>Cluster stats are captured before and after the optimization. The goal state is initialized, then every
 * broker returned by {@code brokersToBalance} is rebalanced and the goal state updated, repeatedly, until
 * {@code _finished} becomes true. If the cluster had no dead brokers when the optimization started, the stats
 * after the optimization must not compare as worse than the stats before it.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizedGoals Goals that have already been optimized.
 * @param excludedTopics The topics that should be excluded from the optimization.
 * @return The value of {@code _succeeded} at the end of the optimization.
 * @throws OptimizationFailureException If rebalancing or the goal state update fails, or if (with no dead
 *                                      brokers) the optimized result is worse than the state before.
 */
@Override
public boolean optimize(ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) throws OptimizationFailureException {
  _succeeded = true;
  LOG.debug("Starting optimization for {}.", name());
  // Initialize pre-optimized stats.
  ClusterModelStats statsBeforeOptimization = clusterModel.getClusterStats(_balancingConstraint);
  LOG.trace("[PRE - {}] {}", name(), statsBeforeOptimization);
  _finished = false;
  long goalStartTime = System.currentTimeMillis();
  initGoalState(clusterModel, excludedTopics);
  // Dead brokers are looked up before any rebalancing takes place.
  Collection<Broker> deadBrokers = clusterModel.deadBrokers();
  // Keep running rounds until _finished is set; presumably updateGoalState does so once the goal is done.
  while (!_finished) {
    for (Broker broker : brokersToBalance(clusterModel)) {
      rebalanceForBroker(broker, clusterModel, optimizedGoals, excludedTopics);
    }
    updateGoalState(clusterModel, excludedTopics);
  }
  ClusterModelStats statsAfterOptimization = clusterModel.getClusterStats(_balancingConstraint);
  LOG.trace("[POST - {}] {}", name(), statsAfterOptimization);
  LOG.debug("Finished optimization for {} in {}ms.", name(), System.currentTimeMillis() - goalStartTime);
  LOG.trace("Cluster after optimization is {}", clusterModel);
  // We only ensure the optimization did not make stats worse when it is not self-healing.
  if (deadBrokers.isEmpty()) {
    ClusterModelStatsComparator comparator = clusterModelStatsComparator();
    // Throw exception when the stats before optimization is preferred.
    if (comparator.compare(statsAfterOptimization, statsBeforeOptimization) < 0) {
      // The original literals were joined without a separating space ("optimizedresult").
      throw new OptimizationFailureException("Optimization for Goal " + name() + " failed because the optimized "
                                             + "result is worse than before. Detail reason: " + comparator.explainLastComparison());
    }
  }
  return _succeeded;
}
Aggregations