Search in sources :

Example 6 with OptimizationFailureException

use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.

the class ReplicaCapacityGoal method rebalanceForBroker.

/**
 * Moves replicas away from the given broker until it complies with this goal, without violating the
 * constraints of the current goal and the already optimized goals.
 *
 * <p>Processing stops as soon as the broker is alive and holds at most
 * {@code _balancingConstraint.maxReplicasPerBroker()} replicas. Replicas for which
 * {@code shouldExclude} returns true are skipped. A {@code null} result from
 * {@code maybeApplyBalancingAction} is treated as "no move was applied".
 *
 * @param broker         Broker to be balanced.
 * @param clusterModel   The state of the cluster.
 * @param optimizedGoals Optimized goals.
 * @param excludedTopics The topics that should be excluded from the optimization proposals.
 * @throws OptimizationFailureException If no move could be applied for a replica that resides on a broker
 *                                      that is not alive.
 */
@Override
protected void rebalanceForBroker(Broker broker, ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) throws OptimizationFailureException {
    LOG.debug("balancing broker {}, optimized goals = {}", broker, optimizedGoals);
    // Walk over a copy of the replica collection; presumably a successful move alters broker.replicas().
    List<Replica> replicaSnapshot = new ArrayList<>(broker.replicas());
    for (Replica candidate : replicaSnapshot) {
        // An alive broker that is within the replica limit needs no further work.
        boolean withinLimit = broker.isAlive() && broker.replicas().size() <= _balancingConstraint.maxReplicasPerBroker();
        if (withinLimit) {
            break;
        }
        if (shouldExclude(candidate, excludedTopics)) {
            continue;
        }
        // The goal requirements are violated. Try to relocate the replica to one of the eligible brokers.
        List<Broker> candidateBrokers = eligibleBrokers(candidate, clusterModel)
            .stream()
            .map(BrokerReplicaCount::broker)
            .collect(Collectors.toList());
        Broker destination = maybeApplyBalancingAction(clusterModel, candidate, candidateBrokers, ActionType.REPLICA_MOVEMENT, optimizedGoals);
        if (destination != null) {
            continue;
        }
        if (broker.isAlive()) {
            // Failing to move a replica off an alive broker is tolerated; only log it.
            LOG.debug("Failed to move replica {} to any broker in {}.", candidate, candidateBrokers);
            continue;
        }
        // A replica stuck on a dead broker is a hard failure.
        throw new OptimizationFailureException(String.format(
            "Failed to move dead broker replica %s of partition %s to a broker in %s. Limit: %d for brokers: %s",
            candidate,
            clusterModel.partition(candidate.topicPartition()),
            candidateBrokers,
            _balancingConstraint.maxReplicasPerBroker(),
            clusterModel.brokers()));
    }
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) ArrayList(java.util.ArrayList) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Example 7 with OptimizationFailureException

use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.

the class ReplicaDistributionGoal method updateGoalState.

/**
 * Update goal state after one round of self-healing / rebalance.
 *
 * <p>The method performs three steps in order:
 * <ol>
 *   <li>If any broker ids are still recorded as above the upper or under the lower balance limit, a warning
 *       is logged, the corresponding set is cleared and {@code _succeeded} is set to false.</li>
 *   <li>If a self-healing eligible replica still resides on a broker that is not alive, the method either
 *       switches {@code _selfHealingDeadBrokersOnly} on and returns without calling {@code finish()}, or,
 *       if that flag was already on, throws.</li>
 *   <li>Otherwise {@code _selfHealingDeadBrokersOnly} is switched off and {@code finish()} is called.</li>
 * </ol>
 *
 * @param clusterModel   The state of the cluster.
 * @param excludedTopics The topics that should be excluded from the optimization proposal. Not read by this
 *                       method.
 * @throws OptimizationFailureException If a self-healing eligible replica remains on a broker that is not
 *                                      alive while {@code _selfHealingDeadBrokersOnly} is already set.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
    // While proposals exclude the excludedTopics, the balance still considers utilization of the excludedTopic replicas.
    if (!_brokerIdsAboveBalanceUpperLimit.isEmpty()) {
        LOG.warn("Replicas count on broker ids:{} {} above the balance limit of {} after {}.", _brokerIdsAboveBalanceUpperLimit, (_brokerIdsAboveBalanceUpperLimit.size() > 1) ? "are" : "is", _balanceUpperLimit, (clusterModel.selfHealingEligibleReplicas().isEmpty()) ? "rebalance" : "self-healing");
        _brokerIdsAboveBalanceUpperLimit.clear();
        _succeeded = false;
    }
    if (!_brokerIdsUnderBalanceLowerLimit.isEmpty()) {
        LOG.warn("Replica count on broker ids:{} {} under the balance limit of {} after {}.", _brokerIdsUnderBalanceLowerLimit, (_brokerIdsUnderBalanceLowerLimit.size() > 1) ? "are" : "is", _balanceLowerLimit, (clusterModel.selfHealingEligibleReplicas().isEmpty()) ? "rebalance" : "self-healing");
        _brokerIdsUnderBalanceLowerLimit.clear();
        _succeeded = false;
    }
    // Sanity check: No self-healing eligible replica should remain at a decommissioned broker.
    for (Replica replica : clusterModel.selfHealingEligibleReplicas()) {
        if (replica.broker().isAlive()) {
            continue;
        }
        // First replica found on a dead broker decides the outcome: fail if the relaxed mode was already
        // active, otherwise activate it and return without finishing.
        if (_selfHealingDeadBrokersOnly) {
            throw new OptimizationFailureException("Self healing failed to move the replica away from decommissioned brokers.");
        }
        _selfHealingDeadBrokersOnly = true;
        LOG.warn("Omitting resource balance limit to relocate remaining replicas from dead brokers to healthy ones.");
        return;
    }
    // The loop above completed without returning or throwing, so every self-healing eligible replica
    // resides on an alive broker.
    _selfHealingDeadBrokersOnly = false;
    finish();
}
Also used : OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Replica(com.linkedin.kafka.cruisecontrol.model.Replica)

Example 8 with OptimizationFailureException

use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.

the class KafkaAssignerEvenRackAwareGoal method ensureRackAwareSatisfiable.

/**
 * Sanity check: verifies that the number of healthy racks is sufficient for achieving rack-awareness.
 *
 * <p>When no topic is excluded, the cluster-wide maximum replication factor is compared against the number
 * of healthy racks. Otherwise only topics that are not excluded are considered.
 *
 * @param clusterModel   The state of the cluster.
 * @param excludedTopics The topics that should be excluded from the optimization proposals.
 * @throws OptimizationFailureException If a considered replication factor exceeds the number of healthy racks.
 */
private void ensureRackAwareSatisfiable(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
    int healthyRackCount = clusterModel.numHealthyRacks();

    // Without exclusions the cluster-wide maximum replication factor is the only number that matters.
    if (excludedTopics.isEmpty()) {
        if (clusterModel.maxReplicationFactor() > healthyRackCount) {
            throw new OptimizationFailureException("Insufficient number of racks to distribute each replica.");
        }
        return;
    }

    // With exclusions, track the largest replication factor among the included topics only.
    int largestIncludedReplicationFactor = 1;
    for (Map.Entry<String, Integer> topicAndFactor : clusterModel.replicationFactorByTopic().entrySet()) {
        if (excludedTopics.contains(topicAndFactor.getKey())) {
            continue;
        }
        largestIncludedReplicationFactor = Math.max(largestIncludedReplicationFactor, topicAndFactor.getValue());
        if (largestIncludedReplicationFactor > healthyRackCount) {
            throw new OptimizationFailureException("Insufficient number of racks to distribute included replicas.");
        }
    }
}
Also used : OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) HashMap(java.util.HashMap) Map(java.util.Map)

Example 9 with OptimizationFailureException

use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.

the class KafkaAssignerEvenRackAwareGoal method optimize.

/**
 * Optimizes the given cluster model as needed for this goal.
 *
 * <p>This goal must run first: a non-empty {@code optimizedGoals} set is rejected. After initializing the goal
 * state, the leader of every partition is moved to the first position of its replica list, and then the
 * replicas are processed position by position, applying a move wherever {@code maybeApplyMove} deems it
 * necessary.
 *
 * @param clusterModel   The state of the cluster.
 * @param optimizedGoals Goals that have already been optimized. These goals cannot be violated.
 * @param excludedTopics The topics that should be excluded from the optimization action.
 * @return true if the goal is met after the optimization, throws an exception if the goal is not met.
 * @throws IllegalArgumentException     If {@code optimizedGoals} is not empty.
 * @throws OptimizationFailureException If a move cannot be applied for a replica.
 */
@Override
public boolean optimize(ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) throws KafkaCruiseControlException {
    LOG.debug("Starting {} with excluded topics = {}", name(), excludedTopics);
    if (!optimizedGoals.isEmpty()) {
        throw new IllegalArgumentException(String.format("Goals %s cannot be optimized before %s.", optimizedGoals, name()));
    }
    initGoalState(clusterModel, excludedTopics);

    // STEP1: Make sure the leader occupies the first slot of each partition's replica list.
    for (List<Partition> topicPartitions : _partitionsByTopic.values()) {
        for (Partition partition : topicPartitions) {
            if (partition.replicas().get(0) == partition.leader()) {
                // Leader is already in front.
                continue;
            }
            partition.swapReplicaPositions(0, partition.replicas().indexOf(partition.leader()));
        }
    }

    // STEP2: Perform optimization, one replica position at a time across all partitions.
    int positionLimit = clusterModel.maxReplicationFactor();
    for (int replicaIndex = 0; replicaIndex < positionLimit; replicaIndex++) {
        for (List<Partition> topicPartitions : _partitionsByTopic.values()) {
            for (Partition partition : topicPartitions) {
                // Skip partitions that have no replica at this position, as well as excluded ones.
                if (partition.replicas().size() <= replicaIndex || shouldExclude(partition, replicaIndex, excludedTopics)) {
                    continue;
                }
                // Apply the necessary move (if needed).
                boolean moveHandled = maybeApplyMove(clusterModel, partition, replicaIndex);
                if (!moveHandled) {
                    throw new OptimizationFailureException(String.format("Unable to apply move for replica %s.", replicaAtPosition(partition, replicaIndex)));
                }
            }
        }
    }

    ensureRackAware(clusterModel, excludedTopics);
    // Sanity check: No self-healing eligible replica should remain at a decommissioned broker.
    AnalyzerUtils.ensureNoReplicaOnDeadBrokers(clusterModel);
    return true;
}
Also used : Partition(com.linkedin.kafka.cruisecontrol.model.Partition) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 10 with OptimizationFailureException

use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.

the class AbstractGoal method optimize.

/**
 * Runs the optimization loop of this goal on the given cluster model.
 *
 * <p>After initializing the goal state, the method repeatedly rebalances every broker returned by
 * {@code brokersToBalance} and then calls {@code updateGoalState}, until {@code _finished} is set. When the
 * cluster had no dead brokers at the start (i.e. this is not self-healing), the cluster stats after the
 * optimization are compared with the stats before it, and the optimization is rejected if it made them worse.
 *
 * @param clusterModel   The state of the cluster.
 * @param optimizedGoals Goals that have already been optimized; passed on to {@code rebalanceForBroker}.
 * @param excludedTopics The topics that should be excluded from the optimization.
 * @return the value of {@code _succeeded}, which is set to true at the start of this method.
 * @throws OptimizationFailureException If there were no dead brokers and the goal's stats comparator prefers
 *                                      the stats before optimization, or if a step of the loop throws it.
 */
@Override
public boolean optimize(ClusterModel clusterModel, Set<Goal> optimizedGoals, Set<String> excludedTopics) throws OptimizationFailureException {
    _succeeded = true;
    LOG.debug("Starting optimization for {}.", name());
    // Initialize pre-optimized stats.
    ClusterModelStats statsBeforeOptimization = clusterModel.getClusterStats(_balancingConstraint);
    LOG.trace("[PRE - {}] {}", name(), statsBeforeOptimization);
    _finished = false;
    long goalStartTime = System.currentTimeMillis();
    initGoalState(clusterModel, excludedTopics);
    // Obtained before the optimization loop; used below to decide whether this run is self-healing.
    Collection<Broker> deadBrokers = clusterModel.deadBrokers();
    while (!_finished) {
        for (Broker broker : brokersToBalance(clusterModel)) {
            rebalanceForBroker(broker, clusterModel, optimizedGoals, excludedTopics);
        }
        updateGoalState(clusterModel, excludedTopics);
    }
    ClusterModelStats statsAfterOptimization = clusterModel.getClusterStats(_balancingConstraint);
    LOG.trace("[POST - {}] {}", name(), statsAfterOptimization);
    LOG.debug("Finished optimization for {} in {}ms.", name(), System.currentTimeMillis() - goalStartTime);
    LOG.trace("Cluster after optimization is {}", clusterModel);
    // We only ensure the optimization did not make stats worse when it is not self-healing.
    if (deadBrokers.isEmpty()) {
        ClusterModelStatsComparator comparator = clusterModelStatsComparator();
        // Throw exception when the stats before optimization is preferred.
        if (comparator.compare(statsAfterOptimization, statsBeforeOptimization) < 0) {
            throw new OptimizationFailureException("Optimization for Goal " + name()
                + " failed because the optimized result is worse than before. Detail reason: "
                + comparator.explainLastComparison());
        }
    }
    return _succeeded;
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) ClusterModelStats(com.linkedin.kafka.cruisecontrol.model.ClusterModelStats)

Aggregations

OptimizationFailureException (com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException)15 Broker (com.linkedin.kafka.cruisecontrol.model.Broker)8 Replica (com.linkedin.kafka.cruisecontrol.model.Replica)8 HashSet (java.util.HashSet)3 Map (java.util.Map)3 BalancingConstraint (com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)2 Resource (com.linkedin.kafka.cruisecontrol.common.Resource)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 ClusterModelStats (com.linkedin.kafka.cruisecontrol.model.ClusterModelStats)1 Load (com.linkedin.kafka.cruisecontrol.model.Load)1 Partition (com.linkedin.kafka.cruisecontrol.model.Partition)1 List (java.util.List)1 TreeSet (java.util.TreeSet)1