Search in sources :

Example 1 with OptimizationFailureException

use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.

the class ResourceDistributionGoal method updateGoalState.

/**
 * Reports brokers that are still outside the balance limits for the current resource, then either
 * finishes the goal or restricts it to relocating the replicas that remain on dead brokers.
 *
 * <p>Each healthy broker whose load is not under the balance upper limit, or not above the balance
 * lower limit, is logged with a warning and marks the goal as not succeeded. Afterwards the
 * self-healing eligible replicas are inspected: if one of them still resides on a broker that is not
 * alive, the goal switches to dead-broker-only mode and returns without finishing; if it was already
 * in that mode, the optimization fails.
 *
 * @param clusterModel The state of the cluster.
 * @param excludedTopics The topics that should be excluded from the optimization action. Not read by
 *                       this method; the balance check considers the load of all replicas.
 * @throws OptimizationFailureException If a self-healing eligible replica is still on a dead broker
 *                                      while the goal is already in dead-broker-only mode.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
    Set<Integer> brokerIdsAboveBalanceUpperLimit = new HashSet<>();
    Set<Integer> brokerIdsUnderBalanceLowerLimit = new HashSet<>();
    // While proposals exclude the excludedTopics, the balance still considers utilization of the excludedTopic replicas.
    for (Broker broker : clusterModel.healthyBrokers()) {
        if (!isLoadUnderBalanceUpperLimit(broker)) {
            brokerIdsAboveBalanceUpperLimit.add(broker.id());
        }
        if (!isLoadAboveBalanceLowerLimit(broker)) {
            brokerIdsUnderBalanceLowerLimit.add(broker.id());
        }
    }
    // The same label is used by both warnings, so derive it once.
    String optimizationType = clusterModel.selfHealingEligibleReplicas().isEmpty() ? "rebalance" : "self-healing";
    if (!brokerIdsAboveBalanceUpperLimit.isEmpty()) {
        LOG.warn("Utilization for broker ids:{} {} above the balance limit for:{} after {}.", brokerIdsAboveBalanceUpperLimit, (brokerIdsAboveBalanceUpperLimit.size() > 1) ? "are" : "is", resource(), optimizationType);
        _succeeded = false;
    }
    if (!brokerIdsUnderBalanceLowerLimit.isEmpty()) {
        LOG.warn("Utilization for broker ids:{} {} under the balance limit for:{} after {}.", brokerIdsUnderBalanceLowerLimit, (brokerIdsUnderBalanceLowerLimit.size() > 1) ? "are" : "is", resource(), optimizationType);
        _succeeded = false;
    }
    // Sanity check: No self-healing eligible replica should remain at a decommissioned broker.
    for (Replica replica : clusterModel.selfHealingEligibleReplicas()) {
        if (replica.broker().isAlive()) {
            continue;
        }
        // A replica is still on a dead broker. If the balance limit was already being omitted, give up.
        if (_selfHealingDeadBrokersOnly) {
            throw new OptimizationFailureException("Self healing failed to move the replica away from decommissioned brokers.");
        }
        // Otherwise switch to dead-broker-only mode and return without finishing the goal.
        _selfHealingDeadBrokersOnly = true;
        LOG.warn("Omitting resource balance limit to relocate remaining replicas from dead brokers to healthy ones.");
        return;
    }
    // No dead broker contains replica. The loop above either threw or returned for any offending
    // replica, so no second pass over the self-healing eligible replicas is needed before finishing.
    _selfHealingDeadBrokersOnly = false;
    finish();
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Replica(com.linkedin.kafka.cruisecontrol.model.Replica) HashSet(java.util.HashSet)

Example 2 with OptimizationFailureException

use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.

the class KafkaAssignerEvenRackAwareGoal method ensureRackAware.

/**
 * Sanity check that the final replica distribution is rack-aware.
 *
 * <p>For every leader replica whose topic is not excluded, the rack ids of the leader broker and of
 * the partition's distinct follower brokers are collected. The check passes for that partition only
 * when the number of distinct rack ids equals the number of distinct follower brokers plus one.
 *
 * @param clusterModel The state of the cluster.
 * @param excludedTopics The topics that should be excluded from the optimization proposals; their
 *                       partitions are not checked.
 * @throws OptimizationFailureException If the rack-id count of a checked partition does not match
 *                                      its broker count.
 */
private void ensureRackAware(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
    for (Replica leader : clusterModel.leaderReplicas()) {
        boolean topicIsExcluded = excludedTopics.contains(leader.topicPartition().topic());
        if (!topicIsExcluded) {
            Set<String> occupiedRackIds = new HashSet<>();
            // Copying into a set collapses any repeated follower broker.
            Set<Broker> distinctFollowers = new HashSet<>(clusterModel.partition(leader.topicPartition()).followerBrokers());
            for (Broker follower : distinctFollowers) {
                occupiedRackIds.add(follower.rack().id());
            }
            occupiedRackIds.add(leader.broker().rack().id());
            // One rack per follower broker, plus one for the leader.
            int expectedRackCount = distinctFollowers.size() + 1;
            if (occupiedRackIds.size() != expectedRackCount) {
                throw new OptimizationFailureException("Optimization for goal " + name() + " failed for rack-awareness of partition " + leader.topicPartition());
            }
        }
    }
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Replica(com.linkedin.kafka.cruisecontrol.model.Replica) HashSet(java.util.HashSet)

Example 3 with OptimizationFailureException

use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.

the class CapacityGoal method ensureUtilizationUnderCapacity.

/**
 * Verifies that, for this goal's resource, no broker or host carrying replicas exceeds its capacity limit.
 *
 * <p>The capacity limit is the capacity for the resource multiplied by the capacity threshold of the
 * balancing constraint. {@link Resource#isHostResource()} enables the host-level check and
 * {@link Resource#isBrokerResource()} enables the broker-level check. Hosts and brokers without
 * replicas are never reported.
 *
 * @param clusterModel Cluster model.
 * @throws OptimizationFailureException If the expected utilization of a host or broker that holds
 *                                      replicas is above its capacity limit.
 */
private void ensureUtilizationUnderCapacity(ClusterModel clusterModel) throws OptimizationFailureException {
    Resource checkedResource = resource();
    double threshold = _balancingConstraint.capacityThreshold(checkedResource);
    for (Broker candidate : clusterModel.brokers()) {
        // Host-level violation check.
        if (checkedResource.isHostResource()) {
            double hostUtilization = candidate.host().load().expectedUtilizationFor(checkedResource);
            double hostLimit = candidate.host().capacityFor(checkedResource) * threshold;
            boolean hostHasReplicas = !candidate.host().replicas().isEmpty();
            if (hostHasReplicas && hostUtilization > hostLimit) {
                String hostFailure = String.format("Optimization for goal %s failed because %s utilization for host %s is %f which is above capacity limit %f.",
                                                   name(), checkedResource, candidate.host().name(), hostUtilization, hostLimit);
                throw new OptimizationFailureException(hostFailure);
            }
        }
        // Broker-level violation check.
        if (checkedResource.isBrokerResource()) {
            double brokerUtilization = candidate.load().expectedUtilizationFor(checkedResource);
            double brokerLimit = candidate.capacityFor(checkedResource) * threshold;
            boolean brokerHasReplicas = !candidate.replicas().isEmpty();
            if (brokerHasReplicas && brokerUtilization > brokerLimit) {
                String brokerFailure = String.format("Optimization for goal %s failed because %s utilization for broker %d is %f which is above capacity limit %f.",
                                                     name(), checkedResource, candidate.id(), brokerUtilization, brokerLimit);
                throw new OptimizationFailureException(brokerFailure);
            }
        }
    }
}
Also used : Broker(com.linkedin.kafka.cruisecontrol.model.Broker) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Resource(com.linkedin.kafka.cruisecontrol.common.Resource)

Example 4 with OptimizationFailureException

use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.

the class CapacityGoal method initGoalState.

/**
 * Sanity check run before optimization: the cluster's existing utilization of this goal's resource
 * must not exceed the allowed capacity, i.e. the cluster capacity for the resource multiplied by the
 * capacity threshold of the balancing constraint.
 *
 * @param clusterModel The state of the cluster.
 * @param excludedTopics The topics that should be excluded from the optimization proposals. Not read
 *                       here; the existing utilization still includes replicas of excluded topics.
 * @throws OptimizationFailureException If the existing utilization is above the allowed capacity.
 */
@Override
protected void initGoalState(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
    // Utilization is taken from the whole cluster load, excluded topics included.
    final double existingUtilization = clusterModel.load().expectedUtilizationFor(resource());
    final double allowedCapacity = clusterModel.capacityFor(resource()) * _balancingConstraint.capacityThreshold(resource());
    if (existingUtilization > allowedCapacity) {
        // Not enough resources to host the current load within the threshold.
        String failure = "Insufficient healthy cluster capacity for resource:" + resource()
                         + " existing cluster utilization " + existingUtilization
                         + " allowed capacity " + allowedCapacity;
        throw new OptimizationFailureException(failure);
    }
}
Also used : Load(com.linkedin.kafka.cruisecontrol.model.Load) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException)

Example 5 with OptimizationFailureException

use of com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException in project cruise-control by linkedin.

the class RackAwareGoal method initGoalState.

/**
 * Sanity check run before optimization: there must be enough healthy racks for rack-awareness.
 *
 * <p>Without excluded topics, the cluster-wide maximum replication factor is compared against the
 * number of healthy racks. With excluded topics, only the replication factors of the remaining
 * (included) topics are considered.
 *
 * <p>As stated by the original documentation, this is a hard goal; hence, the proposals are not
 * limited to dead broker replicas in case of self-healing.
 *
 * @param clusterModel The state of the cluster.
 * @param excludedTopics The topics that should be excluded from the optimization proposals.
 * @throws OptimizationFailureException If the relevant maximum replication factor is greater than
 *                                      the number of healthy racks.
 */
@Override
protected void initGoalState(ClusterModel clusterModel, Set<String> excludedTopics) throws OptimizationFailureException {
    int availableRacks = clusterModel.numHealthyRacks();
    if (excludedTopics.isEmpty()) {
        // Nothing is excluded, so the cluster-wide maximum applies.
        if (clusterModel.maxReplicationFactor() > availableRacks) {
            throw new OptimizationFailureException("Insufficient number of racks to distribute each replica.");
        }
        return;
    }
    // Track the largest replication factor among the topics that are not excluded.
    int highestIncludedReplicationFactor = 1;
    Map<String, Integer> factorsByTopic = clusterModel.replicationFactorByTopic();
    for (Map.Entry<String, Integer> topicAndFactor : factorsByTopic.entrySet()) {
        if (excludedTopics.contains(topicAndFactor.getKey())) {
            continue;
        }
        highestIncludedReplicationFactor = Math.max(highestIncludedReplicationFactor, topicAndFactor.getValue());
        if (highestIncludedReplicationFactor > availableRacks) {
            throw new OptimizationFailureException("Insufficient number of racks to distribute included replicas.");
        }
    }
}
Also used : OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) Map(java.util.Map) BalancingConstraint(com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)

Aggregations

OptimizationFailureException (com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException): 15, Broker (com.linkedin.kafka.cruisecontrol.model.Broker): 8, Replica (com.linkedin.kafka.cruisecontrol.model.Replica): 8, HashSet (java.util.HashSet): 3, Map (java.util.Map): 3, BalancingConstraint (com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint): 2, Resource (com.linkedin.kafka.cruisecontrol.common.Resource): 2, ArrayList (java.util.ArrayList): 2, HashMap (java.util.HashMap): 2, ClusterModelStats (com.linkedin.kafka.cruisecontrol.model.ClusterModelStats): 1, Load (com.linkedin.kafka.cruisecontrol.model.Load): 1, Partition (com.linkedin.kafka.cruisecontrol.model.Partition): 1, List (java.util.List): 1, TreeSet (java.util.TreeSet): 1