Use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by LinkedIn.
In class RackAwareGoal, method initGoalState.
/**
 * Initializes the state of this goal before optimization.
 * This is a hard goal; hence, the proposals are not limited to dead broker replicas in case of self-healing.
 *
 * <p>Sanity Check: There exists sufficient number of racks for achieving rack-awareness. When topics are excluded,
 * only the replication factors of the included topics are considered, and <em>all</em> included topics are scanned
 * before deciding, so that the reported number of needed racks (and the provision recommendation) reflects the
 * largest replication factor among them rather than the first offending topic encountered during iteration.
 *
 * <p>If the number of alive racks exceeds the cluster-wide maximum replication factor by at least the configured
 * minimum number of extra racks, the provision response of this goal is set to over-provisioned.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If the number of alive racks is smaller than the required replication factor.
 */
@Override
protected void initGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
  int numAliveRacks = clusterModel.numAliveRacks();
  Set<String> excludedTopics = optimizationOptions.excludedTopics();
  int maxReplicationFactor = clusterModel.maxReplicationFactor();

  // Sanity Check: not enough racks to satisfy rack awareness.
  int requiredRacks;
  String replicaScope;
  if (excludedTopics.isEmpty()) {
    requiredRacks = maxReplicationFactor;
    replicaScope = "each replica";
  } else {
    // Only included topics constrain the number of racks. Finish the scan before checking so that the
    // result does not depend on the iteration order of the topic map.
    requiredRacks = 1;
    for (Map.Entry<String, Integer> replicationFactorByTopicEntry : clusterModel.replicationFactorByTopic().entrySet()) {
      if (!excludedTopics.contains(replicationFactorByTopicEntry.getKey())) {
        requiredRacks = Math.max(requiredRacks, replicationFactorByTopicEntry.getValue());
      }
    }
    replicaScope = "included replicas";
  }
  if (requiredRacks > numAliveRacks) {
    int missingRacks = requiredRacks - numAliveRacks;
    ProvisionRecommendation recommendation = new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
        .numRacks(missingRacks)
        .build();
    throw new OptimizationFailureException(
        String.format("[%s] Insufficient number of racks to distribute %s (Current: %d, Needed: %d).",
                      name(), replicaScope, numAliveRacks, requiredRacks),
        recommendation);
  }

  // Over-provisioning is judged against the cluster-wide maximum replication factor (excluded topics included).
  int numExtraRacks = numAliveRacks - maxReplicationFactor;
  if (numExtraRacks >= _balancingConstraint.overprovisionedMinExtraRacks()) {
    int numRacksToDrop = numExtraRacks - _balancingConstraint.overprovisionedMinExtraRacks() + 1;
    ProvisionRecommendation recommendation = new ProvisionRecommendation.Builder(ProvisionStatus.OVER_PROVISIONED)
        .numRacks(numRacksToDrop)
        .build();
    _provisionResponse = new ProvisionResponse(ProvisionStatus.OVER_PROVISIONED, recommendation, name());
  }

  // Filter out some replicas based on optimization options.
  new SortedReplicasHelper()
      .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectImmigrants(), optimizationOptions.onlyMoveImmigrantReplicas())
      .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectReplicasBasedOnExcludedTopics(excludedTopics), !excludedTopics.isEmpty())
      .trackSortedReplicasFor(replicaSortName(this, false, false), clusterModel);
}
Use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by LinkedIn.
In class ResourceDistributionGoal, method updateGoalState.
/**
 * Evaluates the result of the latest balancing round for the resource of this goal and finishes the goal unless
 * another round is needed to move replicas off dead brokers/disks.
 *
 * <p>Alive brokers above the balance upper limit, or (unless excluded for replica moves) under the balance lower
 * limit, mark the optimization as not succeeded. The provision response is set to over-provisioned when no broker is
 * above the upper limit and the cluster is in a low utilization state, and to right-sized when all brokers are
 * within both limits and the cluster is not in a low utilization state.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If a sanity check on offline replicas or bad disks fails.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
  Set<Integer> overLimitBrokerIds = new HashSet<>();
  Set<Integer> underLimitBrokerIds = new HashSet<>();
  // Proposals skip the excluded topics, yet the utilization of their replicas still counts towards the balance.
  for (Broker aliveBroker : clusterModel.aliveBrokers()) {
    if (!isLoadUnderBalanceUpperLimit(aliveBroker)) {
      overLimitBrokerIds.add(aliveBroker.id());
    }
    // Brokers excluded for replica moves are never reported as being under the balance lower limit.
    if (!(isExcludedForReplicaMove(aliveBroker) || isLoadAboveBalanceLowerLimit(aliveBroker))) {
      underLimitBrokerIds.add(aliveBroker.id());
    }
  }

  final boolean noBrokerAboveUpperLimit = overLimitBrokerIds.isEmpty();
  if (noBrokerAboveUpperLimit) {
    if (_isLowUtilization) {
      // Low utilization state with every broker under the corresponding balance upper limit.
      _provisionResponse = new ProvisionResponse(ProvisionStatus.OVER_PROVISIONED, _overProvisionedRecommendation, name());
    }
  } else {
    String verb = (overLimitBrokerIds.size() > 1) ? "are" : "is";
    LOG.debug("Utilization for broker ids:{} {} above the balance limit for:{} after {}.",
              overLimitBrokerIds,
              verb,
              resource(),
              (clusterModel.selfHealingEligibleReplicas().isEmpty()) ? "rebalance" : "self-healing");
    _succeeded = false;
  }

  if (underLimitBrokerIds.isEmpty()) {
    if (noBrokerAboveUpperLimit && !_isLowUtilization) {
      // Every broker sits within both balance limits and the cluster is not in a low utilization state.
      _provisionResponse = new ProvisionResponse(ProvisionStatus.RIGHT_SIZED);
    }
  } else {
    String verb = (underLimitBrokerIds.size() > 1) ? "are" : "is";
    LOG.debug("Utilization for broker ids:{} {} under the balance limit for:{} after {}.",
              underLimitBrokerIds,
              verb,
              resource(),
              (clusterModel.selfHealingEligibleReplicas().isEmpty()) ? "rebalance" : "self-healing");
    _succeeded = false;
  }

  // Sanity check: No self-healing eligible replica should remain at a dead broker/disk.
  try {
    GoalUtils.ensureNoOfflineReplicas(clusterModel, name());
  } catch (OptimizationFailureException ofe) {
    if (!_fixOfflineReplicasOnly) {
      // First failure: switch to fixing offline replicas only and return without finishing the goal.
      _fixOfflineReplicasOnly = true;
      LOG.info("Ignoring resource balance limit to move replicas from dead brokers/disks.");
      return;
    }
    // Already restricted to fixing offline replicas; propagate the failure.
    throw ofe;
  }

  // Sanity check: No replica should be moved to a broker, which used to host any replica of the same partition on its broken disk.
  GoalUtils.ensureReplicasMoveOffBrokersWithBadDisks(clusterModel, name());
  finish();
}
Use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by LinkedIn.
In class AbstractGoal, method optimize.
/**
 * Runs the optimization of this goal on the given cluster model.
 *
 * <p>The method resets the per-run state ({@code _succeeded}, {@code _provisionResponse}, {@code _finished}),
 * initializes the goal state, and then repeatedly rebalances every broker returned by {@code brokersToBalance} and
 * updates the goal state until {@code _finished} is set. Afterwards the cluster stats taken before and after the
 * optimization are compared, and the provision response is adjusted if the cluster has too few alive brokers to be
 * considered over-provisioned. Sorted replicas tracked on the cluster model are always cleared on exit.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizedGoals Goals passed through to {@code rebalanceForBroker} for each broker.
 * @param optimizationOptions Options to take into account during optimization.
 * @return The value of {@code _succeeded} at the end of the optimization.
 * @throws OptimizationFailureException If goal initialization, rebalancing, or a goal state update fails; the
 *         message of the rethrown exception has mitigation tips appended when available.
 */
@Override
public boolean optimize(ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
try {
_succeeded = true;
// Resetting the provision response ensures fresh provision response if the same goal is optimized multiple times.
_provisionResponse = new ProvisionResponse(UNDECIDED);
LOG.debug("Starting optimization for {}.", name());
// Initialize pre-optimized stats.
ClusterModelStats statsBeforeOptimization = clusterModel.getClusterStats(_balancingConstraint, optimizationOptions);
LOG.trace("[PRE - {}] {}", name(), statsBeforeOptimization);
_finished = false;
long goalStartTime = System.currentTimeMillis();
initGoalState(clusterModel, optimizationOptions);
// Both values below are read before the rebalance loop and are used after it to decide whether the stats comparison applies.
SortedSet<Broker> brokenBrokers = clusterModel.brokenBrokers();
boolean originallyHasExcludedBrokersForReplicaMoveWithReplicas = hasExcludedBrokersForReplicaMoveWithReplicas(clusterModel, optimizationOptions);
// Keep running balancing rounds until the goal marks itself finished (presumably via updateGoalState -- the flag is not set in this method).
while (!_finished) {
for (Broker broker : brokersToBalance(clusterModel)) {
rebalanceForBroker(broker, clusterModel, optimizedGoals, optimizationOptions);
}
updateGoalState(clusterModel, optimizationOptions);
}
ClusterModelStats statsAfterOptimization = clusterModel.getClusterStats(_balancingConstraint, optimizationOptions);
LOG.trace("[POST - {}] {}", name(), statsAfterOptimization);
// The guard avoids computing the elapsed time when debug logging is disabled.
if (LOG.isDebugEnabled()) {
LOG.debug("Finished optimization for {} in {}ms.", name(), System.currentTimeMillis() - goalStartTime);
}
LOG.trace("Cluster after optimization is {}", clusterModel);
// The optimization cannot make stats worse unless the cluster has (1) broken brokers or (2) excluded brokers for replica move with replicas.
if (brokenBrokers.isEmpty() && !originallyHasExcludedBrokersForReplicaMoveWithReplicas) {
ClusterModelStatsComparator comparator = clusterModelStatsComparator();
// Throw exception when the stats before optimization is preferred.
if (comparator.compare(statsAfterOptimization, statsBeforeOptimization) < 0) {
// If a goal provides worse stats after optimization, that indicates an implementation error with the goal.
throw new IllegalStateException(String.format("Optimization for goal %s failed because the optimized result is worse than before." + " Reason: %s.", name(), comparator.explainLastComparison()));
}
}
// Ensure that a cluster is not identified as over provisioned unless it has the minimum required number of alive brokers.
if (_provisionResponse.status() == OVER_PROVISIONED && clusterModel.aliveBrokers().size() < _balancingConstraint.overprovisionedMinBrokers()) {
_provisionResponse = new ProvisionResponse(RIGHT_SIZED);
}
return _succeeded;
} catch (OptimizationFailureException ofe) {
// Any optimization failure marks the cluster as under-provisioned, carrying over the recommendation of the failure.
_provisionResponse = new ProvisionResponse(UNDER_PROVISIONED, ofe.provisionRecommendation(), name());
// Mitigation (if relevant) is reported as part of exception message to provide helpful tips concerning the used optimizationOptions.
String mitigation = GoalUtils.mitigationForOptimizationFailures(optimizationOptions);
String message = String.format("%s%s", ofe.getMessage(), mitigation.isEmpty() ? "" : String.format(" || Tips: %s", mitigation));
// NOTE(review): the new exception is built from the message and recommendation only; the caught exception is not passed along as a cause here.
throw new OptimizationFailureException(message, ofe.provisionRecommendation());
} finally {
// Clear any sorted replicas tracked in the process of optimization.
clusterModel.clearSortedReplicas();
}
}
Use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by LinkedIn.
In class RackAwareDistributionGoal, method updateGoalState.
/**
 * Verifies the outcome of the balancing / self-healing round and finishes the goal.
 *
 * <p>Three sanity checks run in order: no offline replica remains, replicas of each partition satisfy the rack-aware
 * distribution, and no replica was moved to a broker that hosted a replica of the same partition on a broken disk.
 * Unless the goal was already flagged as over-provisioned, the provision response is then set to right-sized.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If any of the sanity checks fails.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
  // Sanity check: No self-healing eligible replica should remain at a dead broker/disk.
  GoalUtils.ensureNoOfflineReplicas(clusterModel, name());

  // A single pass either satisfies this goal or reveals that it cannot be satisfied, so verify the
  // rack-aware distribution of the replicas of each partition right away.
  ensureRackAwareDistribution(clusterModel, optimizationOptions);

  // Sanity check: No replica should be moved to a broker, which used to host any replica of the same partition on its broken disk.
  GoalUtils.ensureReplicasMoveOffBrokersWithBadDisks(clusterModel, name());

  // Keep an existing over-provisioned verdict; otherwise the checks above passing means right-sized.
  boolean flaggedAsOverProvisioned = _provisionResponse.status() == ProvisionStatus.OVER_PROVISIONED;
  if (!flaggedAsOverProvisioned) {
    _provisionResponse = new ProvisionResponse(ProvisionStatus.RIGHT_SIZED);
  }

  finish();
}
Use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by LinkedIn.
In class RackAwareDistributionGoal, method initGoalState.
/**
 * Prepares the state of this goal before optimization.
 * This is a hard goal; hence, the proposals are not limited to dead broker replicas in case of self-healing.
 *
 * <p>Fails when every alive broker is excluded from replica moves. Otherwise computes the balance limit, flags the
 * cluster as over-provisioned when the racks allowed for replica moves exceed the maximum replication factor by at
 * least the configured minimum number of extra racks, and registers the sorted replicas used by this goal.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException If all alive brokers are excluded from replica moves.
 */
@Override
protected void initGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
  _brokersAllowedReplicaMove = GoalUtils.aliveBrokersNotExcludedForReplicaMove(clusterModel, optimizationOptions);
  if (_brokersAllowedReplicaMove.isEmpty()) {
    // No alive broker may receive replicas: recommend adding as many brokers as the maximum replication factor.
    ProvisionRecommendation underProvisioned =
        new ProvisionRecommendation.Builder(ProvisionStatus.UNDER_PROVISIONED)
            .numBrokers(clusterModel.maxReplicationFactor())
            .build();
    throw new OptimizationFailureException(String.format("[%s] All alive brokers are excluded from replica moves.", name()),
                                           underProvisioned);
  }

  _balanceLimit = new BalanceLimit(clusterModel, optimizationOptions);

  int surplusRacks = _balanceLimit.numAliveRacksAllowedReplicaMoves() - clusterModel.maxReplicationFactor();
  if (surplusRacks >= _balancingConstraint.overprovisionedMinExtraRacks()) {
    int droppableRacks = surplusRacks - _balancingConstraint.overprovisionedMinExtraRacks() + 1;
    ProvisionRecommendation overProvisioned =
        new ProvisionRecommendation.Builder(ProvisionStatus.OVER_PROVISIONED)
            .numRacks(droppableRacks)
            .build();
    _provisionResponse = new ProvisionResponse(ProvisionStatus.OVER_PROVISIONED, overProvisioned, name());
  }

  // Track sorted replicas, narrowed down according to the optimization options.
  Set<String> topicsToExclude = optimizationOptions.excludedTopics();
  new SortedReplicasHelper()
      .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectImmigrants(),
                             optimizationOptions.onlyMoveImmigrantReplicas())
      .maybeAddSelectionFunc(ReplicaSortFunctionFactory.selectReplicasBasedOnExcludedTopics(topicsToExclude),
                             !topicsToExclude.isEmpty())
      .trackSortedReplicasFor(replicaSortName(this, false, false), clusterModel);
}
Aggregations