Search in sources :

Example 6 with ProvisionResponse

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by linkedin.

the class ReplicaDistributionGoal method updateGoalState.

/**
 * Updates the goal state via the superclass and then refreshes the provision response of this goal.
 * When {@code numBrokersToDrop(clusterModel)} yields a non-null count, the response becomes
 * {@code OVER_PROVISIONED} with a recommendation carrying that broker count. Otherwise, the response
 * becomes {@code RIGHT_SIZED} only if {@code _succeeded} is set; in every other case it is left untouched.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException declared by this method; presumably propagated from the superclass update.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
    super.updateGoalState(clusterModel, optimizationOptions);
    Integer droppableBrokerCount = numBrokersToDrop(clusterModel);
    if (droppableBrokerCount == null) {
        if (_succeeded) {
            // The cluster is not overprovisioned and all brokers are within the upper and lower balance limits.
            _provisionResponse = new ProvisionResponse(ProvisionStatus.RIGHT_SIZED);
        }
        return;
    }
    // A non-null count means brokers can be dropped: report over-provisioning together with that count.
    ProvisionRecommendation.Builder recommendationBuilder = new ProvisionRecommendation.Builder(ProvisionStatus.OVER_PROVISIONED);
    ProvisionRecommendation dropRecommendation = recommendationBuilder.numBrokers(droppableBrokerCount).build();
    _provisionResponse = new ProvisionResponse(ProvisionStatus.OVER_PROVISIONED, dropRecommendation, name());
}
Also used : ProvisionRecommendation(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation) ProvisionResponse(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse)

Example 7 with ProvisionResponse

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by linkedin.

the class RackAwareGoal method updateGoalState.

/**
 * Finalizes the state of this goal after its optimization pass.
 * Runs the post-optimization sanity checks in order (rack awareness of the final distribution, no offline
 * replicas left behind, no replica moved back onto a broker with a bad disk), marks the cluster as
 * {@code RIGHT_SIZED} unless it has already been flagged as {@code OVER_PROVISIONED}, and then finishes the goal.
 *
 * @param clusterModel The state of the cluster.
 * @param optimizationOptions Options to take into account during optimization.
 * @throws OptimizationFailureException declared by this method; presumably raised when a sanity check fails.
 */
@Override
protected void updateGoalState(ClusterModel clusterModel, OptimizationOptions optimizationOptions) throws OptimizationFailureException {
    // A single pass either satisfies this goal or reveals that satisfying it is impossible.
    // Check 1: the final distribution must be rack aware.
    ensureRackAware(clusterModel, optimizationOptions);
    // Check 2: no self-healing eligible replica may remain at a dead broker/disk.
    GoalUtils.ensureNoOfflineReplicas(clusterModel, name());
    // Check 3: no replica may be moved to a broker that used to host a replica of the same partition on its broken disk.
    GoalUtils.ensureReplicasMoveOffBrokersWithBadDisks(clusterModel, name());
    // Keep an existing over-provisioned verdict; anything else is replaced with right-sized.
    boolean alreadyOverProvisioned = _provisionResponse.status() == ProvisionStatus.OVER_PROVISIONED;
    if (!alreadyOverProvisioned) {
        _provisionResponse = new ProvisionResponse(ProvisionStatus.RIGHT_SIZED);
    }
    finish();
}
Also used : ProvisionResponse(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse)

Example 8 with ProvisionResponse

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by linkedin.

the class GoalViolationDetector method setBalancednessWithOfflineReplicas.

/**
 * Sets the balancedness score to {@code BALANCEDNESS_SCORE_WITH_OFFLINE_REPLICAS} and replaces the
 * provision response with a fresh {@code UNDECIDED} one.
 */
protected void setBalancednessWithOfflineReplicas() {
    _balancednessScore = BALANCEDNESS_SCORE_WITH_OFFLINE_REPLICAS;
    // The provision status is reported as undecided alongside the offline-replica score.
    final ProvisionResponse undecidedResponse = new ProvisionResponse(ProvisionStatus.UNDECIDED);
    _provisionResponse = undecidedResponse;
}
Also used : ProvisionResponse(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse)

Example 9 with ProvisionResponse

use of com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse in project cruise-control by linkedin.

the class GoalViolationDetector method run.

/**
 * Runs one round of goal violation detection.
 *
 * <p>Returns immediately unless the goal violation detection status is {@code READY}. Otherwise, for each
 * detection goal whose cluster model completeness requirements are met, a cluster model is (re)generated when
 * needed and the goal is evaluated through {@code optimizeForGoal}; the provision response of every detection
 * goal is aggregated into a single response. If the provisioner is enabled, it is asked to rightsize the cluster
 * based on the aggregated response. When the collected violations are not empty, they are added to the anomalies,
 * and the balancedness score is refreshed at the end.
 *
 * <p>The round is aborted early when {@code skipDueToOfflineReplicas} returns {@code true} for a freshly
 * generated cluster model. Exceptions are caught and logged rather than propagated, and the semaphore acquired
 * for model generation (if any is still held) is closed before the method returns.
 */
@Override
public void run() {
    if (getGoalViolationDetectionStatus() != AnomalyDetectionStatus.READY) {
        return;
    }
    AutoCloseable clusterModelSemaphore = null;
    try {
        Map<String, Object> parameterConfigOverrides = Map.of(KAFKA_CRUISE_CONTROL_OBJECT_CONFIG, _kafkaCruiseControl, ANOMALY_DETECTION_TIME_MS_OBJECT_CONFIG, _kafkaCruiseControl.timeMs());
        GoalViolations goalViolations = _kafkaCruiseControl.config().getConfiguredInstance(AnomalyDetectorConfig.GOAL_VIOLATIONS_CLASS_CONFIG, GoalViolations.class, parameterConfigOverrides);
        boolean newModelNeeded = true;
        ClusterModel clusterModel = null;
        // Retrieve excluded brokers for leadership and replica move.
        // The executor state is fetched only if at least one of the exclusion flags is set, which is also
        // the only case in which it is dereferenced below.
        ExecutorState executorState = null;
        if (_excludeRecentlyDemotedBrokers || _excludeRecentlyRemovedBrokers) {
            executorState = _kafkaCruiseControl.executorState();
        }
        Set<Integer> excludedBrokersForLeadership = _excludeRecentlyDemotedBrokers ? executorState.recentlyDemotedBrokers() : Collections.emptySet();
        Set<Integer> excludedBrokersForReplicaMove = _excludeRecentlyRemovedBrokers ? executorState.recentlyRemovedBrokers() : Collections.emptySet();
        ProvisionResponse provisionResponse = new ProvisionResponse(ProvisionStatus.UNDECIDED);
        boolean checkPartitionsWithRFGreaterThanNumRacks = true;
        final Timer.Context ctx = _goalViolationDetectionTimer.time();
        try {
            for (Goal goal : _detectionGoals) {
                if (_kafkaCruiseControl.loadMonitor().meetCompletenessRequirements(goal.clusterModelCompletenessRequirements())) {
                    LOG.debug("Detecting if {} is violated.", goal.name());
                    // Because the model generation could be slow, We only get new cluster model if needed.
                    if (newModelNeeded) {
                        if (clusterModelSemaphore != null) {
                            clusterModelSemaphore.close();
                            // Drop the reference to the closed handle: if the acquisition below throws, the outer
                            // finally block must not close the same handle again (AutoCloseable#close is not
                            // required to be idempotent).
                            clusterModelSemaphore = null;
                        }
                        clusterModelSemaphore = _kafkaCruiseControl.acquireForModelGeneration(new OperationProgress());
                        // Make cluster model null before generating a new cluster model so the current one can be GCed.
                        clusterModel = null;
                        clusterModel = _kafkaCruiseControl.clusterModel(goal.clusterModelCompletenessRequirements(), _allowCapacityEstimation, new OperationProgress());
                        // Detection and fix for dead brokers/disks is the responsibility of broker/disk failure detector.
                        if (skipDueToOfflineReplicas(clusterModel)) {
                            return;
                        }
                        _lastCheckedModelGeneration = clusterModel.generation();
                    }
                    // The result of evaluating this goal decides whether the next goal needs a fresh cluster model.
                    newModelNeeded = optimizeForGoal(clusterModel, goal, goalViolations, excludedBrokersForLeadership, excludedBrokersForReplicaMove, checkPartitionsWithRFGreaterThanNumRacks);
                    // CC will check for partitions with RF greater than number of eligible racks just once, because regardless of the goal, the cluster
                    // will have the same (1) maximum replication factor and (2) rack count containing brokers that are eligible to host replicas.
                    checkPartitionsWithRFGreaterThanNumRacks = false;
                } else {
                    LOG.warn("Skipping goal violation detection for {} because load completeness requirement is not met.", goal);
                }
                // Aggregated for every detection goal, including the ones skipped above.
                provisionResponse.aggregate(goal.provisionResponse());
            }
        } finally {
            // Stop the detection timer on every exit path of the loop (normal, early return, or exception).
            ctx.stop();
        }
        _provisionResponse = provisionResponse;
        if (_isProvisionerEnabled) {
            // Rightsize the cluster (if needed)
            ProvisionerState provisionerState = _provisioner.rightsize(_provisionResponse.recommendationByRecommender(), new RightsizeOptions());
            if (provisionerState != null) {
                LOG.info("Provisioner state: {}.", provisionerState);
                _automatedRightsizingMeter.mark();
            }
        }
        Map<Boolean, List<String>> violatedGoalsByFixability = goalViolations.violatedGoalsByFixability();
        if (!violatedGoalsByFixability.isEmpty()) {
            // Report the violations as an anomaly, together with the aggregated provision response.
            goalViolations.setProvisionResponse(_provisionResponse);
            _anomalies.add(goalViolations);
        }
        refreshBalancednessScore(violatedGoalsByFixability);
    } catch (NotEnoughValidWindowsException nevwe) {
        LOG.debug("Skipping goal violation detection because there are not enough valid windows.", nevwe);
    } catch (KafkaCruiseControlException kcce) {
        LOG.warn("Goal violation detector received exception", kcce);
    } catch (Exception e) {
        LOG.error("Unexpected exception", e);
    } finally {
        if (clusterModelSemaphore != null) {
            try {
                clusterModelSemaphore.close();
            } catch (Exception e) {
                LOG.error("Received exception when closing auto closable semaphore", e);
            }
        }
        LOG.debug("Goal violation detection finished.");
    }
}
Also used : OperationProgress(com.linkedin.kafka.cruisecontrol.async.progress.OperationProgress) ProvisionResponse(com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse) KafkaCruiseControlException(com.linkedin.kafka.cruisecontrol.exception.KafkaCruiseControlException) NotEnoughValidWindowsException(com.linkedin.cruisecontrol.exception.NotEnoughValidWindowsException) OptimizationFailureException(com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException) ClusterModel(com.linkedin.kafka.cruisecontrol.model.ClusterModel) Goal(com.linkedin.kafka.cruisecontrol.analyzer.goals.Goal) KafkaCruiseControlUtils.balancednessCostByGoal(com.linkedin.kafka.cruisecontrol.KafkaCruiseControlUtils.balancednessCostByGoal) ExecutorState(com.linkedin.kafka.cruisecontrol.executor.ExecutorState) Timer(com.codahale.metrics.Timer) List(java.util.List)

Aggregations

ProvisionResponse (com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse)9 OptimizationFailureException (com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException)5 ProvisionRecommendation (com.linkedin.kafka.cruisecontrol.analyzer.ProvisionRecommendation)3 BalancingConstraint (com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint)2 Broker (com.linkedin.kafka.cruisecontrol.model.Broker)2 SortedReplicasHelper (com.linkedin.kafka.cruisecontrol.model.SortedReplicasHelper)2 Timer (com.codahale.metrics.Timer)1 NotEnoughValidWindowsException (com.linkedin.cruisecontrol.exception.NotEnoughValidWindowsException)1 KafkaCruiseControlUtils.balancednessCostByGoal (com.linkedin.kafka.cruisecontrol.KafkaCruiseControlUtils.balancednessCostByGoal)1 Goal (com.linkedin.kafka.cruisecontrol.analyzer.goals.Goal)1 OperationProgress (com.linkedin.kafka.cruisecontrol.async.progress.OperationProgress)1 KafkaCruiseControlException (com.linkedin.kafka.cruisecontrol.exception.KafkaCruiseControlException)1 ExecutorState (com.linkedin.kafka.cruisecontrol.executor.ExecutorState)1 ClusterModel (com.linkedin.kafka.cruisecontrol.model.ClusterModel)1 ClusterModelStats (com.linkedin.kafka.cruisecontrol.model.ClusterModelStats)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1