use of com.linkedin.kafka.cruisecontrol.analyzer.OptimizationOptions in project cruise-control by linkedin.
the class ReplicaDistributionGoal method rebalanceByMovingReplicasIn.
/**
 * Tries to raise the replica count of the given alive destination broker by moving replicas into it from other
 * brokers of the cluster.
 *
 * <p>Source brokers are tried in the order of (1) most current offline replicas, then (2) most replicas, then
 * (3) smallest broker id. When fast mode is enabled, the attempt stops once the per-broker move timeout taken from
 * the balancing constraint has elapsed.
 *
 * @param aliveDestBroker Destination broker that replicas are moved into.
 * @param clusterModel Cluster model providing the source brokers; balancing actions are applied to it.
 * @param optimizedGoals Goals handed through to {@code maybeApplyBalancingAction}.
 * @param optimizationOptions Options handed through to {@code maybeApplyBalancingAction}; {@code fastMode()} enables
 *                            the timeout.
 * @return {@code false} as soon as a successful move leaves the destination broker with at least
 *         {@code _balanceLowerLimit} replicas; {@code true} if the source brokers are exhausted or the fast mode
 *         timeout is hit before that happens.
 */
private boolean rebalanceByMovingReplicasIn(Broker aliveDestBroker, ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) {
  long moveStartTimeMs = System.currentTimeMillis();
  PriorityQueue<Broker> eligibleBrokers = new PriorityQueue<>((b1, b2) -> {
    // Brokers are sorted by (1) current offline replica count then (2) all replica count then (3) broker id.
    int resultByOfflineReplicas = Integer.compare(b2.currentOfflineReplicas().size(), b1.currentOfflineReplicas().size());
    if (resultByOfflineReplicas == 0) {
      int resultByAllReplicas = Integer.compare(b2.replicas().size(), b1.replicas().size());
      return resultByAllReplicas == 0 ? Integer.compare(b1.id(), b2.id()) : resultByAllReplicas;
    }
    return resultByOfflineReplicas;
  });
  // Source broker can be dead, alive, or may have bad disks.
  if (_fixOfflineReplicasOnly) {
    // Every broker other than the destination is a potential source.
    clusterModel.brokers().stream().filter(sourceBroker -> sourceBroker.id() != aliveDestBroker.id()).forEach(eligibleBrokers::add);
  } else {
    // A broker is a source if it is above the lower limit, has offline replicas, or is excluded for replica move.
    for (Broker sourceBroker : clusterModel.brokers()) {
      if (sourceBroker.replicas().size() > _balanceLowerLimit || !sourceBroker.currentOfflineReplicas().isEmpty() || isExcludedForReplicaMove(sourceBroker)) {
        eligibleBrokers.add(sourceBroker);
      }
    }
  }
  List<Broker> candidateBrokers = Collections.singletonList(aliveDestBroker);
  boolean fastMode = optimizationOptions.fastMode();
  // Stop when no replicas can be moved in anymore.
  while (!eligibleBrokers.isEmpty()) {
    if (fastMode && remainingTimeMs(_balancingConstraint.fastModePerBrokerMoveTimeoutMs(), moveStartTimeMs) <= 0) {
      LOG.debug("Move replicas in timeout in fast mode for broker {}.", aliveDestBroker.id());
      break;
    }
    Broker sourceBroker = eligibleBrokers.poll();
    for (Replica replica : sourceBroker.trackedSortedReplicas(replicaSortName(this, false, false)).sortedReplicas(true)) {
      Broker b = maybeApplyBalancingAction(clusterModel, replica, candidateBrokers, ActionType.INTER_BROKER_REPLICA_MOVEMENT, optimizedGoals, optimizationOptions);
      // Only check the status if an action was actually taken. This also covers a source broker that has nothing
      // to move in: such a broker is never re-enqueued.
      if (b != null) {
        if (aliveDestBroker.replicas().size() >= _balanceLowerLimit) {
          // Note that the broker passed to this method is always alive; hence, there is no need to check if it is dead.
          return false;
        }
        // If the source broker now has fewer offline replicas than the head of the queue, or the same number of
        // offline replicas but fewer replicas in total, re-enqueue it and switch to the next broker.
        if (!eligibleBrokers.isEmpty()) {
          int result = Integer.compare(sourceBroker.currentOfflineReplicas().size(), eligibleBrokers.peek().currentOfflineReplicas().size());
          // Integer.compare only guarantees the sign of its result, so test for "negative" rather than for -1.
          if (result < 0 || (result == 0 && sourceBroker.replicas().size() < eligibleBrokers.peek().replicas().size())) {
            eligibleBrokers.add(sourceBroker);
            break;
          }
        }
      }
    }
  }
  return true;
}
use of com.linkedin.kafka.cruisecontrol.analyzer.OptimizationOptions in project cruise-control by linkedin.
the class LeaderReplicaDistributionGoal method rebalanceByMovingLeadershipOut.
/**
 * Tries to reduce the number of leader replicas on the given broker by moving leadership to other brokers that hold
 * a replica of the same partition.
 *
 * <p>Nothing is attempted while the cluster model reports dead brokers. Leaders of excluded topics are skipped, and in
 * fast mode the loop stops once the per-broker move timeout has elapsed.
 *
 * @param broker Broker to move leadership away from.
 * @param clusterModel Cluster model used to look up partitions; balancing actions are applied to it.
 * @param optimizedGoals Goals handed through to {@code maybeApplyBalancingAction}.
 * @param optimizationOptions Options supplying the excluded topics and the fast mode flag.
 * @return {@code false} once enough leadership moves succeeded for the locally tracked leader count to reach the
 *         upper limit that applies to this broker; {@code true} in every other case.
 */
private boolean rebalanceByMovingLeadershipOut(Broker broker, ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) {
  long moveStartTimeMs = System.currentTimeMillis();
  if (!clusterModel.deadBrokers().isEmpty()) {
    return true;
  }
  // A source broker that is excluded for replica move gets an upper limit of 0.
  int sourceUpperLimit;
  if (isExcludedForReplicaMove(broker)) {
    sourceUpperLimit = 0;
  } else {
    sourceUpperLimit = _balanceUpperLimit;
  }
  int remainingLeaders = broker.leaderReplicas().size();
  Set<String> topicsToSkip = optimizationOptions.excludedTopics();
  boolean timeLimited = optimizationOptions.fastMode();
  // Iterate over a copy of the leader replicas rather than the broker's own collection.
  for (Replica leader : new HashSet<>(broker.leaderReplicas())) {
    if (timeLimited && remainingTimeMs(_balancingConstraint.fastModePerBrokerMoveTimeoutMs(), moveStartTimeMs) <= 0) {
      LOG.debug("Move leadership out timeout in fast mode for broker {}.", broker.id());
      break;
    }
    if (topicsToSkip.contains(leader.topicPartition().topic())) {
      continue;
    }
    // Eligible destinations: every other broker of the partition whose replica is not currently offline.
    Set<Broker> destinations = new HashSet<>();
    for (Broker host : clusterModel.partition(leader.topicPartition()).partitionBrokers()) {
      if (host != broker && !host.replica(leader.topicPartition()).isCurrentOffline()) {
        destinations.add(host);
      }
    }
    Broker newLeaderBroker = maybeApplyBalancingAction(clusterModel, leader, destinations, ActionType.LEADERSHIP_MOVEMENT, optimizedGoals, optimizationOptions);
    // The counter is only decremented (and checked) when a leadership move actually happened.
    if (newLeaderBroker != null && --remainingLeaders <= sourceUpperLimit) {
      return false;
    }
  }
  return true;
}
use of com.linkedin.kafka.cruisecontrol.analyzer.OptimizationOptions in project cruise-control by linkedin.
the class RemoveBrokersRunnable method workWithClusterModel.
/**
 * Builds a cluster model in which the brokers to remove are marked dead, computes optimization proposals for it and,
 * unless this is a dry run, starts the removal execution with those proposals.
 *
 * @return The optimizer result computed for the modified cluster model.
 * @throws IllegalArgumentException if no broker of the cluster is alive after marking the removed brokers dead.
 */
@Override
protected OptimizerResult workWithClusterModel() throws KafkaCruiseControlException, TimeoutException, NotEnoughValidWindowsException {
  ClusterModel model = _kafkaCruiseControl.clusterModel(_combinedCompletenessRequirements, _allowCapacityEstimation, _operationProgress);
  sanityCheckBrokersHavingOfflineReplicasOnBadDisks(_goals, model);
  // Mark every broker that is to be removed as dead in the model.
  for (Integer removedBrokerId : _removedBrokerIds) {
    model.setBrokerState(removedBrokerId, Broker.State.DEAD);
  }
  if (!model.isClusterAlive()) {
    throw new IllegalArgumentException("All brokers are dead in the cluster.");
  }
  // Requested destination brokers, if any, are sanity checked for presence.
  if (!_destinationBrokerIds.isEmpty()) {
    _kafkaCruiseControl.sanityCheckBrokerPresence(_destinationBrokerIds);
  }
  // NOTE(review): _destinationBrokerIds is passed as both the 4th and the 9th argument. Confirm against the signature
  // of computeOptimizationOptions that the 4th argument is not meant to be _removedBrokerIds.
  OptimizationOptions options = computeOptimizationOptions(model,
                                                           false,
                                                           _kafkaCruiseControl,
                                                           _destinationBrokerIds,
                                                           _dryRun,
                                                           _excludeRecentlyDemotedBrokers,
                                                           _excludeRecentlyRemovedBrokers,
                                                           _excludedTopics,
                                                           _destinationBrokerIds,
                                                           false,
                                                           _fastMode);
  OptimizerResult optimizerResult = _kafkaCruiseControl.optimizations(model, _goalsByPriority, _operationProgress, null, options);
  if (_dryRun) {
    // Dry run: report the proposals without executing anything.
    return optimizerResult;
  }
  _kafkaCruiseControl.executeRemoval(optimizerResult.goalProposals(),
                                     _throttleRemovedBrokers,
                                     _removedBrokerIds,
                                     isKafkaAssignerMode(_goals),
                                     _concurrentInterBrokerPartitionMovements,
                                     _maxInterBrokerPartitionMovements,
                                     _concurrentLeaderMovements,
                                     _executionProgressCheckIntervalMs,
                                     _replicaMovementStrategy,
                                     _replicationThrottle,
                                     _isTriggeredByUserRequest,
                                     _uuid);
  return optimizerResult;
}
use of com.linkedin.kafka.cruisecontrol.analyzer.OptimizationOptions in project cruise-control by linkedin.
the class ResourceDistributionGoal method rebalanceBySwappingLoadOut.
/**
 * Tries to bring the load of the given broker under the balance upper limit by swapping its replicas with replicas
 * that live on candidate brokers.
 *
 * <p>Candidates are the non-empty alive brokers under the balance upper threshold for this goal's resource, polled
 * from a priority queue ordered by {@code _brokerComparator}. The whole attempt is bounded by twice the fast mode
 * per-broker move timeout.
 *
 * @param broker Source broker to swap load out of.
 * @param clusterModel Cluster model providing the candidate brokers; swap actions are applied to it.
 * @param optimizedGoals Goals handed through to {@code maybeApplySwapAction}.
 * @param optimizationOptions Options supplying excluded topics and the brokers excluded for replica move / leadership.
 * @param moveImmigrantsOnly Handed through to {@code sortedCandidateReplicas} for both source and candidate replicas.
 * @return {@code false} if a swap left the broker's load under the balance upper limit; {@code true} otherwise
 *         (source not usable, nothing to swap, timeout, or candidates exhausted).
 */
private boolean rebalanceBySwappingLoadOut(Broker broker, ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions, boolean moveImmigrantsOnly) {
  long swapStartTimeMs = System.currentTimeMillis();
  // A source broker that is (1) dead, or (2) excluded for replica move cannot take part in a swap.
  boolean sourceCannotSwap = !broker.isAlive() || optimizationOptions.excludedBrokersForReplicaMove().contains(broker.id());
  if (sourceCannotSwap) {
    return true;
  }
  Set<String> excludedTopics = optimizationOptions.excludedTopics();
  // Track the source replicas that are eligible for a swap and fetch them.
  String sourceSortName = sortedCandidateReplicas(broker, clusterModel, excludedTopics, 0.0, false, false, resource() == Resource.NW_OUT, moveImmigrantsOnly);
  SortedSet<Replica> sourceReplicas = broker.trackedSortedReplicas(sourceSortName).sortedReplicas(false);
  if (sourceReplicas.isEmpty()) {
    // Nothing on the source broker passed the filter; drop the tracking again.
    broker.untrackSortedReplicas(sourceSortName);
    return true;
  }
  double maxSourceReplicaLoad = getMaxReplicaLoad(sourceReplicas);
  // A source broker that is excluded for leadership may only be swapped with followers.
  boolean swapWithFollowersOnly = optimizationOptions.excludedBrokersForLeadership().contains(broker.id());
  PriorityQueue<Broker> candidateBrokerPQ = new PriorityQueue<>(_brokerComparator);
  String candidateSortName = null;
  Set<Broker> nonEmptyCandidates = clusterModel.aliveBrokersUnderThreshold(resource(), _balanceUpperThreshold)
                                               .stream()
                                               .filter(b -> !b.replicas().isEmpty())
                                               .collect(Collectors.toSet());
  for (Broker candidate : nonEmptyCandidates) {
    // Track the replicas on the candidate broker that may be swapped with; the returned sort name is kept for the
    // lookups in the swap loop below.
    candidateSortName = sortedCandidateReplicas(candidate, clusterModel, excludedTopics, maxSourceReplicaLoad, true, swapWithFollowersOnly, false, moveImmigrantsOnly);
    candidateBrokerPQ.add(candidate);
  }
  long perBrokerSwapTimeoutMs = 2 * _balancingConstraint.fastModePerBrokerMoveTimeoutMs();
  while (!candidateBrokerPQ.isEmpty()) {
    if (remainingTimeMs(perBrokerSwapTimeoutMs, swapStartTimeMs) <= 0) {
      LOG.debug("Swap load out timeout for broker {}.", broker.id());
      break;
    }
    Broker candidate = candidateBrokerPQ.poll();
    boolean swapped = false;
    for (Replica sourceReplica : sourceReplicas) {
      // Try to swap the source replica with one of the candidate's replicas; null means no swap was applied.
      Replica swappedIn = maybeApplySwapAction(clusterModel,
                                               sourceReplica,
                                               candidate.trackedSortedReplicas(candidateSortName).sortedReplicas(false),
                                               optimizedGoals,
                                               optimizationOptions);
      if (swappedIn == null) {
        if (remainingTimeMs(perBrokerSwapTimeoutMs, swapStartTimeMs) <= 0) {
          LOG.debug("Swap load out timeout for source replica {}.", sourceReplica);
          clusterModel.clearSortedReplicas();
          return true;
        }
        continue;
      }
      if (isLoadUnderBalanceUpperLimit(broker)) {
        // The swap balanced this broker; nothing more to do.
        clusterModel.clearSortedReplicas();
        return false;
      }
      // A swap happened but the broker is not balanced yet: stop iterating the current source replicas.
      swapped = true;
      break;
    }
    if (swapped) {
      // Re-read the source replicas after the swap.
      sourceReplicas = broker.trackedSortedReplicas(sourceSortName).sortedReplicas(false);
      // The candidate stays eligible after a successful swap -- it might hold further replicas to swap with.
      candidateBrokerPQ.add(candidate);
    }
  }
  clusterModel.clearSortedReplicas();
  return true;
}
use of com.linkedin.kafka.cruisecontrol.analyzer.OptimizationOptions in project cruise-control by linkedin.
the class TopicReplicaDistributionGoal method rebalanceByMovingReplicasIn.
/**
 * Tries to raise the number of replicas of the given topic on the given alive destination broker by moving replicas
 * of that topic into it from other brokers.
 *
 * <p>Source brokers are tried in the order of (1) most current offline replicas of the topic, then (2) most replicas
 * of the topic, then (3) smallest broker id.
 *
 * @param aliveDestBroker Destination broker that topic replicas are moved into.
 * @param topic Topic whose replicas are moved.
 * @param clusterModel Cluster model providing the source brokers; balancing actions are applied to it.
 * @param optimizedGoals Goals handed through to {@code maybeApplyBalancingAction}.
 * @param optimizationOptions Options handed through to {@code maybeApplyBalancingAction}.
 * @return {@code false} as soon as the locally tracked replica count of the topic on the destination broker reaches
 *         the topic's balance lower limit; {@code true} if the source brokers are exhausted first.
 */
private boolean rebalanceByMovingReplicasIn(Broker aliveDestBroker, String topic, ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) {
  PriorityQueue<Broker> eligibleBrokers = new PriorityQueue<>((b1, b2) -> {
    // Topic replica figures of b2.
    Collection<Replica> topicReplicasOnB2 = b2.replicasOfTopicInBroker(topic);
    int topicReplicaCountOnB2 = topicReplicasOnB2.size();
    int offlineTopicReplicaCountOnB2 = GoalUtils.retainCurrentOfflineBrokerReplicas(b2, topicReplicasOnB2).size();
    // Topic replica figures of b1.
    Collection<Replica> topicReplicasOnB1 = b1.replicasOfTopicInBroker(topic);
    int topicReplicaCountOnB1 = topicReplicasOnB1.size();
    int offlineTopicReplicaCountOnB1 = GoalUtils.retainCurrentOfflineBrokerReplicas(b1, topicReplicasOnB1).size();
    // (1) More offline topic replicas first.
    int byOfflineReplicas = Integer.compare(offlineTopicReplicaCountOnB2, offlineTopicReplicaCountOnB1);
    if (byOfflineReplicas != 0) {
      return byOfflineReplicas;
    }
    // (2) More topic replicas first.
    int byAllReplicas = Integer.compare(topicReplicaCountOnB2, topicReplicaCountOnB1);
    if (byAllReplicas != 0) {
      return byAllReplicas;
    }
    // (3) Smaller broker id first.
    return Integer.compare(b1.id(), b2.id());
  });
  // Source broker can be dead, alive, or may have bad disks.
  for (Broker sourceBroker : clusterModel.brokers()) {
    boolean eligible;
    if (_fixOfflineReplicasOnly) {
      // Every broker other than the destination is a potential source.
      eligible = sourceBroker.id() != aliveDestBroker.id();
    } else {
      // A source is above the topic's lower limit, has offline replicas, or is excluded for replica move.
      eligible = sourceBroker.numReplicasOfTopicInBroker(topic) > _balanceLowerLimitByTopic.get(topic)
                 || !sourceBroker.currentOfflineReplicas().isEmpty()
                 || isExcludedForReplicaMove(sourceBroker);
    }
    if (eligible) {
      eligibleBrokers.add(sourceBroker);
    }
  }
  // Local counter of the topic's replicas on the destination broker; bumped after every successful move.
  int numReplicasOfTopicInBroker = aliveDestBroker.replicasOfTopicInBroker(topic).size();
  Set<Broker> candidateBrokers = Collections.singleton(aliveDestBroker);
  // Stop when no topic replicas can be moved in anymore.
  while (!eligibleBrokers.isEmpty()) {
    Broker sourceBroker = eligibleBrokers.poll();
    SortedSet<Replica> replicasToMove = replicasToMoveOut(sourceBroker, topic);
    int numOfflineTopicReplicas = GoalUtils.retainCurrentOfflineBrokerReplicas(sourceBroker, replicasToMove).size();
    for (Replica replica : replicasToMove) {
      // Capture the offline state before the move is attempted.
      boolean wasOffline = replica.isCurrentOffline();
      Broker destination = maybeApplyBalancingAction(clusterModel, replica, candidateBrokers, ActionType.INTER_BROKER_REPLICA_MOVEMENT, optimizedGoals, optimizationOptions);
      // Only check the status if an action was actually taken. This also covers a source broker that has nothing
      // to move in: such a broker is never re-enqueued.
      if (destination == null) {
        continue;
      }
      if (wasOffline) {
        numOfflineTopicReplicas--;
      }
      numReplicasOfTopicInBroker++;
      if (numReplicasOfTopicInBroker >= _balanceLowerLimitByTopic.get(topic)) {
        // Note that the broker passed to this method is always alive; hence, there is no need to check if it is dead.
        return false;
      }
      // If the source broker has no offline topic replicas left and now holds fewer replicas of the topic than the
      // head of the queue, re-enqueue the source broker and switch to the next broker.
      if (!eligibleBrokers.isEmpty()
          && numOfflineTopicReplicas == 0
          && sourceBroker.numReplicasOfTopicInBroker(topic) < eligibleBrokers.peek().numReplicasOfTopicInBroker(topic)) {
        eligibleBrokers.add(sourceBroker);
        break;
      }
    }
  }
  return true;
}
Aggregations