use of io.strimzi.api.kafka.model.KafkaRebalance in project strimzi by strimzi.
the class KafkaRebalanceAssemblyOperator method onRebalancing.
/**
 * Handles a {@code KafkaRebalance} resource that is in the {@code Rebalancing} state.
 *
 * When the strimzi.io/rebalance annotation is {@code none}, a periodic timer is started. On every tick the
 * current {@code KafkaRebalance} resource is re-read and then:
 * <ul>
 *   <li>if the resource no longer exists, the timer is cancelled and the future completes with {@code null};</li>
 *   <li>if the resource is no longer in the {@code Rebalancing} state, the timer is cancelled and the future
 *       completes with the status currently stored on the resource;</li>
 *   <li>if the annotation on the re-read resource is {@code stop}, the timer is cancelled, the Cruise Control
 *       "stop execution" REST API is called and the next state is {@code Stopped};</li>
 *   <li>otherwise the Cruise Control user task status is queried: {@code COMPLETED} leads to {@code Ready},
 *       {@code COMPLETED_WITH_ERROR} leads to {@code NotReady}, {@code IN_EXECUTION} completes with
 *       {@code Rebalancing} only when the stored status has no optimization result yet, {@code ACTIVE} keeps
 *       polling, and any other task status fails the future.</li>
 * </ul>
 * Failures of the user task status request are counted; once {@code MAX_API_RETRIES} is reached the next tick
 * cancels the timer and fails the future. A successful {@code IN_EXECUTION} or {@code ACTIVE} response resets
 * the counter.
 *
 * When the annotation passed in is anything other than {@code none}, no timer is started and the future
 * completes immediately with the status already present on {@code kafkaRebalance}.
 *
 * @param reconciliation Reconciliation information
 * @param host Cruise Control service to which sending the REST API requests
 * @param apiClient Cruise Control REST API client instance
 * @param kafkaRebalance Current {@code KafkaRebalance} resource
 * @param rebalanceAnnotation The current value for the strimzi.io/rebalance annotation
 * @return a Future with the next {@code MapAndStatus<ConfigMap, KafkaRebalanceStatus>} including the state
 *         ({@code null} result if the resource was deleted while polling)
 */
private Future<MapAndStatus<ConfigMap, KafkaRebalanceStatus>> onRebalancing(Reconciliation reconciliation, String host, CruiseControlApi apiClient, KafkaRebalance kafkaRebalance, KafkaRebalanceAnnotation rebalanceAnnotation) {
    Promise<MapAndStatus<ConfigMap, KafkaRebalanceStatus>> p = Promise.promise();
    if (rebalanceAnnotation == KafkaRebalanceAnnotation.none) {
        LOGGER.infoCr(reconciliation, "Starting Cruise Control rebalance user task status timer");
        String sessionId = kafkaRebalance.getStatus().getSessionId();
        AtomicInteger ccApiErrorCount = new AtomicInteger();
        vertx.setPeriodic(REBALANCE_POLLING_TIMER_MS, t -> {
            // Check that we have not already failed to contact the API beyond the allowed number of times.
            if (ccApiErrorCount.get() >= MAX_API_RETRIES) {
                vertx.cancelTimer(t);
                p.fail(new CruiseControlRestException("Unable to reach Cruise Control API after " + MAX_API_RETRIES + " attempts"));
                // The promise is now settled: do not issue further requests from this tick, otherwise a
                // late response could try to settle the promise a second time.
                return;
            }
            kafkaRebalanceOperator.getAsync(kafkaRebalance.getMetadata().getNamespace(), kafkaRebalance.getMetadata().getName()).onSuccess(currentKafkaRebalance -> {
                // Checking that the resource was not deleted between periodic polls
                if (currentKafkaRebalance != null) {
                    // Safety check as timer might be called again (from a delayed timer firing)
                    if (state(currentKafkaRebalance) == KafkaRebalanceState.Rebalancing) {
                        if (rebalanceAnnotation(reconciliation, currentKafkaRebalance) == KafkaRebalanceAnnotation.stop) {
                            LOGGER.debugCr(reconciliation, "Stopping current Cruise Control rebalance user task");
                            vertx.cancelTimer(t);
                            apiClient.stopExecution(host, CruiseControl.REST_API_PORT)
                                .onSuccess(r -> p.complete(buildRebalanceStatus(null, KafkaRebalanceState.Stopped, validate(reconciliation, kafkaRebalance))))
                                .onFailure(e -> {
                                    Throwable cause = failureCause(e);
                                    LOGGER.errorCr(reconciliation, "Cruise Control stopping execution failed", cause);
                                    p.fail(cause);
                                });
                        } else {
                            LOGGER.infoCr(reconciliation, "Getting Cruise Control rebalance user task status");
                            Set<Condition> conditions = validate(reconciliation, kafkaRebalance);
                            validateAnnotation(reconciliation, conditions, KafkaRebalanceState.Rebalancing, rebalanceAnnotation(reconciliation, currentKafkaRebalance), kafkaRebalance);
                            apiClient.getUserTaskStatus(host, CruiseControl.REST_API_PORT, sessionId).onSuccess(cruiseControlResponse -> {
                                JsonObject taskStatusJson = cruiseControlResponse.getJson();
                                CruiseControlUserTaskStatus taskStatus = CruiseControlUserTaskStatus.lookup(taskStatusJson.getString("Status"));
                                switch (taskStatus) {
                                    case COMPLETED:
                                        vertx.cancelTimer(t);
                                        LOGGER.infoCr(reconciliation, "Rebalance ({}) is now complete", sessionId);
                                        p.complete(buildRebalanceStatus(kafkaRebalance, null, KafkaRebalanceState.Ready, taskStatusJson, conditions));
                                        break;
                                    case COMPLETED_WITH_ERROR:
                                        // TODO: There doesn't seem to be a way to retrieve the actual error message from the user tasks endpoint?
                                        //       We may need to propose an upstream PR for this.
                                        // TODO: Once we can get the error details we need to add an error field to the Rebalance Status to hold
                                        //       details of any issues while rebalancing.
                                        LOGGER.errorCr(reconciliation, "Rebalance ({}) optimization proposal has failed to complete", sessionId);
                                        vertx.cancelTimer(t);
                                        p.complete(buildRebalanceStatus(sessionId, KafkaRebalanceState.NotReady, conditions));
                                        break;
                                    case IN_EXECUTION:
                                        // Rebalance is still in progress.
                                        // If the status stored on the resource has no optimization result yet, stop polling and
                                        // return a Rebalancing status built from the task JSON so that it can be recorded.
                                        if (currentKafkaRebalance.getStatus().getOptimizationResult() == null || currentKafkaRebalance.getStatus().getOptimizationResult().isEmpty()) {
                                            LOGGER.infoCr(reconciliation, "Rebalance ({}) optimization proposal is now ready and has been added to the status", sessionId);
                                            // Cancel the timer so that the status is returned and updated.
                                            vertx.cancelTimer(t);
                                            p.complete(buildRebalanceStatus(kafkaRebalance, sessionId, KafkaRebalanceState.Rebalancing, taskStatusJson, conditions));
                                        }
                                        // The API answered, so earlier failures were transient.
                                        ccApiErrorCount.set(0);
                                        break;
                                    case ACTIVE:
                                        // Rebalance proposal is still being calculated.
                                        // If a rebalance(dryrun=false) was called and the proposal is still being prepared then the task
                                        // will be in an ACTIVE state. When the proposal is ready it will shift to IN_EXECUTION and we will
                                        // check that the optimisation proposal is added to the status on the next reconcile.
                                        LOGGER.infoCr(reconciliation, "Rebalance ({}) optimization proposal is still being prepared", sessionId);
                                        ccApiErrorCount.set(0);
                                        break;
                                    default:
                                        LOGGER.errorCr(reconciliation, "Unexpected state {}", taskStatus);
                                        vertx.cancelTimer(t);
                                        p.fail("Unexpected state " + taskStatus);
                                        break;
                                }
                            }).onFailure(e -> {
                                LOGGER.errorCr(reconciliation, "Cruise Control getting rebalance task status failed", failureCause(e));
                                // To make sure this error is not just a temporary problem with the network we retry several times.
                                // If the number of errors reaches the MAX_API_RETRIES limit then the next timer tick will fail the promise.
                                ccApiErrorCount.getAndIncrement();
                            });
                        }
                    } else {
                        // The resource has left the Rebalancing state (or this is a late tick of a timer that was
                        // already cancelled). Stop polling so the timer does not leak, and use tryComplete because
                        // the promise may already have been settled by an earlier tick.
                        vertx.cancelTimer(t);
                        p.tryComplete(new MapAndStatus<>(null, currentKafkaRebalance.getStatus()));
                    }
                } else {
                    LOGGER.debugCr(reconciliation, "Rebalance resource was deleted, stopping the request time");
                    vertx.cancelTimer(t);
                    p.complete();
                }
            }).onFailure(e -> {
                Throwable cause = failureCause(e);
                LOGGER.errorCr(reconciliation, "Cruise Control getting rebalance resource failed", cause);
                vertx.cancelTimer(t);
                p.fail(cause);
            });
        });
    } else {
        // Any annotation other than "none" is not handled here: hand back the status unchanged.
        p.complete(new MapAndStatus<>(null, kafkaRebalance.getStatus()));
    }
    return p.future();
}

/**
 * Returns the nested cause of a failure, or the failure itself when it carries no cause, so that logging and
 * promise failure never end up with a {@code null} throwable.
 *
 * @param failure The throwable reported by a failed future
 * @return {@code failure.getCause()} when present, otherwise {@code failure}
 */
private static Throwable failureCause(Throwable failure) {
    Throwable nested = failure.getCause();
    return nested != null ? nested : failure;
}
use of io.strimzi.api.kafka.model.KafkaRebalance in project strimzi by strimzi.
the class KafkaRebalanceAssemblyOperatorTest method testNewWithMissingHardGoalsRebalance.
/**
 * Verifies the 'New' to 'NotReady' transition caused by a "missing hard goals" error on the rebalance endpoint.
 *
 * 1. A new KafkaRebalance resource is created with an explicit goal list; it is in the 'New' state
 * 2. The operator requests a rebalance proposal through the Cruise Control REST API
 * 3. The operator gets a "missing hard goals" error instead of a proposal
 * 4. The KafkaRebalance resource moves to the 'NotReady' state
 */
@Test
public void testNewWithMissingHardGoalsRebalance(VertxTestContext context) throws IOException, URISyntaxException {
    // Build the resource in one go: the spec only carries the two explicitly requested goals.
    KafkaRebalance rebalanceResource = createKafkaRebalance(
            CLUSTER_NAMESPACE,
            CLUSTER_NAME,
            RESOURCE_NAME,
            new KafkaRebalanceSpecBuilder().withGoals("DiskCapacityGoal", "CpuCapacityGoal").build());

    // Delegate to the shared scenario, targeting the plain rebalance endpoint.
    krNewWithMissingHardGoals(context, CruiseControlEndpoints.REBALANCE, rebalanceResource);
}
use of io.strimzi.api.kafka.model.KafkaRebalance in project strimzi by strimzi.
the class KafkaRebalanceAssemblyOperatorTest method krNewToProposalReadyToRebalancingToReadyThenRefresh.
/**
 * Shared scenario: drives a KafkaRebalance through
 * New -> ProposalReady -> (approve) Rebalancing -> Ready -> (refresh) ProposalReady,
 * asserting the resource state after every reconciliation.
 *
 * @param context Vert.x test context used for assertions and the final checkpoint
 * @param pendingCalls Passed to the mock rebalance endpoint setup (number of pending calls before a response)
 * @param activeCalls Passed to the mock user tasks endpoint setup
 * @param inExecutionCalls Passed to the mock user tasks endpoint setup
 * @param endpoint Cruise Control endpoint the mock rebalance response is registered for
 * @param kr KafkaRebalance resource to create and reconcile
 */
private void krNewToProposalReadyToRebalancingToReadyThenRefresh(VertxTestContext context, int pendingCalls, int activeCalls, int inExecutionCalls, CruiseControlEndpoints endpoint, KafkaRebalance kr) throws IOException, URISyntaxException {
    // Prepare the mock Cruise Control server: rebalance endpoint first, then the user tasks endpoint.
    MockCruiseControl.setupCCRebalanceResponse(ccServer, pendingCalls, endpoint);
    MockCruiseControl.setupCCUserTasksResponseNoGoals(ccServer, activeCalls, inExecutionCalls);

    Crds.kafkaRebalanceOperation(client).inNamespace(CLUSTER_NAMESPACE).create(kr);
    when(mockKafkaOps.getAsync(CLUSTER_NAMESPACE, CLUSTER_NAME)).thenReturn(Future.succeededFuture(kafka));
    mockSecretResources();

    // Every later step addresses the resource by this name.
    final String rebalanceName = kr.getMetadata().getName();
    mockRebalanceOperator(mockRebalanceOps, mockCmOps, CLUSTER_NAMESPACE, rebalanceName, client);

    Checkpoint checkpoint = context.checkpoint();

    kcrao.reconcileRebalance(new Reconciliation("test-trigger", KafkaRebalance.RESOURCE_KIND, CLUSTER_NAMESPACE, rebalanceName), kr)
        // first reconcile: New -> ProposalReady
        .onComplete(context.succeeding(v -> assertState(context, client, CLUSTER_NAMESPACE, rebalanceName, KafkaRebalanceState.ProposalReady)))
        .compose(v -> {
            // approve the proposal by annotating the resource, then reconcile again
            KafkaRebalance approved = annotate(client, CLUSTER_NAMESPACE, rebalanceName, KafkaRebalanceAnnotation.approve);
            return kcrao.reconcileRebalance(new Reconciliation("test-trigger", KafkaRebalance.RESOURCE_KIND, CLUSTER_NAMESPACE, rebalanceName), approved);
        })
        // approval moved the resource from ProposalReady to Rebalancing
        .onComplete(context.succeeding(v -> {
            assertState(context, client, CLUSTER_NAMESPACE, rebalanceName, KafkaRebalanceState.Rebalancing);
        }))
        .compose(v -> {
            // re-read the resource and reconcile once more so the Rebalancing state is processed
            KafkaRebalance rebalancing = Crds.kafkaRebalanceOperation(client).inNamespace(CLUSTER_NAMESPACE).withName(rebalanceName).get();
            return kcrao.reconcileRebalance(new Reconciliation("test-trigger", KafkaRebalance.RESOURCE_KIND, CLUSTER_NAMESPACE, rebalanceName), rebalancing);
        })
        // the rebalance finished: Rebalancing -> Ready
        .onComplete(context.succeeding(v -> {
            assertState(context, client, CLUSTER_NAMESPACE, rebalanceName, KafkaRebalanceState.Ready);
        }))
        .compose(v -> {
            // apply the "refresh" annotation to the resource in the Ready state
            KafkaRebalance refreshed = annotate(client, CLUSTER_NAMESPACE, rebalanceName, KafkaRebalanceAnnotation.refresh);
            return kcrao.reconcileRebalance(new Reconciliation("test-trigger", KafkaRebalance.RESOURCE_KIND, CLUSTER_NAMESPACE, rebalanceName), refreshed);
        })
        // refresh requested a new proposal: Ready -> ProposalReady
        .onComplete(context.succeeding(v -> {
            assertState(context, client, CLUSTER_NAMESPACE, rebalanceName, KafkaRebalanceState.ProposalReady);
            checkpoint.flag();
        }));
}
use of io.strimzi.api.kafka.model.KafkaRebalance in project strimzi by strimzi.
the class KafkaRebalanceAssemblyOperatorTest method testKafkaClusterNotMatchingLabelSelector.
/**
 * A KafkaRebalance whose Kafka cluster does not carry the selector labels configured for the cluster operator
 * must be ignored: reconciling it should succeed without touching the KafkaRebalance operator.
 */
@Test
public void testKafkaClusterNotMatchingLabelSelector(VertxTestContext context) {
    KafkaRebalance rebalanceResource = createKafkaRebalance(CLUSTER_NAMESPACE, CLUSTER_NAME, RESOURCE_NAME, EMPTY_KAFKA_REBALANCE_SPEC);

    // Use fully mocked resource operators for this test.
    ResourceOperatorSupplier mockedSupplier = ResourceUtils.supplierWithMocks(true);
    mockKafkaOps = mockedSupplier.kafkaOperator;
    mockRebalanceOps = mockedSupplier.kafkaRebalanceOperator;

    when(mockKafkaOps.getAsync(CLUSTER_NAMESPACE, CLUSTER_NAME)).thenReturn(Future.succeededFuture(kafka));

    // Operator configuration carrying a custom-resource selector label.
    Labels selectorLabels = Labels.fromMap(Map.of("selectorLabel", "value"));
    ClusterOperatorConfig operatorConfig = new ClusterOperatorConfig(
            singleton(CLUSTER_NAMESPACE),
            60_000,
            120_000,
            300_000,
            false,
            true,
            KafkaVersionTestUtils.getKafkaVersionLookup(),
            null,
            null,
            null,
            null,
            selectorLabels,
            "",
            10,
            10_000,
            30,
            false,
            1024);

    kcrao = new KafkaRebalanceAssemblyOperator(Vertx.vertx(), mockedSupplier, operatorConfig);

    Checkpoint checkpoint = context.checkpoint();
    Reconciliation reconciliation = new Reconciliation("test-trigger", KafkaRebalance.RESOURCE_KIND, CLUSTER_NAMESPACE, RESOURCE_NAME);

    kcrao.reconcileRebalance(reconciliation, rebalanceResource)
        .onComplete(context.succeeding(v -> context.verify(() -> {
            // The Kafka resource labels do not match the selector, so the KafkaRebalance must not be
            // reconciled and the rebalance ops should have no interactions.
            verifyNoInteractions(mockRebalanceOps);
            checkpoint.flag();
        })));
}
use of io.strimzi.api.kafka.model.KafkaRebalance in project strimzi by strimzi.
the class KafkaRebalanceAssemblyOperatorTest method testNewToProposalReadyToRebalancingToReadyThenRefreshRemoveBroker.
/**
 * Remove-broker variant of the New -> ProposalReady -> Rebalancing -> Ready -> refresh scenario.
 * See {@link KafkaRebalanceAssemblyOperatorTest#testNewToProposalReadyToRebalancingToReadyThenRefreshRebalance}
 * for the step-by-step description.
 */
@Test
public void testNewToProposalReadyToRebalancingToReadyThenRefreshRemoveBroker(VertxTestContext context) throws IOException, URISyntaxException {
    // The mock endpoints are set up with zero pending, active and in-execution calls.
    krNewToProposalReadyToRebalancingToReadyThenRefresh(
            context,
            0,
            0,
            0,
            CruiseControlEndpoints.REMOVE_BROKER,
            createKafkaRebalance(CLUSTER_NAMESPACE, CLUSTER_NAME, RESOURCE_NAME, REMOVE_BROKER_KAFKA_REBALANCE_SPEC));
}
Aggregations