Examples with CruiseControlUserTaskStatus - io.strimzi.operator.cluster.operator.resource.cruisecontrol.CruiseControlUserTaskStatus

Example 1 with CruiseControlUserTaskStatus

Use of io.strimzi.operator.cluster.operator.resource.cruisecontrol.CruiseControlUserTaskStatus in project strimzi by strimzi.

In the class KafkaRebalanceAssemblyOperator, method onRebalancing.

/**
 * This method handles the transition from {@code Rebalancing} state.
 * It starts a periodic timer in order to check the status of the ongoing rebalance processing on Cruise Control side.
 * In order to do that, it calls the related Cruise Control REST API about asking the user task status.
 * When the rebalance is finished, the next state is {@code Ready}.
 * If the user sets the strimzi.io/rebalance annotation to 'stop', it calls the Cruise Control REST API for stopping the ongoing task
 * and then transitions to the {@code Stopped} state.
 * If the user sets any other values for the strimzi.io/rebalance annotation, it is just ignored and the user task checks continue.
 * This method holds the lock until the rebalance is finished, the ongoing task is stopped or any exception is raised.
 * <p>
 * Timer handling: every path that settles the returned future also cancels the periodic timer, and a tick
 * that finds the retry budget exhausted returns immediately instead of issuing further requests.
 * If the {@code KafkaRebalance} resource was deleted between two polls, the future is completed with {@code null}.
 * If the value passed in {@code rebalanceAnnotation} is not {@code none}, no timer is started and the future is
 * completed immediately with the current status of {@code kafkaRebalance}.
 *
 * @param reconciliation Reconciliation information
 * @param host Cruise Control service to which sending the REST API requests
 * @param apiClient Cruise Control REST API client instance
 * @param kafkaRebalance Current {@code KafkaRebalance} resource
 * @param rebalanceAnnotation The current value for the strimzi.io/rebalance annotation
 * @return a Future with the next {@code MapAndStatus<ConfigMap, KafkaRebalanceStatus>} including the state
 */
private Future<MapAndStatus<ConfigMap, KafkaRebalanceStatus>> onRebalancing(Reconciliation reconciliation, String host, CruiseControlApi apiClient, KafkaRebalance kafkaRebalance, KafkaRebalanceAnnotation rebalanceAnnotation) {
    Promise<MapAndStatus<ConfigMap, KafkaRebalanceStatus>> p = Promise.promise();
    if (rebalanceAnnotation == KafkaRebalanceAnnotation.none) {
        LOGGER.infoCr(reconciliation, "Starting Cruise Control rebalance user task status timer");
        String sessionId = kafkaRebalance.getStatus().getSessionId();
        // Counts consecutive failed user task status requests; reset whenever a request succeeds
        // with a task that is still running.
        AtomicInteger ccApiErrorCount = new AtomicInteger();
        vertx.setPeriodic(REBALANCE_POLLING_TIMER_MS, t -> {
            // Check that we have not already failed to contact the API beyond the allowed number of times.
            if (ccApiErrorCount.get() >= MAX_API_RETRIES) {
                vertx.cancelTimer(t);
                p.fail(new CruiseControlRestException("Unable to reach Cruise Control API after " + MAX_API_RETRIES + " attempts"));
                // The promise is settled: do not start another round of requests whose callbacks
                // would try to complete or fail it a second time.
                return;
            }
            kafkaRebalanceOperator.getAsync(kafkaRebalance.getMetadata().getNamespace(), kafkaRebalance.getMetadata().getName()).onSuccess(currentKafkaRebalance -> {
                // Checking that the resource was not deleted between periodic polls
                if (currentKafkaRebalance != null) {
                    // Safety check as timer might be called again (from a delayed timer firing)
                    if (state(currentKafkaRebalance) == KafkaRebalanceState.Rebalancing) {
                        if (rebalanceAnnotation(reconciliation, currentKafkaRebalance) == KafkaRebalanceAnnotation.stop) {
                            LOGGER.debugCr(reconciliation, "Stopping current Cruise Control rebalance user task");
                            vertx.cancelTimer(t);
                            apiClient.stopExecution(host, CruiseControl.REST_API_PORT).onSuccess(r -> p.complete(buildRebalanceStatus(null, KafkaRebalanceState.Stopped, validate(reconciliation, kafkaRebalance)))).onFailure(e -> {
                                // Prefer the nested cause when there is one, but never lose the failure itself.
                                Throwable cause = e.getCause() != null ? e.getCause() : e;
                                LOGGER.errorCr(reconciliation, "Cruise Control stopping execution failed", cause);
                                p.fail(cause);
                            });
                        } else {
                            LOGGER.infoCr(reconciliation, "Getting Cruise Control rebalance user task status");
                            apiClient.getUserTaskStatus(host, CruiseControl.REST_API_PORT, sessionId).onSuccess(cruiseControlResponse -> {
                                JsonObject taskStatusJson = cruiseControlResponse.getJson();
                                CruiseControlUserTaskStatus taskStatus = CruiseControlUserTaskStatus.lookup(taskStatusJson.getString("Status"));
                                switch (taskStatus) {
                                    case COMPLETED:
                                        vertx.cancelTimer(t);
                                        LOGGER.infoCr(reconciliation, "Rebalance ({}) is now complete", sessionId);
                                        p.complete(buildRebalanceStatus(kafkaRebalance, null, KafkaRebalanceState.Ready, taskStatusJson, validate(reconciliation, kafkaRebalance)));
                                        break;
                                    case COMPLETED_WITH_ERROR:
                                        // TODO: There doesn't seem to be a way to retrieve the actual error message from the user tasks endpoint?
                                        // We may need to propose an upstream PR for this.
                                        // TODO: Once we can get the error details we need to add an error field to the Rebalance Status to hold
                                        // details of any issues while rebalancing.
                                        LOGGER.errorCr(reconciliation, "Rebalance ({}) optimization proposal has failed to complete", sessionId);
                                        vertx.cancelTimer(t);
                                        p.complete(buildRebalanceStatus(sessionId, KafkaRebalanceState.NotReady, validate(reconciliation, kafkaRebalance)));
                                        break;
                                    case IN_EXECUTION:
                                        // Rebalance is still in progress.
                                        // If the resource status does not carry an optimization result yet, stop polling and
                                        // return a Rebalancing status built from the task JSON so the status gets updated.
                                        if (currentKafkaRebalance.getStatus().getOptimizationResult() == null || currentKafkaRebalance.getStatus().getOptimizationResult().isEmpty()) {
                                            LOGGER.infoCr(reconciliation, "Rebalance ({}) optimization proposal is now ready and has been added to the status", sessionId);
                                            // Cancel the timer so that the status is returned and updated.
                                            vertx.cancelTimer(t);
                                            p.complete(buildRebalanceStatus(kafkaRebalance, sessionId, KafkaRebalanceState.Rebalancing, taskStatusJson, validate(reconciliation, kafkaRebalance)));
                                        }
                                        // The API answered, so the consecutive error counter starts over.
                                        ccApiErrorCount.set(0);
                                        break;
                                    case ACTIVE:
                                        // Rebalance proposal is still being calculated.
                                        // If a rebalance(dryrun=false) was called and the proposal is still being prepared then the task
                                        // will be in an ACTIVE state. When the proposal is ready it will shift to IN_EXECUTION and we will
                                        // check that the optimisation proposal is added to the status on the next reconcile.
                                        LOGGER.infoCr(reconciliation, "Rebalance ({}) optimization proposal is still being prepared", sessionId);
                                        ccApiErrorCount.set(0);
                                        break;
                                    default:
                                        LOGGER.errorCr(reconciliation, "Unexpected state {}", taskStatus);
                                        vertx.cancelTimer(t);
                                        p.fail("Unexpected state " + taskStatus);
                                        break;
                                }
                            }).onFailure(e -> {
                                Throwable cause = e.getCause() != null ? e.getCause() : e;
                                LOGGER.errorCr(reconciliation, "Cruise Control getting rebalance task status failed", cause);
                                // To make sure this error is not just a temporary problem with the network we retry several times.
                                // If the number of errors reaches the MAX_API_RETRIES limit then the next timer tick will fail the promise.
                                ccApiErrorCount.getAndIncrement();
                            });
                        }
                    } else {
                        // The resource has left the Rebalancing state, so there is nothing left to poll for.
                        // Stop the timer (otherwise it would keep firing forever) and use tryComplete because
                        // this branch can be reached by a late firing after the promise was already settled.
                        vertx.cancelTimer(t);
                        p.tryComplete(new MapAndStatus<>(null, currentKafkaRebalance.getStatus()));
                    }
                } else {
                    LOGGER.debugCr(reconciliation, "Rebalance resource was deleted, stopping the request time");
                    vertx.cancelTimer(t);
                    p.complete();
                }
            }).onFailure(e -> {
                Throwable cause = e.getCause() != null ? e.getCause() : e;
                LOGGER.errorCr(reconciliation, "Cruise Control getting rebalance resource failed", cause);
                vertx.cancelTimer(t);
                p.fail(cause);
            });
        });
    } else {
        // Any annotation other than 'none' is not handled here: hand back the current status unchanged.
        p.complete(new MapAndStatus<>(null, kafkaRebalance.getStatus()));
    }
    return p.future();
}

Also used : Arrays(java.util.Arrays) LabelSelector(io.fabric8.kubernetes.api.model.LabelSelector) ANNO_STRIMZI_IO_REBALANCE(io.strimzi.operator.common.Annotations.ANNO_STRIMZI_IO_REBALANCE) Watcher(io.fabric8.kubernetes.client.Watcher) Annotations(io.strimzi.operator.common.Annotations) CruiseControlConfiguration(io.strimzi.operator.cluster.model.CruiseControlConfiguration) Resource(io.fabric8.kubernetes.client.dsl.Resource) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) ResourceOperatorSupplier(io.strimzi.operator.cluster.operator.resource.ResourceOperatorSupplier) JsonObject(io.vertx.core.json.JsonObject) CruiseControlRebalanceKeys(io.strimzi.operator.cluster.operator.resource.cruisecontrol.CruiseControlRebalanceKeys) ModelUtils(io.strimzi.operator.cluster.model.ModelUtils) KubernetesClientException(io.fabric8.kubernetes.client.KubernetesClientException) AbstractOperator(io.strimzi.operator.common.AbstractOperator) StatusUtils(io.strimzi.operator.common.operator.resource.StatusUtils) KafkaRebalance(io.strimzi.api.kafka.model.KafkaRebalance) SecretOperator(io.strimzi.operator.common.operator.resource.SecretOperator) Set(java.util.Set) KafkaRebalanceList(io.strimzi.api.kafka.KafkaRebalanceList) KafkaRebalanceAnnotation(io.strimzi.api.kafka.model.balancing.KafkaRebalanceAnnotation) Future(io.vertx.core.Future) Collectors(java.util.stream.Collectors) NoSuchResourceException(io.strimzi.operator.cluster.model.NoSuchResourceException) CruiseControlUserTaskStatus(io.strimzi.operator.cluster.operator.resource.cruisecontrol.CruiseControlUserTaskStatus) KafkaRebalanceStatusBuilder(io.strimzi.api.kafka.model.status.KafkaRebalanceStatusBuilder) CruiseControlResources(io.strimzi.api.kafka.model.CruiseControlResources) List(java.util.List) Labels(io.strimzi.operator.common.model.Labels) Stream(java.util.stream.Stream) CruiseControlApi(io.strimzi.operator.cluster.operator.resource.cruisecontrol.CruiseControlApi) 
Secret(io.fabric8.kubernetes.api.model.Secret) Optional(java.util.Optional) Condition(io.strimzi.api.kafka.model.status.Condition) PlatformFeaturesAvailability(io.strimzi.operator.PlatformFeaturesAvailability) ClusterOperatorConfig(io.strimzi.operator.cluster.ClusterOperatorConfig) KafkaList(io.strimzi.api.kafka.KafkaList) KafkaRebalanceState(io.strimzi.api.kafka.model.balancing.KafkaRebalanceState) KafkaRebalanceBuilder(io.strimzi.api.kafka.model.KafkaRebalanceBuilder) HashMap(java.util.HashMap) CruiseControlApiImpl(io.strimzi.operator.cluster.operator.resource.cruisecontrol.CruiseControlApiImpl) KafkaRebalanceSpec(io.strimzi.api.kafka.model.KafkaRebalanceSpec) ArrayList(java.util.ArrayList) WatcherException(io.fabric8.kubernetes.client.WatcherException) CompositeFuture(io.vertx.core.CompositeFuture) ConfigMapOperator(io.strimzi.operator.common.operator.resource.ConfigMapOperator) CrdOperator(io.strimzi.operator.common.operator.resource.CrdOperator) RebalanceOptions(io.strimzi.operator.cluster.operator.resource.cruisecontrol.RebalanceOptions) CruiseControl(io.strimzi.operator.cluster.model.CruiseControl) JbodStorage(io.strimzi.api.kafka.model.storage.JbodStorage) ReconciliationLogger(io.strimzi.operator.common.ReconciliationLogger) InvalidResourceException(io.strimzi.operator.cluster.model.InvalidResourceException) Promise(io.vertx.core.Promise) Vertx(io.vertx.core.Vertx) CruiseControlLoadParameters(io.strimzi.operator.cluster.operator.resource.cruisecontrol.CruiseControlLoadParameters) ConfigMap(io.fabric8.kubernetes.api.model.ConfigMap) KafkaRebalanceStatus(io.strimzi.api.kafka.model.status.KafkaRebalanceStatus) ConfigMapBuilder(io.fabric8.kubernetes.api.model.ConfigMapBuilder) Reconciliation(io.strimzi.operator.common.Reconciliation) JsonArray(io.vertx.core.json.JsonArray) HTTP_DEFAULT_IDLE_TIMEOUT_SECONDS(io.strimzi.operator.cluster.operator.resource.cruisecontrol.CruiseControlApiImpl.HTTP_DEFAULT_IDLE_TIMEOUT_SECONDS) 
StatusDiff(io.strimzi.operator.cluster.model.StatusDiff) Util(io.strimzi.operator.common.Util) AbstractWatchableStatusedResourceOperator(io.strimzi.operator.common.operator.resource.AbstractWatchableStatusedResourceOperator) KubernetesClient(io.fabric8.kubernetes.client.KubernetesClient) Kafka(io.strimzi.api.kafka.model.Kafka) CruiseControlRestException(io.strimzi.operator.cluster.operator.resource.cruisecontrol.CruiseControlRestException) Collections(java.util.Collections) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) CruiseControlUserTaskStatus(io.strimzi.operator.cluster.operator.resource.cruisecontrol.CruiseControlUserTaskStatus) JsonObject(io.vertx.core.json.JsonObject) CruiseControlRestException(io.strimzi.operator.cluster.operator.resource.cruisecontrol.CruiseControlRestException)

Example 2 with CruiseControlUserTaskStatus

Use of io.strimzi.operator.cluster.operator.resource.cruisecontrol.CruiseControlUserTaskStatus in project strimzi-kafka-operator by strimzi.

In the class KafkaRebalanceAssemblyOperator, method onRebalancing.

/**
 * This method handles the transition from {@code Rebalancing} state.
 * It starts a periodic timer in order to check the status of the ongoing rebalance processing on Cruise Control side.
 * In order to do that, it calls the related Cruise Control REST API about asking the user task status.
 * When the rebalance is finished, the next state is {@code Ready}.
 * If the user sets the strimzi.io/rebalance annotation to 'stop', it calls the Cruise Control REST API for stopping the ongoing task
 * and then transitions to the {@code Stopped} state.
 * If the user sets any other values for the strimzi.io/rebalance annotation, it is just ignored and the user task checks continue.
 * This method holds the lock until the rebalance is finished, the ongoing task is stopped or any exception is raised.
 * <p>
 * Timer handling: every path that settles the returned future also cancels the periodic timer, and a tick
 * that finds the retry budget exhausted returns immediately instead of issuing further requests.
 * If the {@code KafkaRebalance} resource was deleted between two polls, the future is completed with {@code null}.
 * If the value passed in {@code rebalanceAnnotation} is not {@code none}, no timer is started and the future is
 * completed immediately with the current status of {@code kafkaRebalance}.
 *
 * @param reconciliation Reconciliation information
 * @param host Cruise Control service to which sending the REST API requests
 * @param apiClient Cruise Control REST API client instance
 * @param kafkaRebalance Current {@code KafkaRebalance} resource
 * @param rebalanceAnnotation The current value for the strimzi.io/rebalance annotation
 * @return a Future with the next {@code MapAndStatus<ConfigMap, KafkaRebalanceStatus>} including the state
 */
private Future<MapAndStatus<ConfigMap, KafkaRebalanceStatus>> onRebalancing(Reconciliation reconciliation, String host, CruiseControlApi apiClient, KafkaRebalance kafkaRebalance, KafkaRebalanceAnnotation rebalanceAnnotation) {
    Promise<MapAndStatus<ConfigMap, KafkaRebalanceStatus>> p = Promise.promise();
    if (rebalanceAnnotation == KafkaRebalanceAnnotation.none) {
        LOGGER.infoCr(reconciliation, "Starting Cruise Control rebalance user task status timer");
        String sessionId = kafkaRebalance.getStatus().getSessionId();
        // Counts consecutive failed user task status requests; reset whenever a request succeeds
        // with a task that is still running.
        AtomicInteger ccApiErrorCount = new AtomicInteger();
        vertx.setPeriodic(REBALANCE_POLLING_TIMER_MS, t -> {
            // Check that we have not already failed to contact the API beyond the allowed number of times.
            if (ccApiErrorCount.get() >= MAX_API_RETRIES) {
                vertx.cancelTimer(t);
                p.fail(new CruiseControlRestException("Unable to reach Cruise Control API after " + MAX_API_RETRIES + " attempts"));
                // The promise is settled: do not start another round of requests whose callbacks
                // would try to complete or fail it a second time.
                return;
            }
            kafkaRebalanceOperator.getAsync(kafkaRebalance.getMetadata().getNamespace(), kafkaRebalance.getMetadata().getName()).onSuccess(currentKafkaRebalance -> {
                // Checking that the resource was not deleted between periodic polls
                if (currentKafkaRebalance != null) {
                    // Safety check as timer might be called again (from a delayed timer firing)
                    if (state(currentKafkaRebalance) == KafkaRebalanceState.Rebalancing) {
                        if (rebalanceAnnotation(reconciliation, currentKafkaRebalance) == KafkaRebalanceAnnotation.stop) {
                            LOGGER.debugCr(reconciliation, "Stopping current Cruise Control rebalance user task");
                            vertx.cancelTimer(t);
                            apiClient.stopExecution(host, CruiseControl.REST_API_PORT).onSuccess(r -> p.complete(buildRebalanceStatus(null, KafkaRebalanceState.Stopped, validate(reconciliation, kafkaRebalance)))).onFailure(e -> {
                                // Prefer the nested cause when there is one, but never lose the failure itself.
                                Throwable cause = e.getCause() != null ? e.getCause() : e;
                                LOGGER.errorCr(reconciliation, "Cruise Control stopping execution failed", cause);
                                p.fail(cause);
                            });
                        } else {
                            LOGGER.infoCr(reconciliation, "Getting Cruise Control rebalance user task status");
                            apiClient.getUserTaskStatus(host, CruiseControl.REST_API_PORT, sessionId).onSuccess(cruiseControlResponse -> {
                                JsonObject taskStatusJson = cruiseControlResponse.getJson();
                                CruiseControlUserTaskStatus taskStatus = CruiseControlUserTaskStatus.lookup(taskStatusJson.getString("Status"));
                                switch (taskStatus) {
                                    case COMPLETED:
                                        vertx.cancelTimer(t);
                                        LOGGER.infoCr(reconciliation, "Rebalance ({}) is now complete", sessionId);
                                        p.complete(buildRebalanceStatus(kafkaRebalance, null, KafkaRebalanceState.Ready, taskStatusJson, validate(reconciliation, kafkaRebalance)));
                                        break;
                                    case COMPLETED_WITH_ERROR:
                                        // TODO: There doesn't seem to be a way to retrieve the actual error message from the user tasks endpoint?
                                        // We may need to propose an upstream PR for this.
                                        // TODO: Once we can get the error details we need to add an error field to the Rebalance Status to hold
                                        // details of any issues while rebalancing.
                                        LOGGER.errorCr(reconciliation, "Rebalance ({}) optimization proposal has failed to complete", sessionId);
                                        vertx.cancelTimer(t);
                                        p.complete(buildRebalanceStatus(sessionId, KafkaRebalanceState.NotReady, validate(reconciliation, kafkaRebalance)));
                                        break;
                                    case IN_EXECUTION:
                                        // Rebalance is still in progress.
                                        // If the resource status does not carry an optimization result yet, stop polling and
                                        // return a Rebalancing status built from the task JSON so the status gets updated.
                                        if (currentKafkaRebalance.getStatus().getOptimizationResult() == null || currentKafkaRebalance.getStatus().getOptimizationResult().isEmpty()) {
                                            LOGGER.infoCr(reconciliation, "Rebalance ({}) optimization proposal is now ready and has been added to the status", sessionId);
                                            // Cancel the timer so that the status is returned and updated.
                                            vertx.cancelTimer(t);
                                            p.complete(buildRebalanceStatus(kafkaRebalance, sessionId, KafkaRebalanceState.Rebalancing, taskStatusJson, validate(reconciliation, kafkaRebalance)));
                                        }
                                        // The API answered, so the consecutive error counter starts over.
                                        ccApiErrorCount.set(0);
                                        break;
                                    case ACTIVE:
                                        // Rebalance proposal is still being calculated.
                                        // If a rebalance(dryrun=false) was called and the proposal is still being prepared then the task
                                        // will be in an ACTIVE state. When the proposal is ready it will shift to IN_EXECUTION and we will
                                        // check that the optimisation proposal is added to the status on the next reconcile.
                                        LOGGER.infoCr(reconciliation, "Rebalance ({}) optimization proposal is still being prepared", sessionId);
                                        ccApiErrorCount.set(0);
                                        break;
                                    default:
                                        LOGGER.errorCr(reconciliation, "Unexpected state {}", taskStatus);
                                        vertx.cancelTimer(t);
                                        p.fail("Unexpected state " + taskStatus);
                                        break;
                                }
                            }).onFailure(e -> {
                                Throwable cause = e.getCause() != null ? e.getCause() : e;
                                LOGGER.errorCr(reconciliation, "Cruise Control getting rebalance task status failed", cause);
                                // To make sure this error is not just a temporary problem with the network we retry several times.
                                // If the number of errors reaches the MAX_API_RETRIES limit then the next timer tick will fail the promise.
                                ccApiErrorCount.getAndIncrement();
                            });
                        }
                    } else {
                        // The resource has left the Rebalancing state, so there is nothing left to poll for.
                        // Stop the timer (otherwise it would keep firing forever) and use tryComplete because
                        // this branch can be reached by a late firing after the promise was already settled.
                        vertx.cancelTimer(t);
                        p.tryComplete(new MapAndStatus<>(null, currentKafkaRebalance.getStatus()));
                    }
                } else {
                    LOGGER.debugCr(reconciliation, "Rebalance resource was deleted, stopping the request time");
                    vertx.cancelTimer(t);
                    p.complete();
                }
            }).onFailure(e -> {
                Throwable cause = e.getCause() != null ? e.getCause() : e;
                LOGGER.errorCr(reconciliation, "Cruise Control getting rebalance resource failed", cause);
                vertx.cancelTimer(t);
                p.fail(cause);
            });
        });
    } else {
        // Any annotation other than 'none' is not handled here: hand back the current status unchanged.
        p.complete(new MapAndStatus<>(null, kafkaRebalance.getStatus()));
    }
    return p.future();
}

Aggregations

ConfigMap (io.fabric8.kubernetes.api.model.ConfigMap)2 ConfigMapBuilder (io.fabric8.kubernetes.api.model.ConfigMapBuilder)2 LabelSelector (io.fabric8.kubernetes.api.model.LabelSelector)2 Secret (io.fabric8.kubernetes.api.model.Secret)2 KubernetesClient (io.fabric8.kubernetes.client.KubernetesClient)2 KubernetesClientException (io.fabric8.kubernetes.client.KubernetesClientException)2 Watcher (io.fabric8.kubernetes.client.Watcher)2 WatcherException (io.fabric8.kubernetes.client.WatcherException)2 Resource (io.fabric8.kubernetes.client.dsl.Resource)2 KafkaList (io.strimzi.api.kafka.KafkaList)2 KafkaRebalanceList (io.strimzi.api.kafka.KafkaRebalanceList)2 CruiseControlResources (io.strimzi.api.kafka.model.CruiseControlResources)2 Kafka (io.strimzi.api.kafka.model.Kafka)2 KafkaRebalance (io.strimzi.api.kafka.model.KafkaRebalance)2 KafkaRebalanceBuilder (io.strimzi.api.kafka.model.KafkaRebalanceBuilder)2 KafkaRebalanceSpec (io.strimzi.api.kafka.model.KafkaRebalanceSpec)2 KafkaRebalanceAnnotation (io.strimzi.api.kafka.model.balancing.KafkaRebalanceAnnotation)2 KafkaRebalanceState (io.strimzi.api.kafka.model.balancing.KafkaRebalanceState)2 Condition (io.strimzi.api.kafka.model.status.Condition)2 KafkaRebalanceStatus (io.strimzi.api.kafka.model.status.KafkaRebalanceStatus)2