Search in sources :

Example 1 with FAILED

use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.

In class GatewayTopologyToGatewayTopologyV4RequestConverter, method convert:

/**
 * Builds a {@link GatewayTopologyV4Request} from the given gateway topology.
 * <p>
 * The topology name is always copied. The exposed services are copied only when the
 * topology carries a non-null JSON whose value is not empty.
 *
 * @param gatewayTopology the topology to convert
 * @return the populated request
 * @throws CloudbreakApiException if the exposed-services JSON cannot be read
 */
public GatewayTopologyV4Request convert(GatewayTopology gatewayTopology) {
    GatewayTopologyV4Request request = new GatewayTopologyV4Request();
    request.setTopologyName(gatewayTopology.getTopologyName());

    Json exposedServicesJson = gatewayTopology.getExposedServices();
    boolean hasExposedServices = exposedServicesJson != null && StringUtils.isNotEmpty(exposedServicesJson.getValue());
    if (!hasExposedServices) {
        // Nothing to deserialize; only the name is set on the request.
        return request;
    }

    try {
        ExposedServices exposedServices = exposedServicesJson.get(ExposedServices.class);
        request.setExposedServices(exposedServices.getServices());
    } catch (IOException e) {
        LOGGER.info("Failed to add exposedServices to response", e);
        throw new CloudbreakApiException("Failed to add exposedServices to response", e);
    }
    return request;
}
Also used : GatewayTopologyV4Request(com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.request.cluster.gateway.topology.GatewayTopologyV4Request) Json(com.sequenceiq.cloudbreak.common.json.Json) IOException(java.io.IOException) CloudbreakApiException(com.sequenceiq.cloudbreak.exception.CloudbreakApiException)

Example 2 with FAILED

use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.

In class StopStartUpscaleActions, method cmCommissionAction:

/**
 * Creates the action registered under the bean name "STOPSTART_UPSCALE_HOSTS_COMMISSION_STATE".
 * <p>
 * The action first records the instances reported as STARTED, then logs the ones that are not,
 * warns when fewer nodes than the requested adjustment are available, and finally sends a
 * {@link StopStartUpscaleCommissionViaCMRequest} for the collected instance metadata.
 *
 * @return the action handling a {@link StopStartUpscaleStartInstancesResult} payload
 */
@Bean(name = "STOPSTART_UPSCALE_HOSTS_COMMISSION_STATE")
public Action<?, ?> cmCommissionAction() {
    return new AbstractStopStartUpscaleActions<>(StopStartUpscaleStartInstancesResult.class) {

        @Override
        protected void doExecute(StopStartUpscaleContext context, StopStartUpscaleStartInstancesResult payload, Map<Object, Object> variables)
                throws Exception {
            // Successful nodes are persisted first; failures are only reported afterwards.
            List<CloudVmInstanceStatus> instanceStatuses = payload.getAffectedInstanceStatuses();
            Set<String> startedInstanceIds = instanceStatuses.stream()
                    .filter(status -> InstanceStatus.STARTED == status.getStatus())
                    .map(status -> status.getCloudInstance().getInstanceId())
                    .collect(Collectors.toUnmodifiableSet());
            List<InstanceMetaData> startedMetaData = cloudInstanceIdToInstanceMetaDataConverter.getNotDeletedAndNotZombieInstances(
                    context.getStack(), context.getHostGroupName(), startedInstanceIds);
            clusterUpscaleFlowService.instancesStarted(context.getStack().getId(), startedMetaData);

            handleInstanceUnsuccessfulStart(context, instanceStatuses);

            // Currently an empty list. A later flow step could fill it by asking CM for service health, so that
            // cloud instances which are RUNNING but unused (likely because of earlier failures) can be detected.
            List<CloudInstance> servicesNotRunningInstances = payload.getStartInstanceRequest().getStartedInstancesWithServicesNotRunning();
            List<InstanceMetaData> servicesNotRunningMetaData = cloudInstanceIdToInstanceMetaDataConverter.getNotDeletedAndNotZombieInstances(
                    context.getStack(),
                    context.getHostGroupName(),
                    servicesNotRunningInstances.stream()
                            .map(CloudInstance::getInstanceId)
                            .collect(Collectors.toUnmodifiableSet()));

            LOGGER.info("StartedInstancesCount={}, StartedInstancesMetadataCount={},"
                            + " instancesWithServicesNotRunningCount={}, instancesWithServicesNotRunningMetadataCount={}",
                    startedInstanceIds.size(), startedMetaData.size(),
                    servicesNotRunningInstances.size(), servicesNotRunningMetaData.size());

            int availableNodeCount = servicesNotRunningMetaData.size() + startedMetaData.size();
            if (availableNodeCount < context.getAdjustment()) {
                LOGGER.warn("Not enough nodes found to commission. DesiredCount={}, availableCount={}", context.getAdjustment(), availableNodeCount);
                clusterUpscaleFlowService.warnNotEnoughInstances(
                        context.getStack().getId(), context.getHostGroupName(), context.getAdjustment(), availableNodeCount);
            }

            clusterUpscaleFlowService.upscaleCommissioningNodes(
                    context.getStack().getId(), context.getHostGroupName(), startedMetaData, servicesNotRunningMetaData);
            sendEvent(context, new StopStartUpscaleCommissionViaCMRequest(
                    context.getStack().getId(), context.getHostGroupName(), startedMetaData, servicesNotRunningMetaData));
        }

        /**
         * Reports the instances whose status is anything other than STARTED.
         * Any exception raised while reporting is logged and ignored so the flow can continue.
         */
        private void handleInstanceUnsuccessfulStart(StopStartUpscaleContext context, List<CloudVmInstanceStatus> cloudVmInstanceStatusList) {
            try {
                List<CloudVmInstanceStatus> notStarted = cloudVmInstanceStatusList.stream()
                        .filter(status -> InstanceStatus.STARTED != status.getStatus())
                        .collect(Collectors.toList());
                if (notStarted.isEmpty()) {
                    return;
                }
                // The DB status of these instances is deliberately left untouched; the regular syncer threads
                // pick them up, in case the syncers carry extra logic for instance state changes.
                LOGGER.warn("Some instances could not be started: count={}, instances={}", notStarted.size(), notStarted);
                clusterUpscaleFlowService.logInstancesFailedToStart(context.getStack().getId(), notStarted);
                // TODO CB-15132: Eventually, we may want to take some corrective action.
            } catch (Exception e) {
                LOGGER.warn("Failed while attempting to log info about instances which did not start. Ignoring, and letting flow proceed", e);
            }
        }
    };
}
Also used : Action(org.springframework.statemachine.action.Action) DetailedStackStatus(com.sequenceiq.cloudbreak.api.endpoint.v4.common.DetailedStackStatus) CloudContext(com.sequenceiq.cloudbreak.cloud.context.CloudContext) AvailabilityZone.availabilityZone(com.sequenceiq.cloudbreak.cloud.model.AvailabilityZone.availabilityZone) StopStartUpscaleStartInstancesRequest(com.sequenceiq.cloudbreak.cloud.event.instance.StopStartUpscaleStartInstancesRequest) LoggerFactory(org.slf4j.LoggerFactory) StackToCloudStackConverter(com.sequenceiq.cloudbreak.converter.spi.StackToCloudStackConverter) Location.location(com.sequenceiq.cloudbreak.cloud.model.Location.location) CloudInstance(com.sequenceiq.cloudbreak.cloud.model.CloudInstance) STOPSTART_UPSCALE_FAILURE_EVENT(com.sequenceiq.cloudbreak.core.flow2.cluster.stopstartus.StopStartUpscaleEvent.STOPSTART_UPSCALE_FAILURE_EVENT) InstanceMetaDataToCloudInstanceConverter(com.sequenceiq.cloudbreak.converter.spi.InstanceMetaDataToCloudInstanceConverter) Map(java.util.Map) Location(com.sequenceiq.cloudbreak.cloud.model.Location) CloudVmInstanceStatus(com.sequenceiq.cloudbreak.cloud.model.CloudVmInstanceStatus) AbstractStackAction(com.sequenceiq.cloudbreak.core.flow2.AbstractStackAction) AbstractStackFailureAction(com.sequenceiq.cloudbreak.core.flow2.stack.AbstractStackFailureAction) STOPPED(com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.STOPPED) StackFailureContext(com.sequenceiq.cloudbreak.core.flow2.stack.StackFailureContext) Set(java.util.Set) FlowParameters(com.sequenceiq.flow.core.FlowParameters) StopStartUpscaleStartInstancesResult(com.sequenceiq.cloudbreak.cloud.event.instance.StopStartUpscaleStartInstancesResult) Crn(com.sequenceiq.cloudbreak.auth.crn.Crn) StopStartUpscaleCommissionViaCMRequest(com.sequenceiq.cloudbreak.reactor.api.event.cluster.StopStartUpscaleCommissionViaCMRequest) Collectors(java.util.stream.Collectors) Configuration(org.springframework.context.annotation.Configuration) 
CloudStack(com.sequenceiq.cloudbreak.cloud.model.CloudStack) List(java.util.List) StackUtil(com.sequenceiq.cloudbreak.util.StackUtil) Optional(java.util.Optional) StackService(com.sequenceiq.cloudbreak.service.stack.StackService) Stack(com.sequenceiq.cloudbreak.domain.stack.Stack) Payload(com.sequenceiq.cloudbreak.common.event.Payload) MDCBuilder(com.sequenceiq.cloudbreak.logger.MDCBuilder) Selectable(com.sequenceiq.cloudbreak.common.event.Selectable) StackFailureEvent(com.sequenceiq.cloudbreak.reactor.api.event.StackFailureEvent) StackEvent(com.sequenceiq.cloudbreak.reactor.api.event.StackEvent) StopStartUpscaleCommissionViaCMResult(com.sequenceiq.cloudbreak.reactor.api.event.orchestration.StopStartUpscaleCommissionViaCMResult) HashSet(java.util.HashSet) Inject(javax.inject.Inject) ResourceService(com.sequenceiq.cloudbreak.service.resource.ResourceService) CloudInstanceIdToInstanceMetaDataConverter(com.sequenceiq.cloudbreak.converter.CloudInstanceIdToInstanceMetaDataConverter) ClusterManagerType(com.sequenceiq.cloudbreak.common.type.ClusterManagerType) StateContext(org.springframework.statemachine.StateContext) Logger(org.slf4j.Logger) Region.region(com.sequenceiq.cloudbreak.cloud.model.Region.region) CloudCredential(com.sequenceiq.cloudbreak.cloud.model.CloudCredential) STOPSTART_UPSCALE_FINALIZED_EVENT(com.sequenceiq.cloudbreak.core.flow2.cluster.stopstartus.StopStartUpscaleEvent.STOPSTART_UPSCALE_FINALIZED_EVENT) InstanceStatus(com.sequenceiq.cloudbreak.cloud.model.InstanceStatus) InstanceMetaData(com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData) Bean(org.springframework.context.annotation.Bean) VisibleForTesting(com.google.common.annotations.VisibleForTesting) StopStartUpscaleTriggerEvent(com.sequenceiq.cloudbreak.core.flow2.event.StopStartUpscaleTriggerEvent) Collections(java.util.Collections) CloudVmInstanceStatus(com.sequenceiq.cloudbreak.cloud.model.CloudVmInstanceStatus) CloudInstance(com.sequenceiq.cloudbreak.cloud.model.CloudInstance) 
InstanceMetaData(com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData) StopStartUpscaleStartInstancesResult(com.sequenceiq.cloudbreak.cloud.event.instance.StopStartUpscaleStartInstancesResult) StopStartUpscaleCommissionViaCMRequest(com.sequenceiq.cloudbreak.reactor.api.event.cluster.StopStartUpscaleCommissionViaCMRequest) List(java.util.List) Map(java.util.Map) Bean(org.springframework.context.annotation.Bean)

Example 3 with FAILED

use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.

In class StartExternalDatabaseHandler, method doAccept:

/**
 * Handles a {@link StartExternalDatabaseRequest}.
 * <p>
 * A "started" result without a database server CRN is returned when the stack is not a WORKLOAD stack,
 * when the database availability type is embedded, or when pause is not supported for the environment's
 * cloud platform. Otherwise the stack status is updated, the database start is invoked, the status is
 * updated again, and a "started" result carrying the cluster's database server CRN is returned.
 * Poller-related exceptions are logged and converted via {@code startFailedEvent}.
 *
 * @param event the handler event wrapping the start request
 * @return the resulting selectable event
 */
@Override
protected Selectable doAccept(HandlerEvent<StartExternalDatabaseRequest> event) {
    LOGGER.debug("In StartExternalDatabaseHandler.doAccept");
    StartExternalDatabaseRequest request = event.getData();
    Stack stack = stackService.getById(request.getResourceId());
    DatabaseAvailabilityType externalDatabase =
            ObjectUtils.defaultIfNull(stack.getExternalDatabaseCreationType(), DatabaseAvailabilityType.NONE);
    LOGGER.debug("External database: {} for stack {}", externalDatabase.name(), stack.getName());
    LOGGER.debug("Getting environment CRN for stack {}", stack.getName());
    DetailedEnvironmentResponse environment = environmentClientService.getByCrn(stack.getEnvironmentCrn());
    try {
        if (stack.getType() != StackType.WORKLOAD) {
            LOGGER.debug("External database start in Cloudbreak service is required for WORKLOAD stacks only.");
            return new StartExternalDatabaseResult(stack.getId(), EXTERNAL_DATABASE_STARTED_EVENT.event(), stack.getName(), null);
        }
        if (externalDatabase.isEmbedded()) {
            LOGGER.info("External database for stack {} is not requested. Start is not possible.", stack.getName());
            return new StartExternalDatabaseResult(stack.getId(), EXTERNAL_DATABASE_STARTED_EVENT.event(), stack.getName(), null);
        }
        boolean pauseSupported =
                externalDatabaseConfig.isExternalDatabasePauseSupportedFor(CloudPlatform.valueOf(environment.getCloudPlatform()));
        if (!pauseSupported) {
            LOGGER.debug("External database pause is not supported for '{}' cloud platform.", environment.getCloudPlatform());
            return new StartExternalDatabaseResult(stack.getId(), EXTERNAL_DATABASE_STARTED_EVENT.event(), stack.getName(), null);
        }

        LOGGER.debug("Updating stack {} status from {} to {}",
                stack.getName(), stack.getStatus().name(), DetailedStackStatus.EXTERNAL_DATABASE_START_IN_PROGRESS.name());
        stackUpdaterService.updateStatus(stack.getId(), DetailedStackStatus.EXTERNAL_DATABASE_START_IN_PROGRESS,
                ResourceEvent.CLUSTER_EXTERNAL_DATABASE_START_COMMANCED, "External database start in progress");
        startService.startDatabase(stack.getCluster(), externalDatabase, environment);
        LOGGER.debug("Updating stack {} status from {} to {}",
                stack.getName(), stack.getStatus().name(), DetailedStackStatus.EXTERNAL_DATABASE_START_FINISHED.name());
        stackUpdaterService.updateStatus(stack.getId(), DetailedStackStatus.EXTERNAL_DATABASE_START_FINISHED,
                ResourceEvent.CLUSTER_EXTERNAL_DATABASE_START_FINISHED, "External database start finished");
        return new StartExternalDatabaseResult(stack.getId(), EXTERNAL_DATABASE_STARTED_EVENT.event(), stack.getName(),
                stack.getCluster().getDatabaseServerCrn());
    } catch (UserBreakException e) {
        LOGGER.error("Database 'start' polling exited before timeout. Cause: ", e);
        return startFailedEvent(stack, e);
    } catch (PollerStoppedException e) {
        LOGGER.error(String.format("Database 'start' poller stopped for stack: %s", stack.getName()), e);
        return startFailedEvent(stack, e);
    } catch (PollerException e) {
        LOGGER.error(String.format("Database 'start' polling failed for stack: %s", stack.getName()), e);
        return startFailedEvent(stack, e);
    }
}
Also used : UserBreakException(com.dyngr.exception.UserBreakException) Selectable(com.sequenceiq.cloudbreak.common.event.Selectable) StartExternalDatabaseResult(com.sequenceiq.cloudbreak.reactor.api.event.externaldatabase.StartExternalDatabaseResult) PollerException(com.dyngr.exception.PollerException) DetailedEnvironmentResponse(com.sequenceiq.environment.api.v1.environment.model.response.DetailedEnvironmentResponse) StartExternalDatabaseRequest(com.sequenceiq.cloudbreak.reactor.api.event.externaldatabase.StartExternalDatabaseRequest) PollerStoppedException(com.dyngr.exception.PollerStoppedException) Stack(com.sequenceiq.cloudbreak.domain.stack.Stack) DatabaseAvailabilityType(com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.request.database.DatabaseAvailabilityType)

Example 4 with FAILED

use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.

In class ExistingStackPatcherJob, method executeTracedJob:

/**
 * Runs one stack-patching attempt for the stack identified by {@code getStackId()}.
 * <p>
 * When the stack status is one of the unschedulable statuses, the patch is marked
 * {@code UNSCHEDULED} and the job is unscheduled. Otherwise the patch is applied and the job is
 * unscheduled only if applying succeeded. An unknown patch type unschedules and fails the job;
 * any other exception is logged and rethrown.
 *
 * @param context the Quartz execution context; its job data map supplies the stack patch type name
 * @throws JobExecutionException declared by the job contract
 */
@Override
protected void executeTracedJob(JobExecutionContext context) throws JobExecutionException {
    Stack stack = stackService.getByIdWithListsInTransaction(getStackId());
    Status stackStatus = stack.getStatus();
    String stackPatchTypeName = context.getJobDetail().getJobDataMap().getString(STACK_PATCH_TYPE_NAME);
    try {
        ExistingStackPatchService patchService = existingStackPatcherServiceProvider.provide(stackPatchTypeName);
        StackPatch stackPatch = stackPatchService.getOrCreate(stack, patchService.getStackPatchType());
        if (Status.getUnschedulableStatuses().contains(stackStatus)) {
            LOGGER.debug("Existing stack patching will be unscheduled, because stack {} status is {}", stack.getResourceCrn(), stackStatus);
            stackPatchService.updateStatus(stackPatch, StackPatchStatus.UNSCHEDULED);
            unscheduleJob(context, stackPatch);
        } else if (applyStackPatch(patchService, stackPatch)) {
            // The job is dropped only after a successful patch; otherwise it stays scheduled.
            unscheduleJob(context, stackPatch);
        }
    } catch (UnknownStackPatchTypeException e) {
        unscheduleAndFailJob("Unknown stack patch type: " + stackPatchTypeName, context, new StackPatch(stack, StackPatchType.UNKNOWN));
    } catch (Exception e) {
        LOGGER.error("Failed", e);
        throw e;
    }
}
Also used : Status(com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status) StackPatchStatus(com.sequenceiq.cloudbreak.domain.stack.StackPatchStatus) ExistingStackPatchService(com.sequenceiq.cloudbreak.service.stackpatch.ExistingStackPatchService) StackPatch(com.sequenceiq.cloudbreak.domain.stack.StackPatch) StackPatchType(com.sequenceiq.cloudbreak.domain.stack.StackPatchType) ExistingStackPatchApplyException(com.sequenceiq.cloudbreak.service.stackpatch.ExistingStackPatchApplyException) JobExecutionException(org.quartz.JobExecutionException) Stack(com.sequenceiq.cloudbreak.domain.stack.Stack)

Example 5 with FAILED

use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.

In class StackTerminationService, method handleStackTerminationError:

/**
 * Reacts to an error raised during stack termination.
 * <p>
 * For a forced termination the cluster status is set to {@code CLUSTER_DELETE_COMPLETED}, the
 * termination is finalized and a "force terminated" event is fired. Otherwise the stack status is
 * set to {@code DELETE_FAILED} with the error message and a delete-failed event is fired.
 * In both cases the {@code STACK_TERMINATION_FAILED} metric counter is incremented.
 *
 * @param stackView    the stack whose termination failed
 * @param errorDetails the exception describing the failure
 * @param forced       whether the termination was requested as forced
 */
public void handleStackTerminationError(StackView stackView, Exception errorDetails, boolean forced) {
    final Long stackId = stackView.getId();
    final String stackUpdateMessage;
    final ResourceEvent resourceEvent;
    final DetailedStackStatus status;
    if (forced) {
        clusterService.updateClusterStatusByStackId(stackId, DetailedStackStatus.CLUSTER_DELETE_COMPLETED);
        terminationService.finalizeTermination(stackId, true);
        stackUpdateMessage = "Stack was force terminated.";
        status = DetailedStackStatus.DELETE_COMPLETED;
        resourceEvent = STACK_FORCED_DELETE_COMPLETED;
    } else {
        stackUpdateMessage = "Termination failed: " + errorDetails.getMessage();
        status = DetailedStackStatus.DELETE_FAILED;
        resourceEvent = STACK_INFRASTRUCTURE_DELETE_FAILED;
        stackUpdater.updateStackStatus(stackId, status, stackUpdateMessage);
        LOGGER.debug("Error during stack termination flow: ", errorDetails);
    }
    flowMessageService.fireEventAndLog(stackId, status.name(), resourceEvent, stackUpdateMessage);
    metricService.incrementMetricCounter(MetricType.STACK_TERMINATION_FAILED, stackView, errorDetails);
}
Also used : ResourceEvent(com.sequenceiq.cloudbreak.event.ResourceEvent) DetailedStackStatus(com.sequenceiq.cloudbreak.api.endpoint.v4.common.DetailedStackStatus)

Aggregations

Stack (com.sequenceiq.cloudbreak.domain.stack.Stack)20 List (java.util.List)19 Set (java.util.Set)18 Inject (javax.inject.Inject)18 Optional (java.util.Optional)17 Logger (org.slf4j.Logger)17 LoggerFactory (org.slf4j.LoggerFactory)17 Map (java.util.Map)16 Collectors (java.util.stream.Collectors)15 InstanceMetaData (com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData)14 HashSet (java.util.HashSet)13 Status (com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status)11 Collection (java.util.Collection)11 Collections (java.util.Collections)11 HostGroup (com.sequenceiq.cloudbreak.domain.stack.cluster.host.HostGroup)9 ResourceEvent (com.sequenceiq.cloudbreak.event.ResourceEvent)9 DetailedStackStatus (com.sequenceiq.cloudbreak.api.endpoint.v4.common.DetailedStackStatus)8 VolumeSetAttributes (com.sequenceiq.cloudbreak.cloud.model.VolumeSetAttributes)8 ResourceAttributeUtil (com.sequenceiq.cloudbreak.cluster.util.ResourceAttributeUtil)8 Selectable (com.sequenceiq.cloudbreak.common.event.Selectable)8