Search in sources :

Example 41 with FAILED

use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.

the class RecoveryTeardownService method handleRecoveryTeardownError.

public void handleRecoveryTeardownError(StackView stack, Exception errorDetails) {
    Long stackId = stack.getId();
    String stackUpdateMessage = "Recovery failed: " + errorDetails.getMessage();
    DetailedStackStatus status = DetailedStackStatus.CLUSTER_RECOVERY_FAILED;
    stackUpdater.updateStackStatus(stackId, status, stackUpdateMessage);
    LOGGER.info("Error during stack recovery flow: ", errorDetails);
    metricService.incrementMetricCounter(MetricType.STACK_RECOVERY_TEARDOWN_FAILED, stack, errorDetails);
    flowMessageService.fireEventAndLog(stackId, status.name(), DATALAKE_RECOVERY_FAILED, stackUpdateMessage);
}
Also used : DetailedStackStatus(com.sequenceiq.cloudbreak.api.endpoint.v4.common.DetailedStackStatus)

Example 42 with FAILED

use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.

the class ProvisionerService method waitCloudbreakClusterDeletion.

public void waitCloudbreakClusterDeletion(Long id, PollingConfig pollingConfig) {
    SdxCluster sdxCluster = sdxService.getById(id);
    AtomicInteger deleteFailedCount = new AtomicInteger(1);
    Polling.waitPeriodly(pollingConfig.getSleepTime(), pollingConfig.getSleepTimeUnit()).stopIfException(pollingConfig.getStopPollingIfExceptionOccurred()).stopAfterDelay(pollingConfig.getDuration(), pollingConfig.getDurationTimeUnit()).run(() -> {
        LOGGER.info("Deletion polling cloudbreak for stack status: '{}' in '{}' env", sdxCluster.getClusterName(), sdxCluster.getEnvName());
        try {
            StackV4Response stackV4Response = ThreadBasedUserCrnProvider.doAsInternalActor(regionAwareInternalCrnGeneratorFactory.iam().getInternalCrnForServiceAsString(), () -> stackV4Endpoint.get(0L, sdxCluster.getClusterName(), Collections.emptySet(), sdxCluster.getAccountId()));
            LOGGER.info("Stack status of SDX {} by response from cloudbreak: {}", sdxCluster.getClusterName(), stackV4Response.getStatus().name());
            LOGGER.debug("Response from cloudbreak: {}", JsonUtil.writeValueAsString(stackV4Response));
            ClusterV4Response cluster = stackV4Response.getCluster();
            if (cluster != null) {
                if (StatusKind.PROGRESS.equals(cluster.getStatus().getStatusKind())) {
                    return AttemptResults.justContinue();
                }
                if (Status.DELETE_FAILED.equals(cluster.getStatus())) {
                    // if it is implemented, please remove this
                    if (deleteFailedCount.getAndIncrement() >= DELETE_FAILED_RETRY_COUNT) {
                        LOGGER.error("Cluster deletion failed '" + sdxCluster.getClusterName() + "', " + stackV4Response.getCluster().getStatusReason());
                        return AttemptResults.breakFor("Data Lake deletion failed '" + sdxCluster.getClusterName() + "', " + stackV4Response.getCluster().getStatusReason());
                    } else {
                        return AttemptResults.justContinue();
                    }
                }
            }
            if (Status.DELETE_FAILED.equals(stackV4Response.getStatus())) {
                // if it is implemented, please remove this
                if (deleteFailedCount.getAndIncrement() >= DELETE_FAILED_RETRY_COUNT) {
                    LOGGER.error("Stack deletion failed '" + sdxCluster.getClusterName() + "', " + stackV4Response.getStatusReason());
                    return AttemptResults.breakFor("Data Lake deletion failed '" + sdxCluster.getClusterName() + "', " + stackV4Response.getStatusReason());
                } else {
                    return AttemptResults.justContinue();
                }
            } else {
                return AttemptResults.justContinue();
            }
        } catch (NotFoundException e) {
            return AttemptResults.finishWith(null);
        }
    });
    sdxStatusService.setStatusForDatalakeAndNotify(DatalakeStatusEnum.STACK_DELETED, "Datalake stack deleted", sdxCluster);
}
Also used : ClusterV4Response(com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.response.cluster.ClusterV4Response) StackV4Response(com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.response.StackV4Response) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SdxCluster(com.sequenceiq.datalake.entity.SdxCluster) NotFoundException(javax.ws.rs.NotFoundException)

Example 43 with FAILED

use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.

the class SdxRecommendationService method validateVmTypeOverride.

public void validateVmTypeOverride(DetailedEnvironmentResponse environment, SdxCluster sdxCluster) {
    try {
        LOGGER.debug("Validate vm type override for sdx cluster: {}", sdxCluster.getCrn());
        String cloudPlatform = environment.getCloudPlatform();
        if (shouldValidateVmTypes(sdxCluster, cloudPlatform)) {
            StackV4Request stackV4Request = JsonUtil.readValue(sdxCluster.getStackRequest(), StackV4Request.class);
            StackV4Request defaultTemplate = getDefaultTemplate(sdxCluster.getClusterShape(), sdxCluster.getRuntime(), cloudPlatform);
            String region = environment.getRegions().getNames().stream().findFirst().orElse(null);
            List<VmTypeResponse> availableVmTypes = getAvailableVmTypes(environment.getCredential().getCrn(), cloudPlatform, region, null);
            Map<String, VmTypeResponse> defaultVmTypesByInstanceGroup = getDefaultVmTypesByInstanceGroup(availableVmTypes, defaultTemplate);
            Map<String, List<String>> availableVmTypeNamesByInstanceGroup = filterAvailableVmTypeNamesBasedOnDefault(availableVmTypes, defaultVmTypesByInstanceGroup);
            stackV4Request.getInstanceGroups().forEach(instanceGroup -> {
                if (!defaultVmTypesByInstanceGroup.containsKey(instanceGroup.getName())) {
                    String message = "Instance group is missing from default template: " + instanceGroup.getName();
                    LOGGER.warn(message);
                    throw new BadRequestException(message);
                }
                VmTypeResponse defaultTemplateVmType = defaultVmTypesByInstanceGroup.get(instanceGroup.getName());
                if (isCustomInstanceTypeProvided(instanceGroup, defaultTemplateVmType.getValue()) && !isProvidedInstanceTypeIsAvailable(availableVmTypeNamesByInstanceGroup, instanceGroup)) {
                    String message = String.format("Invalid custom instance type for instance group: %s - %s", instanceGroup.getName(), instanceGroup.getTemplate().getInstanceType());
                    LOGGER.warn(message);
                    throw new BadRequestException(message);
                }
            });
        }
    } catch (NotFoundException | BadRequestException e) {
        throw e;
    } catch (Exception e) {
        LOGGER.warn("Validate VM type override failed!", e);
        throw new RuntimeException("Validate VM type override failed: " + e.getMessage());
    }
}
Also used : VmTypeResponse(com.sequenceiq.sdx.api.model.VmTypeResponse) StackV4Request(com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.request.StackV4Request) BadRequestException(com.sequenceiq.cloudbreak.common.exception.BadRequestException) NotFoundException(com.sequenceiq.cloudbreak.common.exception.NotFoundException) List(java.util.List) NotFoundException(com.sequenceiq.cloudbreak.common.exception.NotFoundException) BadRequestException(com.sequenceiq.cloudbreak.common.exception.BadRequestException)

Example 44 with FAILED

use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.

the class SdxRecommendationService method getRecommendation.

public SdxRecommendationResponse getRecommendation(String credentialCrn, SdxClusterShape clusterShape, String runtimeVersion, String cloudPlatform, String region, String availabilityZone) {
    try {
        StackV4Request defaultTemplate = getDefaultTemplate(clusterShape, runtimeVersion, cloudPlatform);
        List<VmTypeResponse> availableVmTypes = getAvailableVmTypes(credentialCrn, cloudPlatform, region, availabilityZone);
        Map<String, VmTypeResponse> defaultVmTypesByInstanceGroup = getDefaultVmTypesByInstanceGroup(availableVmTypes, defaultTemplate);
        Map<String, List<VmTypeResponse>> availableVmTypesByInstanceGroup = filterAvailableVmTypesBasedOnDefault(availableVmTypes, defaultVmTypesByInstanceGroup);
        LOGGER.debug("Return default template and available vm types for clusterShape: {}, " + "runtimeVersion: {}, cloudPlatform: {}, region: {}, availabilityZone: {}", clusterShape, runtimeVersion, cloudPlatform, region, availabilityZone);
        return new SdxRecommendationResponse(defaultTemplate, availableVmTypesByInstanceGroup);
    } catch (NotFoundException | BadRequestException e) {
        throw e;
    } catch (Exception e) {
        LOGGER.warn("Getting recommendation failed!", e);
        throw new RuntimeException("Getting recommendation failed: " + e.getMessage());
    }
}
Also used : NotFoundException(com.sequenceiq.cloudbreak.common.exception.NotFoundException) NotFoundException(com.sequenceiq.cloudbreak.common.exception.NotFoundException) BadRequestException(com.sequenceiq.cloudbreak.common.exception.BadRequestException) VmTypeResponse(com.sequenceiq.sdx.api.model.VmTypeResponse) StackV4Request(com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.request.StackV4Request) SdxRecommendationResponse(com.sequenceiq.sdx.api.model.SdxRecommendationResponse) BadRequestException(com.sequenceiq.cloudbreak.common.exception.BadRequestException) List(java.util.List)

Example 45 with FAILED

use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.

the class ClouderaManagerDecomissioner method decommissionNodesStopStart.

public Set<String> decommissionNodesStopStart(Stack stack, Map<String, InstanceMetaData> hostsToRemove, ApiClient client, long pollingTimeout) {
    HostsResourceApi hostsResourceApi = clouderaManagerApiFactory.getHostsResourceApi(client);
    try {
        ApiHostList hostRefList = hostsResourceApi.readHosts(null, null, SUMMARY_REQUEST_VIEW);
        LOGGER.trace("Target decommissionNodes: count={}, hosts=[{}]", hostsToRemove.size(), hostsToRemove.keySet());
        LOGGER.debug("hostsAvailableFromCM: count={}, hosts=[{}]", hostRefList.getItems().size(), hostRefList.getItems().stream().map(ApiHost::getHostname));
        List<String> stillAvailableRemovableHosts = hostRefList.getItems().stream().filter(apiHostRef -> hostsToRemove.containsKey(apiHostRef.getHostname())).parallel().map(ApiHost::getHostname).collect(Collectors.toList());
        Set<String> hostsAvailableForDecommissionSet = new HashSet<>(stillAvailableRemovableHosts);
        List<String> cmHostsUnavailableForDecommission = hostsToRemove.keySet().stream().filter(h -> !hostsAvailableForDecommissionSet.contains(h)).collect(Collectors.toList());
        if (cmHostsUnavailableForDecommission.size() != 0) {
            LOGGER.info("Some decommission targets are unavailable in CM: TotalDecommissionTargetCount={}, unavailableInCMCount={}, unavailableInCm=[{}]", hostsToRemove.size(), cmHostsUnavailableForDecommission.size(), cmHostsUnavailableForDecommission);
        }
        ClouderaManagerResourceApi apiInstance = clouderaManagerApiFactory.getClouderaManagerResourceApi(client);
        ApiHostNameList body = new ApiHostNameList().items(stillAvailableRemovableHosts);
        ApiCommand apiCommand = apiInstance.hostsDecommissionCommand(body);
        ExtendedPollingResult pollingResult = clouderaManagerPollingServiceProvider.startPollingCmHostsDecommission(stack, client, apiCommand.getId(), pollingTimeout);
        if (pollingResult.isExited()) {
            throw new CancellationException("Cluster was terminated while waiting for host decommission");
        } else if (pollingResult.isTimeout()) {
            String warningMessage = "Cloudera Manager decommission host command {} polling timed out, " + "thus we are aborting the decommission and we are retrying it for lost nodes once again.";
            abortDecommissionWithWarningMessage(apiCommand, client, warningMessage);
            throw new CloudbreakServiceException(String.format("Timeout while Cloudera Manager decommissioned host. CM command Id: %s", apiCommand.getId()));
        }
        return stillAvailableRemovableHosts.stream().map(hostsToRemove::get).filter(instanceMetaData -> instanceMetaData.getDiscoveryFQDN() != null).map(InstanceMetaData::getDiscoveryFQDN).collect(Collectors.toSet());
    } catch (ApiException e) {
        LOGGER.error("Failed to decommission hosts: {}", hostsToRemove.keySet(), e);
        throw new CloudbreakServiceException(e.getMessage(), e);
    }
}
Also used : LoggerFactory(org.slf4j.LoggerFactory) ApiService(com.cloudera.api.swagger.model.ApiService) ApiRole(com.cloudera.api.swagger.model.ApiRole) ApiException(com.cloudera.api.swagger.client.ApiException) ClustersResourceApi(com.cloudera.api.swagger.ClustersResourceApi) ApiRoleState(com.cloudera.api.swagger.model.ApiRoleState) FlowMessageService(com.sequenceiq.cloudbreak.message.FlowMessageService) Map(java.util.Map) ApiHostTemplate(com.cloudera.api.swagger.model.ApiHostTemplate) ClouderaManagerResourceApi(com.cloudera.api.swagger.ClouderaManagerResourceApi) HostTemplatesResourceApi(com.cloudera.api.swagger.HostTemplatesResourceApi) ApiHostTemplateList(com.cloudera.api.swagger.model.ApiHostTemplateList) NotEnoughNodeException(com.sequenceiq.cloudbreak.cluster.service.NotEnoughNodeException) ApiConfig(com.cloudera.api.swagger.model.ApiConfig) ApiHostsToRemoveArgs(com.cloudera.api.swagger.model.ApiHostsToRemoveArgs) ResourceAttributeUtil(com.sequenceiq.cloudbreak.cluster.util.ResourceAttributeUtil) Collection(java.util.Collection) HostsResourceApi(com.cloudera.api.swagger.HostsResourceApi) Set(java.util.Set) Status(com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status) ExtendedPollingResult(com.sequenceiq.cloudbreak.polling.ExtendedPollingResult) ApiHostNameList(com.cloudera.api.swagger.model.ApiHostNameList) VolumeSetAttributes(com.sequenceiq.cloudbreak.cloud.model.VolumeSetAttributes) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) List(java.util.List) ApiServiceConfig(com.cloudera.api.swagger.model.ApiServiceConfig) Stream(java.util.stream.Stream) ApiHealthSummary(com.cloudera.api.swagger.model.ApiHealthSummary) Optional(java.util.Optional) CancellationException(com.sequenceiq.cloudbreak.cloud.scheduler.CancellationException) Joiner(com.google.common.base.Joiner) Stack(com.sequenceiq.cloudbreak.domain.stack.Stack) ApiCommand(com.cloudera.api.swagger.model.ApiCommand) HostServiceStatuses(com.sequenceiq.cloudbreak.cluster.status.HostServiceStatuses) HostServiceStatus(com.sequenceiq.cloudbreak.cluster.status.HostServiceStatus) ApiClient(com.cloudera.api.swagger.client.ApiClient) ApiHostList(com.cloudera.api.swagger.model.ApiHostList) Function(java.util.function.Function) ServicesResourceApi(com.cloudera.api.swagger.ServicesResourceApi) ClouderaManagerPollingServiceProvider(com.sequenceiq.cloudbreak.cm.polling.ClouderaManagerPollingServiceProvider) HashSet(java.util.HashSet) Inject(javax.inject.Inject) ApiHost(com.cloudera.api.swagger.model.ApiHost) RolesResourceApi(com.cloudera.api.swagger.RolesResourceApi) NodeIsBusyException(com.sequenceiq.cloudbreak.cluster.service.NodeIsBusyException) Logger(org.slf4j.Logger) ResourceEvent(com.sequenceiq.cloudbreak.event.ResourceEvent) Consumer(java.util.function.Consumer) HostName.hostName(com.sequenceiq.cloudbreak.cloud.model.HostName.hostName) Component(org.springframework.stereotype.Component) HostGroup(com.sequenceiq.cloudbreak.domain.stack.cluster.host.HostGroup) InstanceMetaData(com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData) CommandsResourceApi(com.cloudera.api.swagger.CommandsResourceApi) HostName(com.sequenceiq.cloudbreak.cloud.model.HostName) CloudbreakServiceException(com.sequenceiq.cloudbreak.common.exception.CloudbreakServiceException) MgmtServiceResourceApi(com.cloudera.api.swagger.MgmtServiceResourceApi) Comparator(java.util.Comparator) ApiRoleRef(com.cloudera.api.swagger.model.ApiRoleRef) ApiServiceList(com.cloudera.api.swagger.model.ApiServiceList) ClouderaManagerApiFactory(com.sequenceiq.cloudbreak.cm.client.retry.ClouderaManagerApiFactory) StringUtils(org.springframework.util.StringUtils) ApiCommand(com.cloudera.api.swagger.model.ApiCommand) CloudbreakServiceException(com.sequenceiq.cloudbreak.common.exception.CloudbreakServiceException) ApiHostNameList(com.cloudera.api.swagger.model.ApiHostNameList) ApiHostList(com.cloudera.api.swagger.model.ApiHostList) CancellationException(com.sequenceiq.cloudbreak.cloud.scheduler.CancellationException) HostsResourceApi(com.cloudera.api.swagger.HostsResourceApi) ClouderaManagerResourceApi(com.cloudera.api.swagger.ClouderaManagerResourceApi) ExtendedPollingResult(com.sequenceiq.cloudbreak.polling.ExtendedPollingResult) HashSet(java.util.HashSet) ApiException(com.cloudera.api.swagger.client.ApiException)

Aggregations

Stack (com.sequenceiq.cloudbreak.domain.stack.Stack)20 List (java.util.List)19 Set (java.util.Set)18 Inject (javax.inject.Inject)18 Optional (java.util.Optional)17 Logger (org.slf4j.Logger)17 LoggerFactory (org.slf4j.LoggerFactory)17 Map (java.util.Map)16 Collectors (java.util.stream.Collectors)15 InstanceMetaData (com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData)14 HashSet (java.util.HashSet)13 Status (com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status)11 Collection (java.util.Collection)11 Collections (java.util.Collections)11 HostGroup (com.sequenceiq.cloudbreak.domain.stack.cluster.host.HostGroup)9 ResourceEvent (com.sequenceiq.cloudbreak.event.ResourceEvent)9 DetailedStackStatus (com.sequenceiq.cloudbreak.api.endpoint.v4.common.DetailedStackStatus)8 VolumeSetAttributes (com.sequenceiq.cloudbreak.cloud.model.VolumeSetAttributes)8 ResourceAttributeUtil (com.sequenceiq.cloudbreak.cluster.util.ResourceAttributeUtil)8 Selectable (com.sequenceiq.cloudbreak.common.event.Selectable)8