Use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.
The class RecoveryTeardownService, method handleRecoveryTeardownError.
public void handleRecoveryTeardownError(StackView stack, Exception errorDetails) {
    Long stackId = stack.getId();
    String stackUpdateMessage = "Recovery failed: " + errorDetails.getMessage();
    DetailedStackStatus status = DetailedStackStatus.CLUSTER_RECOVERY_FAILED;
    // Persist the failed status, then log, record a metric, and notify about the failure.
    stackUpdater.updateStackStatus(stackId, status, stackUpdateMessage);
    LOGGER.info("Error during stack recovery flow: ", errorDetails);
    metricService.incrementMetricCounter(MetricType.STACK_RECOVERY_TEARDOWN_FAILED, stack, errorDetails);
    flowMessageService.fireEventAndLog(stackId, status.name(), DATALAKE_RECOVERY_FAILED, stackUpdateMessage);
}
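Because every side effect of this handler goes through an injected collaborator, it is straightforward to unit test. A minimal Mockito-style sketch, assuming the collaborators can be mocked and injected; the test class, field types, and setup are illustrative assumptions, not the project's actual test code:

    // Illustrative sketch only; collaborator types are assumed from the method body above.
    @ExtendWith(MockitoExtension.class)
    class RecoveryTeardownServiceTest {

        @Mock
        private StackUpdater stackUpdater;

        @Mock
        private CloudbreakMetricService metricService;           // assumed metric service type

        @Mock
        private CloudbreakFlowMessageService flowMessageService; // assumed message service type

        @InjectMocks
        private RecoveryTeardownService underTest;

        @Test
        void updatesStatusRecordsMetricAndFiresEvent() {
            StackView stack = mock(StackView.class);
            when(stack.getId()).thenReturn(1L);
            Exception errorDetails = new IllegalStateException("teardown failed");

            underTest.handleRecoveryTeardownError(stack, errorDetails);

            // All three side effects should be driven by the same status and message.
            verify(stackUpdater).updateStackStatus(1L, DetailedStackStatus.CLUSTER_RECOVERY_FAILED, "Recovery failed: teardown failed");
            verify(metricService).incrementMetricCounter(MetricType.STACK_RECOVERY_TEARDOWN_FAILED, stack, errorDetails);
            verify(flowMessageService).fireEventAndLog(1L, DetailedStackStatus.CLUSTER_RECOVERY_FAILED.name(), DATALAKE_RECOVERY_FAILED, "Recovery failed: teardown failed");
        }
    }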
Use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.
The class ProvisionerService, method waitCloudbreakClusterDeletion.
public void waitCloudbreakClusterDeletion(Long id, PollingConfig pollingConfig) {
    SdxCluster sdxCluster = sdxService.getById(id);
    AtomicInteger deleteFailedCount = new AtomicInteger(1);
    Polling.waitPeriodly(pollingConfig.getSleepTime(), pollingConfig.getSleepTimeUnit())
            .stopIfException(pollingConfig.getStopPollingIfExceptionOccurred())
            .stopAfterDelay(pollingConfig.getDuration(), pollingConfig.getDurationTimeUnit())
            .run(() -> {
                LOGGER.info("Polling cloudbreak for deletion of stack '{}' in '{}' env", sdxCluster.getClusterName(), sdxCluster.getEnvName());
                try {
                    StackV4Response stackV4Response = ThreadBasedUserCrnProvider.doAsInternalActor(
                            regionAwareInternalCrnGeneratorFactory.iam().getInternalCrnForServiceAsString(),
                            () -> stackV4Endpoint.get(0L, sdxCluster.getClusterName(), Collections.emptySet(), sdxCluster.getAccountId()));
                    LOGGER.info("Stack status of SDX {} by response from cloudbreak: {}", sdxCluster.getClusterName(), stackV4Response.getStatus().name());
                    LOGGER.debug("Response from cloudbreak: {}", JsonUtil.writeValueAsString(stackV4Response));
                    ClusterV4Response cluster = stackV4Response.getCluster();
                    if (cluster != null) {
                        if (StatusKind.PROGRESS.equals(cluster.getStatus().getStatusKind())) {
                            return AttemptResults.justContinue();
                        }
                        if (Status.DELETE_FAILED.equals(cluster.getStatus())) {
                            // Tolerate DELETE_FAILED a few times before giving up; if retry is implemented upstream, please remove this.
                            if (deleteFailedCount.getAndIncrement() >= DELETE_FAILED_RETRY_COUNT) {
                                LOGGER.error("Cluster deletion failed '" + sdxCluster.getClusterName() + "', " + stackV4Response.getCluster().getStatusReason());
                                return AttemptResults.breakFor("Data Lake deletion failed '" + sdxCluster.getClusterName() + "', " + stackV4Response.getCluster().getStatusReason());
                            } else {
                                return AttemptResults.justContinue();
                            }
                        }
                    }
                    if (Status.DELETE_FAILED.equals(stackV4Response.getStatus())) {
                        // Same retry allowance for the stack-level status; if retry is implemented upstream, please remove this.
                        if (deleteFailedCount.getAndIncrement() >= DELETE_FAILED_RETRY_COUNT) {
                            LOGGER.error("Stack deletion failed '" + sdxCluster.getClusterName() + "', " + stackV4Response.getStatusReason());
                            return AttemptResults.breakFor("Data Lake deletion failed '" + sdxCluster.getClusterName() + "', " + stackV4Response.getStatusReason());
                        } else {
                            return AttemptResults.justContinue();
                        }
                    } else {
                        return AttemptResults.justContinue();
                    }
                } catch (NotFoundException e) {
                    // The stack is gone from cloudbreak, so the deletion has finished.
                    return AttemptResults.finishWith(null);
                }
            });
    sdxStatusService.setStatusForDatalakeAndNotify(DatalakeStatusEnum.STACK_DELETED, "Datalake stack deleted", sdxCluster);
}
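A hedged sketch of a caller, assuming PollingConfig exposes a builder whose method names mirror the getters used above (the builder methods are assumptions, not confirmed by the snippet): poll every 5 seconds for at most 30 minutes, and keep polling through transient exceptions.

    // Illustrative only: builder method names are inferred from the getters used above.
    PollingConfig pollingConfig = PollingConfig.builder()
            .withSleepTime(5)
            .withSleepTimeUnit(TimeUnit.SECONDS)
            .withDuration(30)
            .withDurationTimeUnit(TimeUnit.MINUTES)
            .withStopPollingIfExceptionOccurred(false) // tolerate transient client errors
            .build();
    provisionerService.waitCloudbreakClusterDeletion(sdxCluster.getId(), pollingConfig);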
Use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.
The class SdxRecommendationService, method validateVmTypeOverride.
public void validateVmTypeOverride(DetailedEnvironmentResponse environment, SdxCluster sdxCluster) {
    try {
        LOGGER.debug("Validate vm type override for sdx cluster: {}", sdxCluster.getCrn());
        String cloudPlatform = environment.getCloudPlatform();
        if (shouldValidateVmTypes(sdxCluster, cloudPlatform)) {
            StackV4Request stackV4Request = JsonUtil.readValue(sdxCluster.getStackRequest(), StackV4Request.class);
            StackV4Request defaultTemplate = getDefaultTemplate(sdxCluster.getClusterShape(), sdxCluster.getRuntime(), cloudPlatform);
            String region = environment.getRegions().getNames().stream().findFirst().orElse(null);
            List<VmTypeResponse> availableVmTypes = getAvailableVmTypes(environment.getCredential().getCrn(), cloudPlatform, region, null);
            Map<String, VmTypeResponse> defaultVmTypesByInstanceGroup = getDefaultVmTypesByInstanceGroup(availableVmTypes, defaultTemplate);
            Map<String, List<String>> availableVmTypeNamesByInstanceGroup = filterAvailableVmTypeNamesBasedOnDefault(availableVmTypes, defaultVmTypesByInstanceGroup);
            stackV4Request.getInstanceGroups().forEach(instanceGroup -> {
                if (!defaultVmTypesByInstanceGroup.containsKey(instanceGroup.getName())) {
                    String message = "Instance group is missing from the default template: " + instanceGroup.getName();
                    LOGGER.warn(message);
                    throw new BadRequestException(message);
                }
                VmTypeResponse defaultTemplateVmType = defaultVmTypesByInstanceGroup.get(instanceGroup.getName());
                if (isCustomInstanceTypeProvided(instanceGroup, defaultTemplateVmType.getValue())
                        && !isProvidedInstanceTypeIsAvailable(availableVmTypeNamesByInstanceGroup, instanceGroup)) {
                    String message = String.format("Invalid custom instance type for instance group: %s - %s",
                            instanceGroup.getName(), instanceGroup.getTemplate().getInstanceType());
                    LOGGER.warn(message);
                    throw new BadRequestException(message);
                }
            });
        }
    } catch (NotFoundException | BadRequestException e) {
        throw e;
    } catch (Exception e) {
        LOGGER.warn("Validate VM type override failed!", e);
        throw new RuntimeException("Validate VM type override failed: " + e.getMessage(), e);
    }
}
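The two predicate helpers are referenced above but not shown. A plausible sketch of what they check, inferred only from how they are called; this is illustrative, not the project's actual implementation:

    // Illustrative sketches; signatures and logic are assumptions based on the call sites above.
    private boolean isCustomInstanceTypeProvided(InstanceGroupV4Request instanceGroup, String defaultInstanceType) {
        // A custom type is "provided" when the request carries an instance type
        // that differs from the default template's choice for this group.
        String requested = instanceGroup.getTemplate() == null ? null : instanceGroup.getTemplate().getInstanceType();
        return requested != null && !requested.equals(defaultInstanceType);
    }

    private boolean isProvidedInstanceTypeIsAvailable(Map<String, List<String>> availableVmTypeNamesByInstanceGroup, InstanceGroupV4Request instanceGroup) {
        // The requested type must appear in the precomputed list of acceptable types for this group.
        return availableVmTypeNamesByInstanceGroup.getOrDefault(instanceGroup.getName(), List.of())
                .contains(instanceGroup.getTemplate().getInstanceType());
    }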
Use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.
The class SdxRecommendationService, method getRecommendation.
public SdxRecommendationResponse getRecommendation(String credentialCrn, SdxClusterShape clusterShape, String runtimeVersion, String cloudPlatform, String region, String availabilityZone) {
    try {
        StackV4Request defaultTemplate = getDefaultTemplate(clusterShape, runtimeVersion, cloudPlatform);
        List<VmTypeResponse> availableVmTypes = getAvailableVmTypes(credentialCrn, cloudPlatform, region, availabilityZone);
        Map<String, VmTypeResponse> defaultVmTypesByInstanceGroup = getDefaultVmTypesByInstanceGroup(availableVmTypes, defaultTemplate);
        Map<String, List<VmTypeResponse>> availableVmTypesByInstanceGroup = filterAvailableVmTypesBasedOnDefault(availableVmTypes, defaultVmTypesByInstanceGroup);
        LOGGER.debug("Return default template and available vm types for clusterShape: {}, runtimeVersion: {}, cloudPlatform: {}, region: {}, availabilityZone: {}",
                clusterShape, runtimeVersion, cloudPlatform, region, availabilityZone);
        return new SdxRecommendationResponse(defaultTemplate, availableVmTypesByInstanceGroup);
    } catch (NotFoundException | BadRequestException e) {
        throw e;
    } catch (Exception e) {
        LOGGER.warn("Getting recommendation failed!", e);
        throw new RuntimeException("Getting recommendation failed: " + e.getMessage(), e);
    }
}
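A hypothetical caller, to show the shape of the response. The cluster shape, runtime, and region values are placeholders, and the getAvailableVmTypesByInstanceGroup accessor is an assumption based on the constructor call above:

    // Illustrative usage; argument values and the response accessor are assumptions.
    SdxRecommendationResponse recommendation = sdxRecommendationService.getRecommendation(
            credentialCrn, SdxClusterShape.LIGHT_DUTY, "7.2.17", "AWS", "eu-central-1", null);
    // List the instance types that may replace the default for each instance group.
    recommendation.getAvailableVmTypesByInstanceGroup().forEach((group, vmTypes) ->
            LOGGER.info("{} -> {}", group, vmTypes.stream().map(VmTypeResponse::getValue).collect(Collectors.toList())));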
Use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in project cloudbreak by hortonworks.
The class ClouderaManagerDecomissioner, method decommissionNodesStopStart.
public Set<String> decommissionNodesStopStart(Stack stack, Map<String, InstanceMetaData> hostsToRemove, ApiClient client, long pollingTimeout) {
    HostsResourceApi hostsResourceApi = clouderaManagerApiFactory.getHostsResourceApi(client);
    try {
        ApiHostList hostRefList = hostsResourceApi.readHosts(null, null, SUMMARY_REQUEST_VIEW);
        LOGGER.trace("Target decommissionNodes: count={}, hosts=[{}]", hostsToRemove.size(), hostsToRemove.keySet());
        // Collect the hostnames; otherwise the Stream object itself would be logged.
        LOGGER.debug("hostsAvailableFromCM: count={}, hosts=[{}]", hostRefList.getItems().size(),
                hostRefList.getItems().stream().map(ApiHost::getHostname).collect(Collectors.toList()));
        List<String> stillAvailableRemovableHosts = hostRefList.getItems().stream()
                .filter(apiHostRef -> hostsToRemove.containsKey(apiHostRef.getHostname()))
                .map(ApiHost::getHostname)
                .collect(Collectors.toList());
        Set<String> hostsAvailableForDecommissionSet = new HashSet<>(stillAvailableRemovableHosts);
        List<String> cmHostsUnavailableForDecommission = hostsToRemove.keySet().stream()
                .filter(h -> !hostsAvailableForDecommissionSet.contains(h))
                .collect(Collectors.toList());
        if (!cmHostsUnavailableForDecommission.isEmpty()) {
            LOGGER.info("Some decommission targets are unavailable in CM: TotalDecommissionTargetCount={}, unavailableInCMCount={}, unavailableInCm=[{}]",
                    hostsToRemove.size(), cmHostsUnavailableForDecommission.size(), cmHostsUnavailableForDecommission);
        }
        ClouderaManagerResourceApi apiInstance = clouderaManagerApiFactory.getClouderaManagerResourceApi(client);
        ApiHostNameList body = new ApiHostNameList().items(stillAvailableRemovableHosts);
        ApiCommand apiCommand = apiInstance.hostsDecommissionCommand(body);
        ExtendedPollingResult pollingResult = clouderaManagerPollingServiceProvider.startPollingCmHostsDecommission(stack, client, apiCommand.getId(), pollingTimeout);
        if (pollingResult.isExited()) {
            throw new CancellationException("Cluster was terminated while waiting for host decommission");
        } else if (pollingResult.isTimeout()) {
            String warningMessage = "Cloudera Manager decommission host command {} polling timed out, "
                    + "thus we are aborting the decommission and we are retrying it for lost nodes once again.";
            abortDecommissionWithWarningMessage(apiCommand, client, warningMessage);
            throw new CloudbreakServiceException(String.format("Timeout while Cloudera Manager decommissioned host. CM command Id: %s", apiCommand.getId()));
        }
        // Report back the FQDNs of the hosts that were actually decommissioned.
        return stillAvailableRemovableHosts.stream()
                .map(hostsToRemove::get)
                .filter(instanceMetaData -> instanceMetaData.getDiscoveryFQDN() != null)
                .map(InstanceMetaData::getDiscoveryFQDN)
                .collect(Collectors.toSet());
    } catch (ApiException e) {
        LOGGER.error("Failed to decommission hosts: {}", hostsToRemove.keySet(), e);
        throw new CloudbreakServiceException(e.getMessage(), e);
    }
}
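The abortDecommissionWithWarningMessage helper is referenced above but not shown. A minimal sketch of what it plausibly does, assuming the Cloudera Manager Swagger client's CommandsResourceApi.abortCommand call and a getCommandsResourceApi factory method (both assumptions, not confirmed by the snippet):

    // Illustrative sketch; not the project's actual helper.
    private void abortDecommissionWithWarningMessage(ApiCommand apiCommand, ApiClient client, String warningMessage) throws ApiException {
        // The '{}' placeholder in the message is filled with the command id by SLF4J.
        LOGGER.warn(warningMessage, apiCommand.getId());
        // Ask Cloudera Manager to abort the still-running decommission command.
        clouderaManagerApiFactory.getCommandsResourceApi(client).abortCommand(apiCommand.getId());
    }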