Usage of com.sequenceiq.cloudbreak.domain.stack.Stack in the Cloudbreak project by Hortonworks.
The following snippet shows the applyStackPatch method of the ExistingStackPatcherJob class.
/**
 * Applies the given patch to the stack if the stack is affected and the patch is not already fixed.
 *
 * @param existingStackPatchService service implementing the concrete patch logic
 * @param stackPatch                persisted patch record for the stack
 * @return true when no further scheduling is needed (patch fixed, not affected, or already fixed);
 *         false when the patch was skipped and should be retried later
 * @throws JobExecutionException when applying the patch fails
 */
private boolean applyStackPatch(ExistingStackPatchService existingStackPatchService, StackPatch stackPatch) throws JobExecutionException {
    Stack stack = stackPatch.getStack();
    StackPatchType stackPatchType = existingStackPatchService.getStackPatchType();
    // Guard: nothing to do when this patch was already applied successfully.
    if (StackPatchStatus.FIXED.equals(stackPatch.getStatus())) {
        LOGGER.debug("Stack {} was already patched for {}", stack.getResourceCrn(), stackPatchType);
        return true;
    }
    try {
        if (!existingStackPatchService.isAffected(stack)) {
            LOGGER.debug("Stack {} is not affected by {}", stack.getResourceCrn(), stackPatchType);
            stackPatchService.updateStatus(stackPatch, StackPatchStatus.NOT_AFFECTED);
            return true;
        }
        LOGGER.debug("Stack {} needs patch for {}", stack.getResourceCrn(), stackPatchType);
        stackPatchService.updateStatusAndReportUsage(stackPatch, StackPatchStatus.AFFECTED);
        boolean applied = existingStackPatchService.apply(stack);
        if (applied) {
            stackPatchService.updateStatusAndReportUsage(stackPatch, StackPatchStatus.FIXED);
        } else {
            // Apply declined (e.g. preconditions not met); leave it to be retried on a later run.
            stackPatchService.updateStatus(stackPatch, StackPatchStatus.SKIPPED);
        }
        return applied;
    } catch (ExistingStackPatchApplyException e) {
        String message = String.format("Failed to patch stack %s for %s", stack.getResourceCrn(), stackPatchType);
        LOGGER.error(message, e);
        stackPatchService.updateStatusAndReportUsage(stackPatch, StackPatchStatus.FAILED, e.getMessage());
        throw new JobExecutionException(message, e);
    }
}
Usage of com.sequenceiq.cloudbreak.domain.stack.Stack in the Cloudbreak project by Hortonworks.
The following snippet shows the executeTracedJob method of the ExistingStackPatcherJob class.
@Override
protected void executeTracedJob(JobExecutionContext context) throws JobExecutionException {
Stack stack = stackService.getByIdWithListsInTransaction(getStackId());
Status stackStatus = stack.getStatus();
String stackPatchTypeName = context.getJobDetail().getJobDataMap().getString(STACK_PATCH_TYPE_NAME);
try {
ExistingStackPatchService existingStackPatchService = existingStackPatcherServiceProvider.provide(stackPatchTypeName);
StackPatchType stackPatchType = existingStackPatchService.getStackPatchType();
StackPatch stackPatch = stackPatchService.getOrCreate(stack, stackPatchType);
if (!Status.getUnschedulableStatuses().contains(stackStatus)) {
boolean success = applyStackPatch(existingStackPatchService, stackPatch);
if (success) {
unscheduleJob(context, stackPatch);
}
} else {
LOGGER.debug("Existing stack patching will be unscheduled, because stack {} status is {}", stack.getResourceCrn(), stackStatus);
stackPatchService.updateStatus(stackPatch, StackPatchStatus.UNSCHEDULED);
unscheduleJob(context, stackPatch);
}
} catch (UnknownStackPatchTypeException e) {
String message = "Unknown stack patch type: " + stackPatchTypeName;
unscheduleAndFailJob(message, context, new StackPatch(stack, StackPatchType.UNKNOWN));
} catch (Exception e) {
LOGGER.error("Failed", e);
throw e;
}
}
Usage of com.sequenceiq.cloudbreak.domain.stack.Stack in the Cloudbreak project by Hortonworks.
The following snippet shows the doAccept method of the ValidateCloudConfigHandler class.
@Override
protected Selectable doAccept(HandlerEvent<ValidateCloudConfigRequest> event) {
ValidateCloudConfigRequest data = event.getData();
Stack stack = stackService.getByIdWithLists(data.getResourceId());
String name = stack.getName();
DetailedEnvironmentResponse environment = environmentClientService.getByCrn(stack.getEnvironmentCrn());
Credential credential = credentialConverter.convert(environment.getCredential());
CloudCredential cloudCredential = credentialToCloudCredentialConverter.convert(credential);
ValidationResult.ValidationResultBuilder validationBuilder = ValidationResult.builder();
stackValidator.validate(stack, validationBuilder);
Set<InstanceGroup> instanceGroups = stack.getInstanceGroups();
measure(() -> {
for (InstanceGroup instanceGroup : instanceGroups) {
LOGGER.info("Validate template for {} name with {} instanceGroup.", name, instanceGroup.toString());
StackType type = stack.getType();
templateValidator.validate(credential, instanceGroup, stack, fromStackType(type == null ? null : type.name()), Optional.of(stack.getCreator()), validationBuilder);
}
}, LOGGER, "Stack's instance templates have been validated in {} ms for stack {}", name);
multiAzValidator.validateMultiAzForStack(stack.getPlatformVariant(), instanceGroups, validationBuilder);
ParametersValidationRequest parametersValidationRequest = parametersValidator.validate(stack.getCloudPlatform(), cloudCredential, stack.getParameters(), stack.getWorkspace().getId());
parametersValidator.waitResult(parametersValidationRequest, validationBuilder);
if (!StackType.LEGACY.equals(stack.getType())) {
dataLakeValidator.validate(stack, validationBuilder);
}
environmentValidator.validate(stack, environment, stack.getType().equals(StackType.WORKLOAD), validationBuilder);
ValidationResult validationResult = validationBuilder.build();
if (validationResult.getState() == ValidationResult.State.ERROR || validationResult.hasError()) {
LOGGER.debug("Stack request has validation error(s): {}.", validationResult.getFormattedErrors());
throw new IllegalStateException(validationResult.getFormattedErrors());
} else {
LOGGER.debug("Stack validation has been finished without any error.");
return new StackEvent(CloudConfigValidationEvent.VALIDATE_CLOUD_CONFIG_FINISHED_EVENT.selector(), data.getResourceId());
}
}
Usage of com.sequenceiq.cloudbreak.domain.stack.Stack in the Cloudbreak project by Hortonworks.
The following snippet shows the saltPing method of the NodeStatusService class.
/**
 * Retrieves the salt ping health report from the primary gateway of the given stack.
 *
 * @param stackId id of the stack to query
 * @return the salt health report RPC response
 * @throws CloudbreakServiceException when the node status client call fails
 */
public RPCResponse<NodeStatusProto.SaltHealthReport> saltPing(Long stackId) {
    Stack stack = stackService.getByIdWithListsInTransaction(stackId);
    MDCBuilder.buildMdcContext(stack);
    LOGGER.debug("Retrieving salt ping report from the hosts of stack: {}", stack.getResourceCrn());
    try (CdpNodeStatusMonitorClient client = factory.getClient(stack, stack.getPrimaryGatewayInstance())) {
        return client.saltPing(false, false);
    } catch (CdpNodeStatusMonitorClientException e) {
        // Fix: chain the cause so the original failure's stack trace is preserved.
        throw new CloudbreakServiceException("Could not get salt ping report from stack.", e);
    }
}
Usage of com.sequenceiq.cloudbreak.domain.stack.Stack in the Cloudbreak project by Hortonworks.
The following snippet shows the doAccept method of the StopStartDownscaleDecommissionViaCMHandler class.
/**
 * Decommissions the requested hosts via Cloudera Manager as part of a stop-start downscale,
 * then places the successfully decommissioned hosts into CM maintenance mode.
 * Hosts missing from CM's view, or not decommissioned within the poll window, are collected
 * and reported back in the result rather than treated as a hard failure.
 * Any exception is caught and converted into a failure result (no rethrow).
 */
@Override
protected Selectable doAccept(HandlerEvent<StopStartDownscaleDecommissionViaCMRequest> event) {
StopStartDownscaleDecommissionViaCMRequest request = event.getData();
LOGGER.info("StopStartDownscaleDecommissionViaCMHandler for: {}, {}", event.getData().getResourceId(), event.getData());
try {
Stack stack = stackService.getByIdWithLists(request.getResourceId());
Cluster cluster = stack.getCluster();
ClusterDecomissionService clusterDecomissionService = clusterApiConnectors.getConnector(stack).clusterDecomissionService();
// Translate the requested private instance ids into hostnames known to the stack.
Set<String> hostNames = getHostNamesForPrivateIds(request.getInstanceIdsToDecommission(), stack);
LOGGER.debug("Attempting to decommission hosts. count={}, hostnames={}", hostNames.size(), hostNames);
HostGroup hostGroup = hostGroupService.getByClusterIdAndName(cluster.getId(), request.getHostGroupName()).orElseThrow(NotFoundException.notFound("hostgroup", request.getHostGroupName()));
// CM may not know about every requested host; collect the subset it can actually remove.
Map<String, InstanceMetaData> hostsToRemove = clusterDecomissionService.collectHostsToRemove(hostGroup, hostNames);
List<String> missingHostsInCm = Collections.emptyList();
if (hostNames.size() != hostsToRemove.size()) {
// Hosts requested but absent from CM are tracked as "missing" — not an error.
missingHostsInCm = hostNames.stream().filter(h -> !hostsToRemove.containsKey(h)).collect(Collectors.toList());
LOGGER.info("Found fewer instances in CM to decommission, as compared to initial ask. foundCount={}, initialCount={}, missingHostsInCm={}", hostsToRemove.size(), hostNames.size(), missingHostsInCm);
}
// TODO CB-14929: Potentially put the nodes into maintenance mode before decommissioning?
// TODO CB-15132: Eventually, try parsing the results of the CM decommission, and see if a partial decommission went through in the
// timebound specified.
Set<String> decommissionedHostNames = Collections.emptySet();
if (hostsToRemove.size() > 0) {
// Bounded wait: the CM decommission polls for at most 10 minutes.
decommissionedHostNames = clusterDecomissionService.decommissionClusterNodesStopStart(hostsToRemove, POLL_FOR_10_MINUTES);
updateInstanceStatuses(hostsToRemove, decommissionedHostNames, InstanceStatus.DECOMMISSIONED, "decommission requested for instances");
}
// This doesn't handle failures. It handles scenarios where CM list APIs don't have the necessary hosts available.
List<String> allMissingHostnames = null;
if (missingHostsInCm.size() > 0) {
allMissingHostnames = new LinkedList<>(missingHostsInCm);
}
if (hostsToRemove.size() != decommissionedHostNames.size()) {
// Effectively-final copy required for use inside the lambda below.
Set<String> finalDecommissionedHostnames = decommissionedHostNames;
List<String> additionalMissingDecommissionHostnames = hostsToRemove.keySet().stream().filter(h -> !finalDecommissionedHostnames.contains(h)).collect(Collectors.toList());
LOGGER.info("Decommissioned fewer instances than requested. decommissionedCount={}, expectedCount={}, initialCount={}, notDecommissioned=[{}]", decommissionedHostNames.size(), hostsToRemove.size(), hostNames.size(), additionalMissingDecommissionHostnames);
if (allMissingHostnames == null) {
allMissingHostnames = new LinkedList<>();
}
allMissingHostnames.addAll(additionalMissingDecommissionHostnames);
}
LOGGER.info("hostsDecommissioned: count={}, hostNames={}", decommissionedHostNames.size(), decommissionedHostNames);
if (decommissionedHostNames.size() > 0) {
LOGGER.debug("Attempting to put decommissioned hosts into maintenance mode. count={}", decommissionedHostNames.size());
// Fire user-visible events around the maintenance-mode transition for progress reporting.
flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(), CLUSTER_SCALING_STOPSTART_DOWNSCALE_ENTERINGCMMAINTMODE, String.valueOf(decommissionedHostNames.size()));
clusterDecomissionService.enterMaintenanceMode(decommissionedHostNames);
flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(), CLUSTER_SCALING_STOPSTART_DOWNSCALE_ENTEREDCMMAINTMODE, String.valueOf(decommissionedHostNames.size()));
LOGGER.debug("Successfully put decommissioned hosts into maintenance mode. count={}", decommissionedHostNames.size());
} else {
LOGGER.debug("No nodes decommissioned, hence no nodes being put into maintenance mode");
}
return new StopStartDownscaleDecommissionViaCMResult(request, decommissionedHostNames, allMissingHostnames);
} catch (Exception e) {
// TODO CB-15132: This can be improved based on where and when the Exception occurred to potentially rollback certain aspects.
// ClusterClientInitException is one which is explicitly thrown.
String message = "Failed while attempting to decommission nodes via CM";
LOGGER.error(message, e);
// Deliberately converts the failure into a result object instead of rethrowing.
return new StopStartDownscaleDecommissionViaCMResult(message, e, request);
}
}
Aggregations