Use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in the cloudbreak project by Hortonworks.
Class StopStartDownscaleDecommissionViaCMHandler, method doAccept.
/**
 * Decommissions the requested instances through the cluster decommission service and then
 * places the hosts that were actually decommissioned into maintenance mode.
 *
 * <p>Hosts that were requested but not returned by {@code collectHostsToRemove}, and hosts
 * that were submitted but not reported back as decommissioned, are gathered into one list that
 * is handed to the result. That list stays {@code null} when nothing was missed.
 *
 * @param event handler event carrying the decommission request
 * @return a result holding the decommissioned host names and the missed host names, or a
 *         failure result wrapping the exception if any step throws
 */
@Override
protected Selectable doAccept(HandlerEvent<StopStartDownscaleDecommissionViaCMRequest> event) {
    StopStartDownscaleDecommissionViaCMRequest request = event.getData();
    LOGGER.info("StopStartDownscaleDecommissionViaCMHandler for: {}, {}", request.getResourceId(), request);
    try {
        Stack stack = stackService.getByIdWithLists(request.getResourceId());
        Cluster cluster = stack.getCluster();
        ClusterDecomissionService decommissioner = clusterApiConnectors.getConnector(stack).clusterDecomissionService();

        Set<String> requestedHosts = getHostNamesForPrivateIds(request.getInstanceIdsToDecommission(), stack);
        LOGGER.debug("Attempting to decommission hosts. count={}, hostnames={}", requestedHosts.size(), requestedHosts);

        HostGroup hostGroup = hostGroupService
                .getByClusterIdAndName(cluster.getId(), request.getHostGroupName())
                .orElseThrow(NotFoundException.notFound("hostgroup", request.getHostGroupName()));
        Map<String, InstanceMetaData> removableHosts = decommissioner.collectHostsToRemove(hostGroup, requestedHosts);

        // Requested hosts which the collect call did not return.
        List<String> hostsNotFoundInCm;
        if (requestedHosts.size() == removableHosts.size()) {
            hostsNotFoundInCm = Collections.emptyList();
        } else {
            hostsNotFoundInCm = requestedHosts.stream()
                    .filter(host -> !removableHosts.containsKey(host))
                    .collect(Collectors.toList());
            LOGGER.info("Found fewer instances in CM to decommission, as compared to initial ask. foundCount={}, initialCount={}, missingHostsInCm={}",
                    removableHosts.size(), requestedHosts.size(), hostsNotFoundInCm);
        }

        // TODO CB-14929: Potentially put the nodes into maintenance mode before decommissioning?
        // TODO CB-15132: Eventually, try parsing the results of the CM decommission, and see if a partial decommission went through in the
        // timebound specified.
        Set<String> decommissionedHosts;
        if (removableHosts.isEmpty()) {
            decommissionedHosts = Collections.emptySet();
        } else {
            decommissionedHosts = decommissioner.decommissionClusterNodesStopStart(removableHosts, POLL_FOR_10_MINUTES);
            updateInstanceStatuses(removableHosts, decommissionedHosts, InstanceStatus.DECOMMISSIONED, "decommission requested for instances");
        }

        // This doesn't handle failures. It handles scenarios where CM list APIs don't have the necessary hosts available.
        List<String> allMissedHosts = hostsNotFoundInCm.isEmpty() ? null : new LinkedList<>(hostsNotFoundInCm);
        if (removableHosts.size() != decommissionedHosts.size()) {
            List<String> notDecommissioned = removableHosts.keySet().stream()
                    .filter(host -> !decommissionedHosts.contains(host))
                    .collect(Collectors.toList());
            LOGGER.info("Decommissioned fewer instances than requested. decommissionedCount={}, expectedCount={}, initialCount={}, notDecommissioned=[{}]",
                    decommissionedHosts.size(), removableHosts.size(), requestedHosts.size(), notDecommissioned);
            if (allMissedHosts == null) {
                allMissedHosts = new LinkedList<>();
            }
            allMissedHosts.addAll(notDecommissioned);
        }

        LOGGER.info("hostsDecommissioned: count={}, hostNames={}", decommissionedHosts.size(), decommissionedHosts);
        if (decommissionedHosts.isEmpty()) {
            LOGGER.debug("No nodes decommissioned, hence no nodes being put into maintenance mode");
        } else {
            LOGGER.debug("Attempting to put decommissioned hosts into maintenance mode. count={}", decommissionedHosts.size());
            flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(),
                    CLUSTER_SCALING_STOPSTART_DOWNSCALE_ENTERINGCMMAINTMODE, String.valueOf(decommissionedHosts.size()));
            decommissioner.enterMaintenanceMode(decommissionedHosts);
            flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(),
                    CLUSTER_SCALING_STOPSTART_DOWNSCALE_ENTEREDCMMAINTMODE, String.valueOf(decommissionedHosts.size()));
            LOGGER.debug("Successfully put decommissioned hosts into maintenance mode. count={}", decommissionedHosts.size());
        }
        return new StopStartDownscaleDecommissionViaCMResult(request, decommissionedHosts, allMissedHosts);
    } catch (Exception e) {
        // TODO CB-15132: This can be improved based on where and when the Exception occurred to potentially rollback certain aspects.
        // ClusterClientInitException is one which is explicitly thrown.
        String message = "Failed while attempting to decommission nodes via CM";
        LOGGER.error(message, e);
        return new StopStartDownscaleDecommissionViaCMResult(message, e, request);
    }
}
Use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in the cloudbreak project by Hortonworks.
Class ClusterOperationService, method removeGatewayIfNotSupported.
/**
 * Clears the gateway on the cluster when the first CM repo component found reports a
 * Cloudera Manager version for which the Knox gateway is not supported.
 *
 * <p>Does nothing when no {@code CM_REPO_DETAILS} component is present. A failure to read
 * the component attributes is logged at debug level and otherwise ignored.
 *
 * @param cluster    cluster whose gateway may be reset to {@code null}
 * @param components cluster components to search for the CM repo details
 */
private void removeGatewayIfNotSupported(Cluster cluster, List<ClusterComponent> components) {
    ClusterComponent cmRepoComponent = null;
    for (ClusterComponent candidate : components) {
        if (ComponentType.CM_REPO_DETAILS.equals(candidate.getComponentType())) {
            cmRepoComponent = candidate;
            break;
        }
    }
    if (cmRepoComponent == null) {
        return;
    }

    try {
        ClouderaManagerRepo cmRepo = cmRepoComponent.getAttributes().get(ClouderaManagerRepo.class);
        boolean knoxSupported = CMRepositoryVersionUtil.isKnoxGatewaySupported(cmRepo);
        if (!knoxSupported) {
            LOGGER.debug("Knox gateway is not supported by CM version: {}, ignoring it for cluster: {}", cmRepo.getVersion(), cluster.getName());
            cluster.setGateway(null);
        }
    } catch (IOException e) {
        LOGGER.debug("Failed to read CM repo cluster component", e);
    }
}
Use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in the cloudbreak project by Hortonworks.
Class CloudbreakEventHandler, method sendNotification.
/**
 * Forwards the composite event to the notification service when it carries a stack response.
 *
 * <p>Any exception raised while sending is logged as a warning and not propagated.
 *
 * @param cloudbreakCompositeEvent event providing the owner, stack response, resource event
 *                                 and message arguments
 */
private void sendNotification(CloudbreakCompositeEvent cloudbreakCompositeEvent) {
    String owner = cloudbreakCompositeEvent.getOwner();
    StackV4Response stackResponse = cloudbreakCompositeEvent.getStackResponse();
    if (stackResponse == null) {
        // Nothing to notify about without a stack response.
        return;
    }

    try {
        Collection<String> messageArgs = cloudbreakCompositeEvent.getResourceEventMessageArgs();
        notificationService.send(cloudbreakCompositeEvent.getResourceEvent(), messageArgs, stackResponse, owner);
    } catch (Exception e) {
        LOGGER.warn(String.format("Failed to send notification from structured event, stack('%s')", stackResponse.getId()), e);
    }
}
Use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in the cloudbreak project by Hortonworks.
Class MountDisks, method mountDisks.
/**
 * Formats and mounts disks through the host orchestrator and persists the reported
 * {@code uuids} and {@code fstab} values for matching instances.
 *
 * <p>The orchestrator call used depends on {@code isCbVersionPostOptimisation(stack)}. For each
 * hostname in the returned mount info, the first instance whose discovery FQDN equals the
 * hostname and whose status is {@code CREATED} is looked up; values are persisted only when
 * such an instance exists and both values are non-empty.
 *
 * @param stack             stack whose nodes are being processed
 * @param nodesWithDiskData nodes passed to the orchestrator as carrying disk data
 * @param allNodes          all nodes passed to the orchestrator
 * @throws CloudbreakException a {@code CloudbreakSecuritySetupException} wrapping the
 *                             orchestrator failure when mounting fails
 */
private void mountDisks(Stack stack, Set<Node> nodesWithDiskData, Set<Node> allNodes) throws CloudbreakException {
    Cluster cluster = stack.getCluster();
    try {
        List<GatewayConfig> gatewayConfigs = gatewayConfigService.getAllGatewayConfigs(stack);
        ExitCriteriaModel exitCriteriaModel = clusterDeletionBasedModel(stack.getId(), cluster.getId());

        Map<String, Map<String, String>> mountInfo = isCbVersionPostOptimisation(stack)
                ? hostOrchestrator.formatAndMountDisksOnNodes(stack, gatewayConfigs, nodesWithDiskData, allNodes, exitCriteriaModel,
                        stack.getPlatformVariant())
                : hostOrchestrator.formatAndMountDisksOnNodesLegacy(gatewayConfigs, nodesWithDiskData, allNodes, exitCriteriaModel,
                        stack.getPlatformVariant());

        for (Map.Entry<String, Map<String, String>> hostEntry : mountInfo.entrySet()) {
            String hostname = hostEntry.getKey();
            Map<String, String> diskDetails = hostEntry.getValue();

            Optional<String> createdInstanceId = stack.getInstanceMetaDataAsList().stream()
                    .filter(metaData -> hostname.equals(metaData.getDiscoveryFQDN())
                            && InstanceStatus.CREATED.equals(metaData.getInstanceStatus()))
                    .map(InstanceMetaData::getInstanceId)
                    .findFirst();
            if (!createdInstanceId.isPresent()) {
                continue;
            }

            String uuids = diskDetails.getOrDefault("uuids", "");
            String fstab = diskDetails.getOrDefault("fstab", "");
            if (StringUtils.isEmpty(uuids) || StringUtils.isEmpty(fstab)) {
                continue;
            }
            persistUuidAndFstab(stack, createdInstanceId.get(), hostname, uuids, fstab);
        }
    } catch (CloudbreakOrchestratorFailedException e) {
        LOGGER.error("Failed to mount disks", e);
        throw new CloudbreakSecuritySetupException(e);
    }
}
Use of com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.FAILED in the cloudbreak project by Hortonworks.
Class ClouderaManagerDecomissioner, method deleteHostFromClouderaManager.
/**
 * Removes the host matching the instance's discovery FQDN from the cluster named after the
 * stack and then deletes it through the hosts API.
 *
 * <p>When no listed host has a hostname equal to the instance's discovery FQDN (including
 * when that FQDN is {@code null}), only a debug message is logged.
 *
 * @param stack  stack whose name is used as the cluster name for the removal call
 * @param data   instance metadata supplying the discovery FQDN to match
 * @param client API client used to obtain the hosts and clusters resource APIs
 * @throws CloudbreakServiceException wrapping any {@code ApiException} from the API calls
 */
private void deleteHostFromClouderaManager(Stack stack, InstanceMetaData data, ApiClient client) {
    HostsResourceApi hostsResourceApi = clouderaManagerApiFactory.getHostsResourceApi(client);
    try {
        ApiHostList knownHosts = hostsResourceApi.readHosts(null, null, SUMMARY_REQUEST_VIEW);

        // Locate the first host whose hostname equals the instance's discovery FQDN.
        ApiHost matchingHost = null;
        for (ApiHost candidate : knownHosts.getItems()) {
            if (data.getDiscoveryFQDN() != null && data.getDiscoveryFQDN().equals(candidate.getHostname())) {
                matchingHost = candidate;
                break;
            }
        }

        if (matchingHost == null) {
            LOGGER.debug("Host already deleted.");
            return;
        }

        ClustersResourceApi clustersResourceApi = clouderaManagerApiFactory.getClustersResourceApi(client);
        clustersResourceApi.removeHost(stack.getName(), matchingHost.getHostId());
        hostsResourceApi.deleteHost(matchingHost.getHostId());
        LOGGER.debug("Host remove request sent. Host id: [{}]", matchingHost.getHostId());
    } catch (ApiException e) {
        LOGGER.error("Failed to delete host: {}", data.getDiscoveryFQDN(), e);
        throw new CloudbreakServiceException(e.getMessage(), e);
    }
}
Aggregations