Use of com.sequenceiq.cloudbreak.cluster.api.ClusterDecomissionService in project cloudbreak by hortonworks.
The accept method of the DecommissionHandler class:
@Override
public void accept(Event<DecommissionRequest> event) {
    DecommissionRequest request = event.getData();
    DecommissionResult result;
    Set<String> hostNames = Collections.emptySet();
    boolean forced = request.getDetails() != null && request.getDetails().isForced();
    try {
        Stack stack = stackService.getByIdWithListsInTransaction(request.getResourceId());
        hostNames = getHostNamesForPrivateIds(request, stack);
        ClusterDecomissionService clusterDecomissionService = getClusterDecomissionService(stack);
        // Collect the removable hosts for every affected host group.
        Map<String, InstanceMetaData> hostsToRemove = new HashMap<>();
        Set<String> hostGroupNames = request.getHostGroupNames();
        for (String hostGroup : hostGroupNames) {
            hostsToRemove.putAll(getRemovableHosts(clusterDecomissionService, stack, hostGroup, hostNames));
        }
        updateInstancesToDeleteRequested(hostsToRemove.values());
        if (!hostsToRemove.isEmpty()) {
            executePreTerminationRecipes(stack, hostsToRemove.keySet());
        }
        // Bulk removal is used only when both the entitlement and the CM runtime version allow it.
        Optional<String> runtimeVersion = runtimeVersionService.getRuntimeVersion(stack.getCluster().getId());
        if (entitlementService.bulkHostsRemovalFromCMSupported(Crn.fromString(stack.getResourceCrn()).getAccountId())
                && CMRepositoryVersionUtil.isCmBulkHostsRemovalAllowed(runtimeVersion)) {
            result = bulkHostsRemoval(request, hostNames, forced, stack, clusterDecomissionService, hostsToRemove);
        } else {
            result = singleHostsRemoval(request, hostNames, forced, stack, clusterDecomissionService, hostsToRemove);
        }
    } catch (Exception e) {
        LOGGER.info("Exception occurred during decommission.", e);
        // A forced, non-repair downscale tolerates certain errors and continues with the removal.
        if (isTolerableError(e) && forced && !request.getDetails().isRepair()) {
            eventService.fireCloudbreakEvent(request.getResourceId(), UPDATE_IN_PROGRESS.name(),
                    CLUSTER_DECOMMISSION_FAILED_FORCE_DELETE_CONTINUE, Collections.singletonList(e.getMessage()));
            result = new DecommissionResult(request, hostNames);
        } else {
            result = new DecommissionResult(e.getMessage(), e, request, hostNames, UNKNOWN_ERROR_PHASE);
        }
    }
    eventBus.notify(result.selector(), new Event<>(event.getHeaders(), result));
}
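The getClusterDecomissionService(stack) helper is not shown on this page. Judging by the StopStartDownscaleDecommissionViaCMHandler snippet below, it most likely resolves the service through the cluster API connectors; a minimal sketch, assuming the handler has an injected ClusterApiConnectors field named clusterApiConnectors:

private ClusterDecomissionService getClusterDecomissionService(Stack stack) {
    // Assumed implementation: look up the connector for this stack and obtain its decommission service,
    // mirroring clusterApiConnectors.getConnector(stack).clusterDecomissionService() used in the handler below.
    return clusterApiConnectors.getConnector(stack).clusterDecomissionService();
}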
Use of com.sequenceiq.cloudbreak.cluster.api.ClusterDecomissionService in project cloudbreak by hortonworks.
The doAccept method of the StopStartDownscaleDecommissionViaCMHandler class:
@Override
protected Selectable doAccept(HandlerEvent<StopStartDownscaleDecommissionViaCMRequest> event) {
    StopStartDownscaleDecommissionViaCMRequest request = event.getData();
    LOGGER.info("StopStartDownscaleDecommissionViaCMHandler for: {}, {}", event.getData().getResourceId(), event.getData());
    try {
        Stack stack = stackService.getByIdWithLists(request.getResourceId());
        Cluster cluster = stack.getCluster();
        ClusterDecomissionService clusterDecomissionService = clusterApiConnectors.getConnector(stack).clusterDecomissionService();
        Set<String> hostNames = getHostNamesForPrivateIds(request.getInstanceIdsToDecommission(), stack);
        LOGGER.debug("Attempting to decommission hosts. count={}, hostnames={}", hostNames.size(), hostNames);
        HostGroup hostGroup = hostGroupService.getByClusterIdAndName(cluster.getId(), request.getHostGroupName())
                .orElseThrow(NotFoundException.notFound("hostgroup", request.getHostGroupName()));
        Map<String, InstanceMetaData> hostsToRemove = clusterDecomissionService.collectHostsToRemove(hostGroup, hostNames);
        // Track hosts that were requested but are unknown to CM.
        List<String> missingHostsInCm = Collections.emptyList();
        if (hostNames.size() != hostsToRemove.size()) {
            missingHostsInCm = hostNames.stream().filter(h -> !hostsToRemove.containsKey(h)).collect(Collectors.toList());
            LOGGER.info("Found fewer instances in CM to decommission, as compared to initial ask. foundCount={}, initialCount={}, missingHostsInCm={}",
                    hostsToRemove.size(), hostNames.size(), missingHostsInCm);
        }
        // TODO CB-14929: Potentially put the nodes into maintenance mode before decommissioning?
        // TODO CB-15132: Eventually, try parsing the results of the CM decommission, and see if a partial decommission went through in the
        //  timebound specified.
        Set<String> decommissionedHostNames = Collections.emptySet();
        if (!hostsToRemove.isEmpty()) {
            decommissionedHostNames = clusterDecomissionService.decommissionClusterNodesStopStart(hostsToRemove, POLL_FOR_10_MINUTES);
            updateInstanceStatuses(hostsToRemove, decommissionedHostNames, InstanceStatus.DECOMMISSIONED, "decommission requested for instances");
        }
        // This doesn't handle failures. It handles scenarios where CM list APIs don't have the necessary hosts available.
        List<String> allMissingHostnames = null;
        if (!missingHostsInCm.isEmpty()) {
            allMissingHostnames = new LinkedList<>(missingHostsInCm);
        }
        if (hostsToRemove.size() != decommissionedHostNames.size()) {
            Set<String> finalDecommissionedHostnames = decommissionedHostNames;
            List<String> additionalMissingDecommissionHostnames = hostsToRemove.keySet().stream()
                    .filter(h -> !finalDecommissionedHostnames.contains(h))
                    .collect(Collectors.toList());
            LOGGER.info("Decommissioned fewer instances than requested. decommissionedCount={}, expectedCount={}, initialCount={}, notDecommissioned=[{}]",
                    decommissionedHostNames.size(), hostsToRemove.size(), hostNames.size(), additionalMissingDecommissionHostnames);
            if (allMissingHostnames == null) {
                allMissingHostnames = new LinkedList<>();
            }
            allMissingHostnames.addAll(additionalMissingDecommissionHostnames);
        }
        LOGGER.info("hostsDecommissioned: count={}, hostNames={}", decommissionedHostNames.size(), decommissionedHostNames);
        if (!decommissionedHostNames.isEmpty()) {
            LOGGER.debug("Attempting to put decommissioned hosts into maintenance mode. count={}", decommissionedHostNames.size());
            flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(), CLUSTER_SCALING_STOPSTART_DOWNSCALE_ENTERINGCMMAINTMODE,
                    String.valueOf(decommissionedHostNames.size()));
            clusterDecomissionService.enterMaintenanceMode(decommissionedHostNames);
            flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(), CLUSTER_SCALING_STOPSTART_DOWNSCALE_ENTEREDCMMAINTMODE,
                    String.valueOf(decommissionedHostNames.size()));
            LOGGER.debug("Successfully put decommissioned hosts into maintenance mode. count={}", decommissionedHostNames.size());
        } else {
            LOGGER.debug("No nodes decommissioned, hence no nodes being put into maintenance mode");
        }
        return new StopStartDownscaleDecommissionViaCMResult(request, decommissionedHostNames, allMissingHostnames);
    } catch (Exception e) {
        // TODO CB-15132: This can be improved based on where and when the Exception occurred to potentially rollback certain aspects.
        // ClusterClientInitException is one which is explicitly thrown.
        String message = "Failed while attempting to decommission nodes via CM";
        LOGGER.error(message, e);
        return new StopStartDownscaleDecommissionViaCMResult(message, e, request);
    }
}
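Stripped of the flow bookkeeping above, the ClusterDecomissionService calls this handler makes reduce to the short sequence below. This is an illustrative condensation of the snippet, not additional handler code; the local variables are the ones already introduced above.

// 1. Ask CM which of the requested hosts it actually knows about.
Map<String, InstanceMetaData> hostsToRemove = clusterDecomissionService.collectHostsToRemove(hostGroup, hostNames);
// 2. Decommission those hosts with a bounded poll (stop-start variant).
Set<String> decommissioned = clusterDecomissionService.decommissionClusterNodesStopStart(hostsToRemove, POLL_FOR_10_MINUTES);
// 3. Put the successfully decommissioned hosts into CM maintenance mode.
clusterDecomissionService.enterMaintenanceMode(decommissioned);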
Use of com.sequenceiq.cloudbreak.cluster.api.ClusterDecomissionService in project cloudbreak by hortonworks.
The testErrorFromCmHostCollection method of the StopStartDownscaleDecommissionViaCMHandlerTest class:
@Test
void testErrorFromCmHostCollection() {
    int instancesToDecommissionCount = 5;
    int expectedInstanceToCollectCount = 5;
    int expectedInstancesDecommissionedCount = 5;
    List<InstanceMetaData> instancesToDecommission = getInstancesToDecommission(instancesToDecommissionCount);
    HostGroup hostGroup = createHostGroup(instancesToDecommission);
    Map<String, InstanceMetaData> collected = instancesToDecommission.stream()
            .limit(expectedInstanceToCollectCount)
            .collect(Collectors.toMap(InstanceMetaData::getDiscoveryFQDN, i -> i));
    List<InstanceMetaData> decommissionedMetadataList = collected.values().stream()
            .limit(expectedInstancesDecommissionedCount)
            .collect(Collectors.toList());
    Set<String> fqdnsDecommissioned = decommissionedMetadataList.stream()
            .map(InstanceMetaData::getDiscoveryFQDN)
            .collect(Collectors.toUnmodifiableSet());
    Set<Long> instanceIdsToDecommission = instancesToDecommission.stream()
            .map(InstanceMetaData::getPrivateId)
            .collect(Collectors.toUnmodifiableSet());
    Set<String> hostnamesToDecommission = instancesToDecommission.stream()
            .map(InstanceMetaData::getDiscoveryFQDN)
            .collect(Collectors.toUnmodifiableSet());
    setupAdditionalMocks(hostGroup, instancesToDecommission, collected, fqdnsDecommissioned);
    // Override the default stubbing so that host collection in CM fails.
    when(clusterDecomissionService.collectHostsToRemove(eq(hostGroup), eq(hostnamesToDecommission)))
            .thenThrow(new RuntimeException("collectHostsToDecommissionError"));
    StopStartDownscaleDecommissionViaCMRequest request =
            new StopStartDownscaleDecommissionViaCMRequest(1L, INSTANCE_GROUP_NAME, instanceIdsToDecommission);
    HandlerEvent handlerEvent = new HandlerEvent(Event.wrap(request));
    Selectable selectable = underTest.doAccept(handlerEvent);
    verify(clusterDecomissionService).collectHostsToRemove(eq(hostGroup), eq(hostnamesToDecommission));
    assertThat(selectable).isInstanceOf(StopStartDownscaleDecommissionViaCMResult.class);
    StopStartDownscaleDecommissionViaCMResult result = (StopStartDownscaleDecommissionViaCMResult) selectable;
    assertThat(result.getNotDecommissionedHostFqdns()).hasSize(0);
    assertThat(result.getDecommissionedHostFqdns()).hasSize(0);
    assertThat(result.getErrorDetails().getMessage()).isEqualTo("collectHostsToDecommissionError");
    assertThat(result.getStatus()).isEqualTo(EventStatus.FAILED);
    assertThat(result.selector()).isEqualTo("STOPSTARTDOWNSCALEDECOMMISSIONVIACMRESULT_ERROR");
    verifyNoMoreInteractions(instanceMetaDataService);
    verifyNoMoreInteractions(flowMessageService);
    verifyNoMoreInteractions(clusterDecomissionService);
}
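The setupAdditionalMocks helper used by these tests is not shown on this page. Based on the mocks the tests verify, it presumably wires the stack and host-group lookups and the two ClusterDecomissionService calls roughly as sketched below; the mock field names, stubbed values, and signatures are assumptions, not the project's actual helper.

// Hypothetical reconstruction of setupAdditionalMocks(hostGroup, instancesToDecommission, collected, fqdnsDecommissioned).
when(stackService.getByIdWithLists(anyLong())).thenReturn(stack);
when(clusterApiConnectors.getConnector(stack)).thenReturn(clusterApi);
when(clusterApi.clusterDecomissionService()).thenReturn(clusterDecomissionService);
when(hostGroupService.getByClusterIdAndName(anyLong(), eq(INSTANCE_GROUP_NAME))).thenReturn(Optional.of(hostGroup));
when(clusterDecomissionService.collectHostsToRemove(eq(hostGroup), anySet())).thenReturn(collected);
when(clusterDecomissionService.decommissionClusterNodesStopStart(eq(collected), anyLong())).thenReturn(fqdnsDecommissioned);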
Use of com.sequenceiq.cloudbreak.cluster.api.ClusterDecomissionService in project cloudbreak by hortonworks.
The testNoNodesAvailableInCm method of the StopStartDownscaleDecommissionViaCMHandlerTest class:
@Test
void testNoNodesAvailableInCm() {
    int instancesToDecommissionCount = 5;
    int expectedInstanceToCollectCount = 0;
    int expectedInstancesDecommissionedCount = 0;
    List<InstanceMetaData> instancesToDecommission = getInstancesToDecommission(instancesToDecommissionCount);
    HostGroup hostGroup = createHostGroup(instancesToDecommission);
    Map<String, InstanceMetaData> collected = instancesToDecommission.stream()
            .limit(expectedInstanceToCollectCount)
            .collect(Collectors.toMap(InstanceMetaData::getDiscoveryFQDN, i -> i));
    List<InstanceMetaData> decommissionedMetadataList = collected.values().stream()
            .limit(expectedInstancesDecommissionedCount)
            .collect(Collectors.toList());
    Set<String> fqdnsDecommissioned = decommissionedMetadataList.stream()
            .map(InstanceMetaData::getDiscoveryFQDN)
            .collect(Collectors.toUnmodifiableSet());
    setupAdditionalMocks(hostGroup, instancesToDecommission, collected, fqdnsDecommissioned);
    Set<Long> instanceIdsToDecommission = instancesToDecommission.stream()
            .map(InstanceMetaData::getPrivateId)
            .collect(Collectors.toUnmodifiableSet());
    Set<String> hostnamesToDecommission = instancesToDecommission.stream()
            .map(InstanceMetaData::getDiscoveryFQDN)
            .collect(Collectors.toUnmodifiableSet());
    StopStartDownscaleDecommissionViaCMRequest request =
            new StopStartDownscaleDecommissionViaCMRequest(1L, INSTANCE_GROUP_NAME, instanceIdsToDecommission);
    HandlerEvent handlerEvent = new HandlerEvent(Event.wrap(request));
    Selectable selectable = underTest.doAccept(handlerEvent);
    assertThat(selectable).isInstanceOf(StopStartDownscaleDecommissionViaCMResult.class);
    StopStartDownscaleDecommissionViaCMResult result = (StopStartDownscaleDecommissionViaCMResult) selectable;
    assertThat(result.getDecommissionedHostFqdns()).hasSize(expectedInstancesDecommissionedCount);
    assertThat(result.getNotDecommissionedHostFqdns()).hasSize(instancesToDecommissionCount - expectedInstancesDecommissionedCount);
    verifyNoMoreInteractions(instanceMetaDataService);
    verify(clusterDecomissionService).collectHostsToRemove(eq(hostGroup), eq(hostnamesToDecommission));
    verifyNoMoreInteractions(flowMessageService);
    verifyNoMoreInteractions(clusterDecomissionService);
}
Use of com.sequenceiq.cloudbreak.cluster.api.ClusterDecomissionService in project cloudbreak by hortonworks.
The testNoNodesFromCMDecommission method of the StopStartDownscaleDecommissionViaCMHandlerTest class:
@Test
void testNoNodesFromCMDecommission() {
    int instancesToDecommissionCount = 5;
    int expectedInstanceToCollectCount = 4;
    int expectedInstancesDecommissionedCount = 0;
    List<InstanceMetaData> instancesToDecommission = getInstancesToDecommission(instancesToDecommissionCount);
    HostGroup hostGroup = createHostGroup(instancesToDecommission);
    Map<String, InstanceMetaData> collected = instancesToDecommission.stream()
            .limit(expectedInstanceToCollectCount)
            .collect(Collectors.toMap(InstanceMetaData::getDiscoveryFQDN, i -> i));
    List<InstanceMetaData> decommissionedMetadataList = collected.values().stream()
            .limit(expectedInstancesDecommissionedCount)
            .collect(Collectors.toList());
    Set<String> fqdnsDecommissioned = decommissionedMetadataList.stream()
            .map(InstanceMetaData::getDiscoveryFQDN)
            .collect(Collectors.toUnmodifiableSet());
    setupAdditionalMocks(hostGroup, instancesToDecommission, collected, fqdnsDecommissioned);
    Set<Long> instanceIdsToDecommission = instancesToDecommission.stream()
            .map(InstanceMetaData::getPrivateId)
            .collect(Collectors.toUnmodifiableSet());
    Set<String> hostnamesToDecommission = instancesToDecommission.stream()
            .map(InstanceMetaData::getDiscoveryFQDN)
            .collect(Collectors.toUnmodifiableSet());
    StopStartDownscaleDecommissionViaCMRequest request =
            new StopStartDownscaleDecommissionViaCMRequest(1L, INSTANCE_GROUP_NAME, instanceIdsToDecommission);
    HandlerEvent handlerEvent = new HandlerEvent(Event.wrap(request));
    Selectable selectable = underTest.doAccept(handlerEvent);
    assertThat(selectable).isInstanceOf(StopStartDownscaleDecommissionViaCMResult.class);
    StopStartDownscaleDecommissionViaCMResult result = (StopStartDownscaleDecommissionViaCMResult) selectable;
    assertThat(result.getDecommissionedHostFqdns()).hasSize(expectedInstancesDecommissionedCount);
    assertThat(result.getNotDecommissionedHostFqdns()).hasSize(instancesToDecommissionCount - expectedInstancesDecommissionedCount);
    verifyNoMoreInteractions(instanceMetaDataService);
    verify(clusterDecomissionService).collectHostsToRemove(eq(hostGroup), eq(hostnamesToDecommission));
    verify(clusterDecomissionService).decommissionClusterNodesStopStart(eq(collected), anyLong());
    verifyNoMoreInteractions(flowMessageService);
    verifyNoMoreInteractions(clusterDecomissionService);
}