Example use of com.sequenceiq.cloudbreak.cluster.api.ClusterSetupService in the cloudbreak project by hortonworks.
From the class StopStartUpscaleCommissionViaCMHandlerTest, method testErrorFromWaitForHostsHealthy.
/**
 * Checks the failure path of the handler: when {@code waitForHostsHealthy} throws, the handler
 * must return a FAILED {@link StopStartUpscaleCommissionViaCMResult} that carries the original
 * exception and reports neither commissioned nor not-recommissioned hosts.
 */
@Test
void testErrorFromWaitForHostsHealthy() throws ClusterClientInitException {
    // Arrange: five instances; every FQDN is handed to setupPerTestMocks both as a
    // CM-available host and as a recommissioned host.
    int instanceCount = 5;
    List<InstanceMetaData> toCommission = createInstancesToCommission(instanceCount);
    HostGroup group = createHostGroup(toCommission);
    Set<String> fqdns = toCommission.stream()
            .map(InstanceMetaData::getDiscoveryFQDN)
            .collect(Collectors.toSet());
    Map<String, InstanceMetaData> hostsKnownToCm = toCommission.stream()
            .collect(Collectors.toMap(InstanceMetaData::getDiscoveryFQDN, i -> i));
    Set<String> recommissioned = Set.copyOf(hostsKnownToCm.keySet());
    setupPerTestMocks(group, fqdns, hostsKnownToCm, recommissioned);

    // The health wait is the step under test: make it blow up.
    doThrow(new RuntimeException("waitForHostsHealthyException"))
            .when(clusterSetupService)
            .waitForHostsHealthy(anySet());

    // Act
    StopStartUpscaleCommissionViaCMRequest request =
            new StopStartUpscaleCommissionViaCMRequest(1L, INSTANCE_GROUP_NAME, toCommission, Collections.emptyList());
    Selectable selectable = underTest.doAccept(new HandlerEvent(Event.wrap(request)));

    // Assert: a failure result wrapping the thrown exception, with empty host collections.
    assertThat(selectable).isInstanceOf(StopStartUpscaleCommissionViaCMResult.class);
    StopStartUpscaleCommissionViaCMResult failure = (StopStartUpscaleCommissionViaCMResult) selectable;
    assertThat(failure.getSuccessfullyCommissionedFqdns()).isEmpty();
    assertThat(failure.getNotRecommissionedFqdns()).isEmpty();
    assertThat(failure.getErrorDetails().getMessage()).isEqualTo("waitForHostsHealthyException");
    assertThat(failure.getStatus()).isEqualTo(EventStatus.FAILED);
    assertThat(failure.selector()).isEqualTo("STOPSTARTUPSCALECOMMISSIONVIACMRESULT_ERROR");
}
Example use of com.sequenceiq.cloudbreak.cluster.api.ClusterSetupService in the cloudbreak project by hortonworks.
From the class StopStartUpscaleCommissionViaCMHandler, method doAccept.
/**
 * Handles a stop-start upscale commission request: waits for the requested instances to be
 * reported healthy, then recommissions those of them that the commission service finds in
 * the host group.
 *
 * <p>Hosts that were requested but either not returned by
 * {@code collectHostsToCommission} or not returned by {@code recommissionClusterNodes}
 * are reported in the result as not recommissioned.
 *
 * <p>Any exception raised inside the {@code try} block is logged (with its stack trace) and
 * converted into a failure result instead of being propagated.
 *
 * @param event the event wrapping the {@link StopStartUpscaleCommissionViaCMRequest}
 * @return a {@link StopStartUpscaleCommissionViaCMResult} describing either the recommissioned /
 *         not-recommissioned hosts, or the failure
 */
@Override
protected Selectable doAccept(HandlerEvent<StopStartUpscaleCommissionViaCMRequest> event) {
    StopStartUpscaleCommissionViaCMRequest request = event.getData();
    LOGGER.info("StopStartUpscaleCommissionViaCMHandler for: {}, {}", request.getResourceId(), event);
    LOGGER.debug("StartedInstancesToCommission: {}, servicesNotRunningInstancesToCommission: {}",
            request.getStartedInstancesToCommission(), request.getServicesNotRunningInstancesToCommission());

    // Both categories of instances go through the same health wait and recommission steps.
    List<InstanceMetaData> allInstancesToCommission = new LinkedList<>();
    allInstancesToCommission.addAll(request.getStartedInstancesToCommission());
    allInstancesToCommission.addAll(request.getServicesNotRunningInstancesToCommission());

    try {
        Stack stack = stackService.getByIdWithLists(request.getResourceId());
        Cluster cluster = stack.getCluster();

        flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(),
                CLUSTER_SCALING_STOPSTART_UPSCALE_WAITING_HOSTSTART, String.valueOf(allInstancesToCommission.size()));

        ClusterSetupService clusterSetupService = clusterApiConnectors.getConnector(stack).clusterSetupService();
        clusterSetupService.waitForHostsHealthy(new HashSet<>(allInstancesToCommission));

        flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(),
                CLUSTER_SCALING_STOPSTART_UPSCALE_CMHOSTSSTARTED, String.valueOf(allInstancesToCommission.size()));

        ClusterCommissionService clusterCommissionService = clusterApiConnectors.getConnector(stack).clusterCommissionService();

        Set<String> hostNames = allInstancesToCommission.stream()
                .map(InstanceMetaData::getDiscoveryFQDN)
                .collect(Collectors.toSet());
        LOGGER.debug("HostNames to recommission: count={}, hostNames={}", hostNames.size(), hostNames);

        HostGroup hostGroup = hostGroupService.getByClusterIdAndName(cluster.getId(), request.getHostGroupName())
                .orElseThrow(NotFoundException.notFound("hostgroup", request.getHostGroupName()));

        Map<String, InstanceMetaData> hostsToRecommission = clusterCommissionService.collectHostsToCommission(hostGroup, hostNames);

        // Requested hosts that the commission service did not return.
        List<String> missingHostsInCm = Collections.emptyList();
        if (hostNames.size() != hostsToRecommission.size()) {
            missingHostsInCm = hostNames.stream()
                    .filter(h -> !hostsToRecommission.containsKey(h))
                    .collect(Collectors.toList());
            LOGGER.info("Found fewer instances in CM to commission, as compared to initial ask. foundCount={}, initialCount={}, missingHostsInCm={}",
                    hostsToRecommission.size(), hostNames.size(), missingHostsInCm);
        }

        // TODO CB-15132: Eventually ensure CM, relevant services (YARN RM) are in a functional state - or fail/delay the operation
        // TODO CB-15132: Potentially poll nodes for success. Don't fail the entire operation if a single node fails to commission.
        // What would need to happen to the CM command in this case? (Can only work in the presence of a co-operative CM API call.
        // Alternately this could go straight to the service)
        // Skip the recommission call entirely when there is nothing to recommission.
        // TODO CB-15132: Maybe wait for services to start / force CM sync.
        Set<String> recommissionedHostnames = hostsToRecommission.isEmpty()
                ? Collections.emptySet()
                : clusterCommissionService.recommissionClusterNodes(hostsToRecommission);

        // Deliberately left null when no host is missing: the result constructor is not visible
        // from here, so the original null-vs-list contract is preserved unchanged.
        List<String> allMissingRecommissionHostnames = missingHostsInCm.isEmpty() ? null : new LinkedList<>(missingHostsInCm);

        if (hostsToRecommission.size() != recommissionedHostnames.size()) {
            // Hosts that were submitted for recommission but not reported back as recommissioned.
            List<String> additionalMissingRecommissionHostnames = hostsToRecommission.keySet().stream()
                    .filter(h -> !recommissionedHostnames.contains(h))
                    .collect(Collectors.toList());
            LOGGER.info("Recommissioned fewer instances than requested. recommissionedCount={}, expectedCount={}, initialCount={}, notRecommissioned=[{}]",
                    recommissionedHostnames.size(), hostsToRecommission.size(), hostNames.size(), additionalMissingRecommissionHostnames);
            if (allMissingRecommissionHostnames == null) {
                allMissingRecommissionHostnames = new LinkedList<>();
            }
            allMissingRecommissionHostnames.addAll(additionalMissingRecommissionHostnames);
        }

        return new StopStartUpscaleCommissionViaCMResult(request, recommissionedHostnames, allMissingRecommissionHostnames);
    } catch (Exception e) {
        // TODO CB-15132: This can be improved based on where and when the Exception occurred to potentially rollback certain aspects.
        // ClusterClientInitException is one which is explicitly thrown.
        String message = "Failed while attempting to commission nodes via CM";
        // Pass the exception as the last argument so the logger records the cause and stack trace.
        LOGGER.error(message, e);
        return new StopStartUpscaleCommissionViaCMResult(message, e, request);
    }
}
Aggregations