Search in sources :

Example 1 with ClusterCommissionService

Use of com.sequenceiq.cloudbreak.cluster.api.ClusterCommissionService in the cloudbreak project by hortonworks.

In the class StopStartUpscaleCommissionViaCMHandler, method doAccept:

/**
 * Waits for the requested instances to become healthy and then recommissions them
 * through the cluster's commission service.
 *
 * <p>The instances to handle are the union of the request's "started" instances and its
 * "services not running" instances. Hosts that the commission service does not return from
 * {@code collectHostsToCommission}, as well as hosts that were submitted but are absent from
 * the set returned by {@code recommissionClusterNodes}, are gathered into a single
 * "not recommissioned" list that is handed to the result.
 *
 * @param event handler event wrapping the {@link StopStartUpscaleCommissionViaCMRequest}
 * @return on success, a {@link StopStartUpscaleCommissionViaCMResult} built from the request, the
 *         recommissioned hostnames and the list of hostnames that were not recommissioned
 *         ({@code null} when nothing was missed); on any exception, a result built from an error
 *         message, the exception and the request
 */
@Override
protected Selectable doAccept(HandlerEvent<StopStartUpscaleCommissionViaCMRequest> event) {
    StopStartUpscaleCommissionViaCMRequest request = event.getData();
    LOGGER.info("StopStartUpscaleCommissionViaCMHandler for: {}, {}", event.getData().getResourceId(), event);
    LOGGER.debug("StartedInstancesToCommission: {}, servicesNotRunningInstancesToCommission: {}",
            request.getStartedInstancesToCommission(), request.getServicesNotRunningInstancesToCommission());
    List<InstanceMetaData> allInstancesToCommission = new LinkedList<>();
    allInstancesToCommission.addAll(request.getStartedInstancesToCommission());
    allInstancesToCommission.addAll(request.getServicesNotRunningInstancesToCommission());
    try {
        Stack stack = stackService.getByIdWithLists(request.getResourceId());
        Cluster cluster = stack.getCluster();

        // Step 1: block until the hosts are reported healthy, emitting a flow event before and after.
        flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(),
                CLUSTER_SCALING_STOPSTART_UPSCALE_WAITING_HOSTSTART, String.valueOf(allInstancesToCommission.size()));
        ClusterSetupService clusterSetupService = clusterApiConnectors.getConnector(stack).clusterSetupService();
        clusterSetupService.waitForHostsHealthy(new HashSet<>(allInstancesToCommission));
        flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(),
                CLUSTER_SCALING_STOPSTART_UPSCALE_CMHOSTSSTARTED, String.valueOf(allInstancesToCommission.size()));

        // Step 2: resolve which of the requested hostnames the commission service can act on.
        ClusterCommissionService clusterCommissionService = clusterApiConnectors.getConnector(stack).clusterCommissionService();
        Set<String> hostNames = allInstancesToCommission.stream()
                .map(InstanceMetaData::getDiscoveryFQDN)
                .collect(Collectors.toSet());
        LOGGER.debug("HostNames to recommission: count={}, hostNames={}", hostNames.size(), hostNames);
        HostGroup hostGroup = hostGroupService.getByClusterIdAndName(cluster.getId(), request.getHostGroupName())
                .orElseThrow(NotFoundException.notFound("hostgroup", request.getHostGroupName()));
        Map<String, InstanceMetaData> hostsToRecommission = clusterCommissionService.collectHostsToCommission(hostGroup, hostNames);
        List<String> missingHostsInCm = Collections.emptyList();
        if (hostNames.size() != hostsToRecommission.size()) {
            missingHostsInCm = hostNames.stream()
                    .filter(h -> !hostsToRecommission.containsKey(h))
                    .collect(Collectors.toList());
            LOGGER.info("Found fewer instances in CM to commission, as compared to initial ask. foundCount={}, initialCount={}, missingHostsInCm={}",
                    hostsToRecommission.size(), hostNames.size(), missingHostsInCm);
        }

        // TODO CB-15132: Eventually ensure CM, relevant services (YARN RM) are in a functional state - or fail/delay the operation
        // TODO CB-15132: Potentially poll nodes for success. Don't fail the entire operation if a single node fails to commission.
        // What would need to happen to the CM command in this case? (Can only work in the presence of a co-operative CM API call.
        // Alternately this could go straight to the service)

        // Step 3: recommission; skipped entirely when there is nothing to submit.
        // TODO CB-15132: Maybe wait for services to start / force CM sync.
        Set<String> recommissionedHostnames = hostsToRecommission.isEmpty()
                ? Collections.<String>emptySet()
                : clusterCommissionService.recommissionClusterNodes(hostsToRecommission);

        // Step 4: merge both kinds of "missing" hosts. The list intentionally stays null when
        // nothing is missing, matching what the result constructor has always been given.
        List<String> allMissingRecommissionHostnames = null;
        if (!missingHostsInCm.isEmpty()) {
            allMissingRecommissionHostnames = new LinkedList<>(missingHostsInCm);
        }
        if (hostsToRecommission.size() != recommissionedHostnames.size()) {
            List<String> additionalMissingRecommissionHostnames = hostsToRecommission.keySet().stream()
                    .filter(h -> !recommissionedHostnames.contains(h))
                    .collect(Collectors.toList());
            LOGGER.info("Recommissioned fewer instances than requested. recommissionedCount={}, expectedCount={}, initialCount={}, notRecommissioned=[{}]",
                    recommissionedHostnames.size(), hostsToRecommission.size(), hostNames.size(), additionalMissingRecommissionHostnames);
            if (allMissingRecommissionHostnames == null) {
                allMissingRecommissionHostnames = new LinkedList<>();
            }
            allMissingRecommissionHostnames.addAll(additionalMissingRecommissionHostnames);
        }
        return new StopStartUpscaleCommissionViaCMResult(request, recommissionedHostnames, allMissingRecommissionHostnames);
    } catch (Exception e) {
        // TODO CB-15132: This can be improved based on where and when the Exception occurred to potentially rollback certain aspects.
        // ClusterClientInitException is one which is explicitly thrown.
        String message = "Failed while attempting to commission nodes via CM";
        // Pass the exception to the logger so the stack trace and cause are not lost.
        LOGGER.error(message, e);
        return new StopStartUpscaleCommissionViaCMResult(message, e, request);
    }
}
Also used : ClusterSetupService(com.sequenceiq.cloudbreak.cluster.api.ClusterSetupService) Stack(com.sequenceiq.cloudbreak.domain.stack.Stack) Cluster(com.sequenceiq.cloudbreak.domain.stack.cluster.Cluster) LoggerFactory(org.slf4j.LoggerFactory) EventSelectorUtil(com.sequenceiq.flow.event.EventSelectorUtil) Selectable(com.sequenceiq.cloudbreak.common.event.Selectable) HostGroupService(com.sequenceiq.cloudbreak.service.hostgroup.HostGroupService) CLUSTER_SCALING_STOPSTART_UPSCALE_CMHOSTSSTARTED(com.sequenceiq.cloudbreak.event.ResourceEvent.CLUSTER_SCALING_STOPSTART_UPSCALE_CMHOSTSSTARTED) StopStartUpscaleCommissionViaCMResult(com.sequenceiq.cloudbreak.reactor.api.event.orchestration.StopStartUpscaleCommissionViaCMResult) HashSet(java.util.HashSet) Inject(javax.inject.Inject) ExceptionCatcherEventHandler(com.sequenceiq.flow.reactor.api.handler.ExceptionCatcherEventHandler) UPDATE_IN_PROGRESS(com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status.UPDATE_IN_PROGRESS) Event(reactor.bus.Event) CloudbreakFlowMessageService(com.sequenceiq.cloudbreak.core.flow2.stack.CloudbreakFlowMessageService) Map(java.util.Map) ClusterSetupService(com.sequenceiq.cloudbreak.cluster.api.ClusterSetupService) CLUSTER_SCALING_STOPSTART_UPSCALE_WAITING_HOSTSTART(com.sequenceiq.cloudbreak.event.ResourceEvent.CLUSTER_SCALING_STOPSTART_UPSCALE_WAITING_HOSTSTART) LinkedList(java.util.LinkedList) NotFoundException(com.sequenceiq.cloudbreak.common.exception.NotFoundException) Logger(org.slf4j.Logger) Set(java.util.Set) HandlerEvent(com.sequenceiq.flow.reactor.api.handler.HandlerEvent) StopStartUpscaleCommissionViaCMRequest(com.sequenceiq.cloudbreak.reactor.api.event.cluster.StopStartUpscaleCommissionViaCMRequest) Collectors(java.util.stream.Collectors) ClusterCommissionService(com.sequenceiq.cloudbreak.cluster.api.ClusterCommissionService) List(java.util.List) Component(org.springframework.stereotype.Component) HostGroup(com.sequenceiq.cloudbreak.domain.stack.cluster.host.HostGroup) 
InstanceMetaData(com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData) ClusterApiConnectors(com.sequenceiq.cloudbreak.service.cluster.ClusterApiConnectors) Collections(java.util.Collections) StackService(com.sequenceiq.cloudbreak.service.stack.StackService) StopStartUpscaleCommissionViaCMResult(com.sequenceiq.cloudbreak.reactor.api.event.orchestration.StopStartUpscaleCommissionViaCMResult) Cluster(com.sequenceiq.cloudbreak.domain.stack.cluster.Cluster) HostGroup(com.sequenceiq.cloudbreak.domain.stack.cluster.host.HostGroup) LinkedList(java.util.LinkedList) NotFoundException(com.sequenceiq.cloudbreak.common.exception.NotFoundException) Stack(com.sequenceiq.cloudbreak.domain.stack.Stack) InstanceMetaData(com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData) StopStartUpscaleCommissionViaCMRequest(com.sequenceiq.cloudbreak.reactor.api.event.cluster.StopStartUpscaleCommissionViaCMRequest) ClusterCommissionService(com.sequenceiq.cloudbreak.cluster.api.ClusterCommissionService)

Aggregations

UPDATE_IN_PROGRESS (com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status.UPDATE_IN_PROGRESS)1 ClusterCommissionService (com.sequenceiq.cloudbreak.cluster.api.ClusterCommissionService)1 ClusterSetupService (com.sequenceiq.cloudbreak.cluster.api.ClusterSetupService)1 Selectable (com.sequenceiq.cloudbreak.common.event.Selectable)1 NotFoundException (com.sequenceiq.cloudbreak.common.exception.NotFoundException)1 CloudbreakFlowMessageService (com.sequenceiq.cloudbreak.core.flow2.stack.CloudbreakFlowMessageService)1 Stack (com.sequenceiq.cloudbreak.domain.stack.Stack)1 Cluster (com.sequenceiq.cloudbreak.domain.stack.cluster.Cluster)1 HostGroup (com.sequenceiq.cloudbreak.domain.stack.cluster.host.HostGroup)1 InstanceMetaData (com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData)1 CLUSTER_SCALING_STOPSTART_UPSCALE_CMHOSTSSTARTED (com.sequenceiq.cloudbreak.event.ResourceEvent.CLUSTER_SCALING_STOPSTART_UPSCALE_CMHOSTSSTARTED)1 CLUSTER_SCALING_STOPSTART_UPSCALE_WAITING_HOSTSTART (com.sequenceiq.cloudbreak.event.ResourceEvent.CLUSTER_SCALING_STOPSTART_UPSCALE_WAITING_HOSTSTART)1 StopStartUpscaleCommissionViaCMRequest (com.sequenceiq.cloudbreak.reactor.api.event.cluster.StopStartUpscaleCommissionViaCMRequest)1 StopStartUpscaleCommissionViaCMResult (com.sequenceiq.cloudbreak.reactor.api.event.orchestration.StopStartUpscaleCommissionViaCMResult)1 ClusterApiConnectors (com.sequenceiq.cloudbreak.service.cluster.ClusterApiConnectors)1 HostGroupService (com.sequenceiq.cloudbreak.service.hostgroup.HostGroupService)1 StackService (com.sequenceiq.cloudbreak.service.stack.StackService)1 EventSelectorUtil (com.sequenceiq.flow.event.EventSelectorUtil)1 ExceptionCatcherEventHandler (com.sequenceiq.flow.reactor.api.handler.ExceptionCatcherEventHandler)1 HandlerEvent (com.sequenceiq.flow.reactor.api.handler.HandlerEvent)1