Search in sources :

Example 16 with InstanceMetaData

use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.

the class StopStartDownscaleActions method decommissionViaCmFailedAction.

@Bean(name = "STOPSTART_DOWNSCALE_DECOMMISSION_VIA_CM_FAILED_STATE")
public Action<?, ?> decommissionViaCmFailedAction() {
    return new AbstractStopStartDownscaleActions<>(StopStartDownscaleDecommissionViaCMResult.class) {

        @Override
        protected void doExecute(StopStartDownscaleContext context, StopStartDownscaleDecommissionViaCMResult payload, Map<Object, Object> variables) throws Exception {
            LOGGER.warn("Failure during the decommissionViaCm step");
            // TODO CB-14929. Should the nodes be put into an ORCHESTRATOR_FAILED state? What are the manual recovery steps from this state.
            Set<String> hostnames = getHostNamesForPrivateIds(payload.getRequest().getInstanceIdsToDecommission(), context.getStack());
            stopStartDownscaleFlowService.decommissionViaCmFailed(payload.getResourceId(), hostnames);
            sendEvent(context, STOPSTART_DOWNSCALE_FAILURE_EVENT.event(), new StackFailureEvent(payload.getResourceId(), payload.getErrorDetails()));
        }

        private Set<String> getHostNamesForPrivateIds(Set<Long> hostIdsToRemove, Stack stack) {
            return hostIdsToRemove.stream().map(privateId -> {
                Optional<InstanceMetaData> instanceMetadata = stackService.getInstanceMetadata(stack.getInstanceMetaDataAsList(), privateId);
                return instanceMetadata.map(InstanceMetaData::getDiscoveryFQDN).orElse(null);
            }).filter(StringUtils::isNotEmpty).collect(Collectors.toSet());
        }
    };
}
Also used : InstanceMetaData(com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData) StopStartDownscaleDecommissionViaCMResult(com.sequenceiq.cloudbreak.reactor.api.event.orchestration.StopStartDownscaleDecommissionViaCMResult) Set(java.util.Set) HashSet(java.util.HashSet) StackFailureEvent(com.sequenceiq.cloudbreak.reactor.api.event.StackFailureEvent) Map(java.util.Map) CloudStack(com.sequenceiq.cloudbreak.cloud.model.CloudStack) Stack(com.sequenceiq.cloudbreak.domain.stack.Stack) Bean(org.springframework.context.annotation.Bean)

Example 17 with InstanceMetaData

use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.

the class StopStartUpscaleActions method cmCommissionAction.

@Bean(name = "STOPSTART_UPSCALE_HOSTS_COMMISSION_STATE")
public Action<?, ?> cmCommissionAction() {
    return new AbstractStopStartUpscaleActions<>(StopStartUpscaleStartInstancesResult.class) {

        @Override
        protected void doExecute(StopStartUpscaleContext context, StopStartUpscaleStartInstancesResult payload, Map<Object, Object> variables) throws Exception {
            // Update instance metadata for successful nodes before handling / logging info about failures.
            List<CloudVmInstanceStatus> cloudVmInstanceStatusList = payload.getAffectedInstanceStatuses();
            Set<String> cloudInstanceIdsStarted = cloudVmInstanceStatusList.stream().filter(x -> x.getStatus() == InstanceStatus.STARTED).map(x -> x.getCloudInstance().getInstanceId()).collect(Collectors.toUnmodifiableSet());
            List<InstanceMetaData> startedInstancesMetaData = cloudInstanceIdToInstanceMetaDataConverter.getNotDeletedAndNotZombieInstances(context.getStack(), context.getHostGroupName(), cloudInstanceIdsStarted);
            clusterUpscaleFlowService.instancesStarted(context.getStack().getId(), startedInstancesMetaData);
            handleInstanceUnsuccessfulStart(context, cloudVmInstanceStatusList);
            // This list is currently empty. It could be populated later in another flow-step by querying CM to get service health.
            // Meant to be a mechanism which detects cloud instances which are RUNNING, but not being utilized (likely due to previous failures)
            List<CloudInstance> instancesWithServicesNotRunning = payload.getStartInstanceRequest().getStartedInstancesWithServicesNotRunning();
            List<InstanceMetaData> metaDataWithServicesNotRunning = cloudInstanceIdToInstanceMetaDataConverter.getNotDeletedAndNotZombieInstances(context.getStack(), context.getHostGroupName(), instancesWithServicesNotRunning.stream().map(i -> i.getInstanceId()).collect(Collectors.toUnmodifiableSet()));
            LOGGER.info("StartedInstancesCount={}, StartedInstancesMetadataCount={}," + " instancesWithServicesNotRunningCount={}, instancesWithServicesNotRunningMetadataCount={}", cloudInstanceIdsStarted.size(), startedInstancesMetaData.size(), instancesWithServicesNotRunning.size(), metaDataWithServicesNotRunning.size());
            int toCommissionNodeCount = metaDataWithServicesNotRunning.size() + startedInstancesMetaData.size();
            if (toCommissionNodeCount < context.getAdjustment()) {
                LOGGER.warn("Not enough nodes found to commission. DesiredCount={}, availableCount={}", context.getAdjustment(), toCommissionNodeCount);
                clusterUpscaleFlowService.warnNotEnoughInstances(context.getStack().getId(), context.getHostGroupName(), context.getAdjustment(), toCommissionNodeCount);
            }
            clusterUpscaleFlowService.upscaleCommissioningNodes(context.getStack().getId(), context.getHostGroupName(), startedInstancesMetaData, metaDataWithServicesNotRunning);
            StopStartUpscaleCommissionViaCMRequest commissionRequest = new StopStartUpscaleCommissionViaCMRequest(context.getStack().getId(), context.getHostGroupName(), startedInstancesMetaData, metaDataWithServicesNotRunning);
            sendEvent(context, commissionRequest);
        }

        private void handleInstanceUnsuccessfulStart(StopStartUpscaleContext context, List<CloudVmInstanceStatus> cloudVmInstanceStatusList) {
            try {
                List<CloudVmInstanceStatus> instancesNotInDesiredState = cloudVmInstanceStatusList.stream().filter(i -> i.getStatus() != InstanceStatus.STARTED).collect(Collectors.toList());
                if (instancesNotInDesiredState.size() > 0) {
                    // Not updating the status of these instances in the DB. Instead letting the regular syncer threads take care of this.
                    // This is in case there is additional logic in the syncers while processing Instance state changes.
                    LOGGER.warn("Some instances could not be started: count={}, instances={}", instancesNotInDesiredState.size(), instancesNotInDesiredState);
                    clusterUpscaleFlowService.logInstancesFailedToStart(context.getStack().getId(), instancesNotInDesiredState);
                // TODO CB-15132: Eventually, we may want to take some corrective action.
                }
            } catch (Exception e) {
                LOGGER.warn("Failed while attempting to log info about instances which did not start. Ignoring, and letting flow proceed", e);
            }
        }
    };
}
Also used : Action(org.springframework.statemachine.action.Action) DetailedStackStatus(com.sequenceiq.cloudbreak.api.endpoint.v4.common.DetailedStackStatus) CloudContext(com.sequenceiq.cloudbreak.cloud.context.CloudContext) AvailabilityZone.availabilityZone(com.sequenceiq.cloudbreak.cloud.model.AvailabilityZone.availabilityZone) StopStartUpscaleStartInstancesRequest(com.sequenceiq.cloudbreak.cloud.event.instance.StopStartUpscaleStartInstancesRequest) LoggerFactory(org.slf4j.LoggerFactory) StackToCloudStackConverter(com.sequenceiq.cloudbreak.converter.spi.StackToCloudStackConverter) Location.location(com.sequenceiq.cloudbreak.cloud.model.Location.location) CloudInstance(com.sequenceiq.cloudbreak.cloud.model.CloudInstance) STOPSTART_UPSCALE_FAILURE_EVENT(com.sequenceiq.cloudbreak.core.flow2.cluster.stopstartus.StopStartUpscaleEvent.STOPSTART_UPSCALE_FAILURE_EVENT) InstanceMetaDataToCloudInstanceConverter(com.sequenceiq.cloudbreak.converter.spi.InstanceMetaDataToCloudInstanceConverter) Map(java.util.Map) Location(com.sequenceiq.cloudbreak.cloud.model.Location) CloudVmInstanceStatus(com.sequenceiq.cloudbreak.cloud.model.CloudVmInstanceStatus) AbstractStackAction(com.sequenceiq.cloudbreak.core.flow2.AbstractStackAction) AbstractStackFailureAction(com.sequenceiq.cloudbreak.core.flow2.stack.AbstractStackFailureAction) STOPPED(com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.STOPPED) StackFailureContext(com.sequenceiq.cloudbreak.core.flow2.stack.StackFailureContext) Set(java.util.Set) FlowParameters(com.sequenceiq.flow.core.FlowParameters) StopStartUpscaleStartInstancesResult(com.sequenceiq.cloudbreak.cloud.event.instance.StopStartUpscaleStartInstancesResult) Crn(com.sequenceiq.cloudbreak.auth.crn.Crn) StopStartUpscaleCommissionViaCMRequest(com.sequenceiq.cloudbreak.reactor.api.event.cluster.StopStartUpscaleCommissionViaCMRequest) Collectors(java.util.stream.Collectors) Configuration(org.springframework.context.annotation.Configuration) CloudStack(com.sequenceiq.cloudbreak.cloud.model.CloudStack) List(java.util.List) StackUtil(com.sequenceiq.cloudbreak.util.StackUtil) Optional(java.util.Optional) StackService(com.sequenceiq.cloudbreak.service.stack.StackService) Stack(com.sequenceiq.cloudbreak.domain.stack.Stack) Payload(com.sequenceiq.cloudbreak.common.event.Payload) MDCBuilder(com.sequenceiq.cloudbreak.logger.MDCBuilder) Selectable(com.sequenceiq.cloudbreak.common.event.Selectable) StackFailureEvent(com.sequenceiq.cloudbreak.reactor.api.event.StackFailureEvent) StackEvent(com.sequenceiq.cloudbreak.reactor.api.event.StackEvent) StopStartUpscaleCommissionViaCMResult(com.sequenceiq.cloudbreak.reactor.api.event.orchestration.StopStartUpscaleCommissionViaCMResult) HashSet(java.util.HashSet) Inject(javax.inject.Inject) ResourceService(com.sequenceiq.cloudbreak.service.resource.ResourceService) CloudInstanceIdToInstanceMetaDataConverter(com.sequenceiq.cloudbreak.converter.CloudInstanceIdToInstanceMetaDataConverter) ClusterManagerType(com.sequenceiq.cloudbreak.common.type.ClusterManagerType) StateContext(org.springframework.statemachine.StateContext) Logger(org.slf4j.Logger) Region.region(com.sequenceiq.cloudbreak.cloud.model.Region.region) CloudCredential(com.sequenceiq.cloudbreak.cloud.model.CloudCredential) STOPSTART_UPSCALE_FINALIZED_EVENT(com.sequenceiq.cloudbreak.core.flow2.cluster.stopstartus.StopStartUpscaleEvent.STOPSTART_UPSCALE_FINALIZED_EVENT) InstanceStatus(com.sequenceiq.cloudbreak.cloud.model.InstanceStatus) InstanceMetaData(com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData) Bean(org.springframework.context.annotation.Bean) VisibleForTesting(com.google.common.annotations.VisibleForTesting) StopStartUpscaleTriggerEvent(com.sequenceiq.cloudbreak.core.flow2.event.StopStartUpscaleTriggerEvent) Collections(java.util.Collections) CloudVmInstanceStatus(com.sequenceiq.cloudbreak.cloud.model.CloudVmInstanceStatus) CloudInstance(com.sequenceiq.cloudbreak.cloud.model.CloudInstance) InstanceMetaData(com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData) StopStartUpscaleStartInstancesResult(com.sequenceiq.cloudbreak.cloud.event.instance.StopStartUpscaleStartInstancesResult) StopStartUpscaleCommissionViaCMRequest(com.sequenceiq.cloudbreak.reactor.api.event.cluster.StopStartUpscaleCommissionViaCMRequest) List(java.util.List) Map(java.util.Map) Bean(org.springframework.context.annotation.Bean)

Example 18 with InstanceMetaData

use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.

the class ChangePrimaryGatewayService method primaryGatewayChanged.

public void primaryGatewayChanged(long stackId, String newPrimaryGatewayFQDN) throws CloudbreakException, TransactionExecutionException {
    LOGGER.info("Update primary gateway ip");
    Set<InstanceMetaData> imds = instanceMetaDataService.findNotTerminatedAndNotZombieForStack(stackId);
    Optional<InstanceMetaData> formerPrimaryGateway = imds.stream().filter(imd -> imd.getInstanceMetadataType() == InstanceMetadataType.GATEWAY_PRIMARY).findFirst();
    Optional<InstanceMetaData> newPrimaryGateway = imds.stream().filter(imd -> imd.getDiscoveryFQDN() != null && imd.getDiscoveryFQDN().equals(newPrimaryGatewayFQDN)).findFirst();
    if (newPrimaryGateway.isPresent() && formerPrimaryGateway.isPresent()) {
        InstanceMetaData fpg = formerPrimaryGateway.get();
        fpg.setInstanceMetadataType(InstanceMetadataType.GATEWAY);
        fpg.setServer(Boolean.FALSE);
        transactionService.required(() -> {
            instanceMetaDataService.save(fpg);
            InstanceMetaData npg = newPrimaryGateway.get();
            npg.setInstanceMetadataType(InstanceMetadataType.GATEWAY_PRIMARY);
            npg.setServer(Boolean.TRUE);
            instanceMetaDataService.save(npg);
            Stack updatedStack = stackService.getByIdWithListsInTransaction(stackId);
            String gatewayIp = gatewayConfigService.getPrimaryGatewayIp(updatedStack);
            Cluster cluster = updatedStack.getCluster();
            cluster.setClusterManagerIp(gatewayIp);
            LOGGER.info("Primary gateway IP has been updated to: '{}'", gatewayIp);
            clusterService.save(cluster);
            clusterPublicEndpointManagementService.changeGateway(updatedStack);
            return null;
        });
    } else {
        throw new CloudbreakException("Primary gateway change was not successful.");
    }
}
Also used : InstanceMetaData(com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData) Stack(com.sequenceiq.cloudbreak.domain.stack.Stack) Cluster(com.sequenceiq.cloudbreak.domain.stack.cluster.Cluster) StackView(com.sequenceiq.cloudbreak.domain.view.StackView) CloudbreakException(com.sequenceiq.cloudbreak.service.CloudbreakException) DetailedStackStatus(com.sequenceiq.cloudbreak.api.endpoint.v4.common.DetailedStackStatus) AVAILABLE(com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status.AVAILABLE) LoggerFactory(org.slf4j.LoggerFactory) ClusterService(com.sequenceiq.cloudbreak.service.cluster.ClusterService) ClusterPublicEndpointManagementService(com.sequenceiq.cloudbreak.service.publicendpoint.ClusterPublicEndpointManagementService) CLUSTER_GATEWAY_CHANGE(com.sequenceiq.cloudbreak.event.ResourceEvent.CLUSTER_GATEWAY_CHANGE) Inject(javax.inject.Inject) TransactionService(com.sequenceiq.cloudbreak.common.service.TransactionService) UPDATE_IN_PROGRESS(com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status.UPDATE_IN_PROGRESS) CloudbreakFlowMessageService(com.sequenceiq.cloudbreak.core.flow2.stack.CloudbreakFlowMessageService) StackUpdater(com.sequenceiq.cloudbreak.service.StackUpdater) InstanceMetaDataService(com.sequenceiq.cloudbreak.service.stack.InstanceMetaDataService) CLUSTER_GATEWAY_CHANGE_FAILED(com.sequenceiq.cloudbreak.event.ResourceEvent.CLUSTER_GATEWAY_CHANGE_FAILED) CLUSTER_GATEWAY_CHANGED_SUCCESSFULLY(com.sequenceiq.cloudbreak.event.ResourceEvent.CLUSTER_GATEWAY_CHANGED_SUCCESSFULLY) UPDATE_FAILED(com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status.UPDATE_FAILED) InstanceMetadataType(com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceMetadataType) Logger(org.slf4j.Logger) Set(java.util.Set) GatewayConfigService(com.sequenceiq.cloudbreak.service.GatewayConfigService) Component(org.springframework.stereotype.Component) StackUtil(com.sequenceiq.cloudbreak.util.StackUtil) InstanceMetaData(com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData) TransactionExecutionException(com.sequenceiq.cloudbreak.common.service.TransactionService.TransactionExecutionException) Optional(java.util.Optional) StackService(com.sequenceiq.cloudbreak.service.stack.StackService) Cluster(com.sequenceiq.cloudbreak.domain.stack.cluster.Cluster) CloudbreakException(com.sequenceiq.cloudbreak.service.CloudbreakException) Stack(com.sequenceiq.cloudbreak.domain.stack.Stack)

Example 19 with InstanceMetaData

use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.

the class ClusterStartService method updateInstancesToHealthy.

private void updateInstancesToHealthy(StackView stack) {
    try {
        transactionService.required(() -> {
            Set<InstanceMetaData> instances = instanceMetaDataService.findNotTerminatedAndNotZombieForStack(stack.getId());
            for (InstanceMetaData metaData : instances) {
                metaData.setInstanceStatus(com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.SERVICES_HEALTHY);
            }
            instanceMetaDataService.saveAll(instances);
        });
    } catch (TransactionExecutionException e) {
        throw new TransactionRuntimeExecutionException(e);
    }
}
Also used : InstanceMetaData(com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData) TransactionExecutionException(com.sequenceiq.cloudbreak.common.service.TransactionService.TransactionExecutionException) TransactionRuntimeExecutionException(com.sequenceiq.cloudbreak.common.service.TransactionService.TransactionRuntimeExecutionException)

Example 20 with InstanceMetaData

use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.

the class ClusterUpscaleFlowService method updateMissingHostMetaDatas.

private int updateMissingHostMetaDatas(Collection<String> successHosts, Iterable<InstanceMetaData> instanceMetaDatas, InstanceStatus instanceStatus) {
    int failedHosts = 0;
    for (InstanceMetaData metaData : instanceMetaDatas) {
        if (!successHosts.contains(metaData.getDiscoveryFQDN())) {
            instanceMetaDataService.updateInstanceStatus(metaData, instanceStatus, "Cluster upscale failed. Host does not have fqdn.");
            failedHosts++;
        }
    }
    return failedHosts;
}
Also used : InstanceMetaData(com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData)

Aggregations

InstanceMetaData (com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData)415 Stack (com.sequenceiq.cloudbreak.domain.stack.Stack)165 InstanceGroup (com.sequenceiq.cloudbreak.domain.stack.instance.InstanceGroup)152 Test (org.junit.jupiter.api.Test)143 Map (java.util.Map)92 HashSet (java.util.HashSet)90 Set (java.util.Set)86 List (java.util.List)84 HostGroup (com.sequenceiq.cloudbreak.domain.stack.cluster.host.HostGroup)77 Cluster (com.sequenceiq.cloudbreak.domain.stack.cluster.Cluster)73 Collectors (java.util.stream.Collectors)71 ArrayList (java.util.ArrayList)62 Test (org.junit.Test)60 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)57 Optional (java.util.Optional)52 StackService (com.sequenceiq.cloudbreak.service.stack.StackService)48 Inject (javax.inject.Inject)47 Logger (org.slf4j.Logger)47 LoggerFactory (org.slf4j.LoggerFactory)47 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)45