Use of com.sequenceiq.cloudbreak.reactor.api.event.cluster.StopStartUpscaleCommissionViaCMRequest in the cloudbreak project by hortonworks.
Class StopStartUpscaleActions, method cmCommissionAction.
/**
 * Creates the action registered under the {@code STOPSTART_UPSCALE_HOSTS_COMMISSION_STATE} bean name.
 *
 * <p>The action consumes a {@link StopStartUpscaleStartInstancesResult}, reports the instances that
 * reached the {@code STARTED} status, logs those that did not, warns when fewer nodes than the
 * requested adjustment are available, and finally sends a
 * {@link StopStartUpscaleCommissionViaCMRequest} for the collected instance metadata.
 *
 * @return the action handling the hosts-commission state
 */
@Bean(name = "STOPSTART_UPSCALE_HOSTS_COMMISSION_STATE")
public Action<?, ?> cmCommissionAction() {
    return new AbstractStopStartUpscaleActions<>(StopStartUpscaleStartInstancesResult.class) {

        /**
         * Reports started instances, logs failed starts, and dispatches the commission request.
         */
        @Override
        protected void doExecute(StopStartUpscaleContext context, StopStartUpscaleStartInstancesResult payload,
                Map<Object, Object> variables) throws Exception {
            // Successful nodes are recorded first; failures are only handled / logged afterwards.
            List<CloudVmInstanceStatus> affectedStatuses = payload.getAffectedInstanceStatuses();
            Set<String> startedIds = affectedStatuses.stream()
                    .filter(status -> InstanceStatus.STARTED == status.getStatus())
                    .map(status -> status.getCloudInstance().getInstanceId())
                    .collect(Collectors.toUnmodifiableSet());
            List<InstanceMetaData> startedMetaData = cloudInstanceIdToInstanceMetaDataConverter
                    .getNotDeletedAndNotZombieInstances(context.getStack(), context.getHostGroupName(), startedIds);
            clusterUpscaleFlowService.instancesStarted(context.getStack().getId(), startedMetaData);

            handleInstanceUnsuccessfulStart(context, affectedStatuses);

            // This list is currently empty. A later flow-step could fill it by asking CM for service health.
            // It is meant to catch cloud instances which are RUNNING but not utilized (likely due to previous failures).
            List<CloudInstance> runningWithoutServices = payload.getStartInstanceRequest().getStartedInstancesWithServicesNotRunning();
            Set<String> runningWithoutServicesIds = runningWithoutServices.stream()
                    .map(CloudInstance::getInstanceId)
                    .collect(Collectors.toUnmodifiableSet());
            List<InstanceMetaData> runningWithoutServicesMetaData = cloudInstanceIdToInstanceMetaDataConverter
                    .getNotDeletedAndNotZombieInstances(context.getStack(), context.getHostGroupName(), runningWithoutServicesIds);

            LOGGER.info("StartedInstancesCount={}, StartedInstancesMetadataCount={},"
                            + " instancesWithServicesNotRunningCount={}, instancesWithServicesNotRunningMetadataCount={}",
                    startedIds.size(),
                    startedMetaData.size(),
                    runningWithoutServices.size(),
                    runningWithoutServicesMetaData.size());

            // Both metadata lists are handed to the commission request, so both count as available.
            int availableCount = runningWithoutServicesMetaData.size() + startedMetaData.size();
            if (availableCount < context.getAdjustment()) {
                LOGGER.warn("Not enough nodes found to commission. DesiredCount={}, availableCount={}", context.getAdjustment(), availableCount);
                clusterUpscaleFlowService.warnNotEnoughInstances(
                        context.getStack().getId(), context.getHostGroupName(), context.getAdjustment(), availableCount);
            }

            clusterUpscaleFlowService.upscaleCommissioningNodes(
                    context.getStack().getId(), context.getHostGroupName(), startedMetaData, runningWithoutServicesMetaData);
            StopStartUpscaleCommissionViaCMRequest request = new StopStartUpscaleCommissionViaCMRequest(
                    context.getStack().getId(), context.getHostGroupName(), startedMetaData, runningWithoutServicesMetaData);
            sendEvent(context, request);
        }

        /**
         * Logs the instances whose status is not {@code STARTED}. Any exception raised while doing so
         * is logged and swallowed so that the flow can proceed.
         */
        private void handleInstanceUnsuccessfulStart(StopStartUpscaleContext context, List<CloudVmInstanceStatus> cloudVmInstanceStatusList) {
            try {
                List<CloudVmInstanceStatus> notStarted = cloudVmInstanceStatusList.stream()
                        .filter(status -> status.getStatus() != InstanceStatus.STARTED)
                        .collect(Collectors.toList());
                if (notStarted.isEmpty()) {
                    return;
                }
                // The DB status of these instances is deliberately left alone; the regular syncer threads handle it,
                // in case the syncers carry additional logic for instance state changes.
                LOGGER.warn("Some instances could not be started: count={}, instances={}", notStarted.size(), notStarted);
                clusterUpscaleFlowService.logInstancesFailedToStart(context.getStack().getId(), notStarted);
                // TODO CB-15132: Eventually, we may want to take some corrective action.
            } catch (Exception e) {
                LOGGER.warn("Failed while attempting to log info about instances which did not start. Ignoring, and letting flow proceed", e);
            }
        }
    };
}
Use of com.sequenceiq.cloudbreak.reactor.api.event.cluster.StopStartUpscaleCommissionViaCMRequest in the cloudbreak project by hortonworks.
Class StopStartUpscaleCommissionViaCMHandlerTest, method testAllCommissioned.
/**
 * Sends a commission request for five instances, with every FQDN present both in the
 * CM-available map and in the recommissioned set passed to the per-test mocks, and expects
 * a result with no not-recommissioned hosts and five successfully commissioned ones.
 */
@Test
void testAllCommissioned() throws ClusterClientInitException {
    int expectedCommissioned = 5;
    List<InstanceMetaData> toCommission = createInstancesToCommission(expectedCommissioned);
    HostGroup hostGroup = createHostGroup(toCommission);

    Set<String> fqdns = toCommission.stream()
            .map(InstanceMetaData::getDiscoveryFQDN)
            .collect(Collectors.toSet());
    Map<String, InstanceMetaData> hostsKnownToCm = toCommission.stream()
            .collect(Collectors.toMap(InstanceMetaData::getDiscoveryFQDN, instance -> instance));
    // Unmodifiable snapshot of all keys: every host counts as recommissioned.
    Set<String> recommissioned = Set.copyOf(hostsKnownToCm.keySet());
    setupPerTestMocks(hostGroup, fqdns, hostsKnownToCm, recommissioned);

    StopStartUpscaleCommissionViaCMRequest request =
            new StopStartUpscaleCommissionViaCMRequest(1L, INSTANCE_GROUP_NAME, toCommission, Collections.emptyList());
    Selectable outcome = underTest.doAccept(new HandlerEvent(Event.wrap(request)));

    assertThat(outcome).isInstanceOf(StopStartUpscaleCommissionViaCMResult.class);
    StopStartUpscaleCommissionViaCMResult result = (StopStartUpscaleCommissionViaCMResult) outcome;
    assertThat(result.getNotRecommissionedFqdns()).isEmpty();
    assertThat(result.getSuccessfullyCommissionedFqdns()).hasSize(expectedCommissioned);
}
Use of com.sequenceiq.cloudbreak.reactor.api.event.cluster.StopStartUpscaleCommissionViaCMRequest in the cloudbreak project by hortonworks.
Class StopStartUpscaleCommissionViaCMHandlerTest, method testFewerNodesOnBothCmInvocations.
/**
 * Sends a commission request for five instances while the per-test mocks receive only four
 * hosts in the CM-available map and only three of those in the recommissioned set; expects
 * two not-recommissioned hosts and three successfully commissioned ones.
 */
@Test
void testFewerNodesOnBothCmInvocations() throws ClusterClientInitException {
    int commissionInstanceCount = 5;
    // One host is missing from the CM-available map, and one more from the recommissioned set.
    int availableInCmCount = commissionInstanceCount - 1;
    int recommissionedCount = commissionInstanceCount - 2;

    List<InstanceMetaData> toCommission = createInstancesToCommission(commissionInstanceCount);
    HostGroup hostGroup = createHostGroup(toCommission);
    Set<String> fqdns = toCommission.stream()
            .map(InstanceMetaData::getDiscoveryFQDN)
            .collect(Collectors.toSet());
    Map<String, InstanceMetaData> hostsKnownToCm = toCommission.stream()
            .limit(availableInCmCount)
            .collect(Collectors.toMap(InstanceMetaData::getDiscoveryFQDN, instance -> instance));
    Set<String> recommissioned = hostsKnownToCm.keySet().stream()
            .limit(recommissionedCount)
            .collect(Collectors.toUnmodifiableSet());
    setupPerTestMocks(hostGroup, fqdns, hostsKnownToCm, recommissioned);

    StopStartUpscaleCommissionViaCMRequest request =
            new StopStartUpscaleCommissionViaCMRequest(1L, INSTANCE_GROUP_NAME, toCommission, Collections.emptyList());
    Selectable outcome = underTest.doAccept(new HandlerEvent(Event.wrap(request)));

    assertThat(outcome).isInstanceOf(StopStartUpscaleCommissionViaCMResult.class);
    StopStartUpscaleCommissionViaCMResult result = (StopStartUpscaleCommissionViaCMResult) outcome;
    assertThat(result.getNotRecommissionedFqdns()).hasSize(2);
    assertThat(result.getSuccessfullyCommissionedFqdns()).hasSize(recommissionedCount);
}
Use of com.sequenceiq.cloudbreak.reactor.api.event.cluster.StopStartUpscaleCommissionViaCMRequest in the cloudbreak project by hortonworks.
Class StopStartUpscaleCommissionViaCMHandlerTest, method testErrorFromCmHostCollection.
/**
 * Stubs {@code clusterCommissionService.collectHostsToCommission} to throw a
 * {@link RuntimeException} and expects a FAILED result that carries the exception message,
 * the error selector, and empty commissioned / not-recommissioned collections.
 */
@Test
void testErrorFromCmHostCollection() throws ClusterClientInitException {
    int commissionInstanceCount = 5;
    List<InstanceMetaData> toCommission = createInstancesToCommission(commissionInstanceCount);
    HostGroup hostGroup = createHostGroup(toCommission);

    Set<String> fqdns = toCommission.stream()
            .map(InstanceMetaData::getDiscoveryFQDN)
            .collect(Collectors.toSet());
    Map<String, InstanceMetaData> hostsKnownToCm = toCommission.stream()
            .collect(Collectors.toMap(InstanceMetaData::getDiscoveryFQDN, instance -> instance));
    Set<String> recommissioned = Set.copyOf(hostsKnownToCm.keySet());
    setupPerTestMocks(hostGroup, fqdns, hostsKnownToCm, recommissioned);
    // Stubbed after the common mock setup so that host collection fails for this test.
    when(clusterCommissionService.collectHostsToCommission(eq(hostGroup), eq(fqdns)))
            .thenThrow(new RuntimeException("collectHostsToCommissionError"));

    StopStartUpscaleCommissionViaCMRequest request =
            new StopStartUpscaleCommissionViaCMRequest(1L, INSTANCE_GROUP_NAME, toCommission, Collections.emptyList());
    Selectable outcome = underTest.doAccept(new HandlerEvent(Event.wrap(request)));

    assertThat(outcome).isInstanceOf(StopStartUpscaleCommissionViaCMResult.class);
    StopStartUpscaleCommissionViaCMResult result = (StopStartUpscaleCommissionViaCMResult) outcome;
    assertThat(result.getSuccessfullyCommissionedFqdns()).isEmpty();
    assertThat(result.getNotRecommissionedFqdns()).isEmpty();
    assertThat(result.getErrorDetails().getMessage()).isEqualTo("collectHostsToCommissionError");
    assertThat(result.getStatus()).isEqualTo(EventStatus.FAILED);
    assertThat(result.selector()).isEqualTo("STOPSTARTUPSCALECOMMISSIONVIACMRESULT_ERROR");
}
Use of com.sequenceiq.cloudbreak.reactor.api.event.cluster.StopStartUpscaleCommissionViaCMRequest in the cloudbreak project by hortonworks.
Class StopStartUpscaleActionsTest, method testUpscaleFinishedAction2.
/**
 * Runs the upscale-finished action with a payload in which three instances were commissioned
 * and two were not. Verifies that the failed FQDNs are logged, that the cluster upscale is
 * reported as finished with the commissioned instances, and that the
 * {@code STOPSTART_UPSCALE_FINALIZED_EVENT} is sent with the original result as payload.
 */
@Test
void testUpscaleFinishedAction2() throws Exception {
    // Some did not commission.
    AbstractStopStartUpscaleActions<StopStartUpscaleCommissionViaCMResult> action =
            (AbstractStopStartUpscaleActions<StopStartUpscaleCommissionViaCMResult>) underTest.upscaleFinishedAction();
    initActionPrivateFields(action);

    int adjustment = 5;
    // The first (adjustment - 2) stopped instances count as commissioned, the next two do not.
    int commissionedCount = adjustment - 2;
    StopStartUpscaleContext upscaleContext = createContext(adjustment);

    List<InstanceMetaData> actionableNotStopped = generateInstances(5, 100, InstanceStatus.SERVICES_HEALTHY, INSTANCE_GROUP_NAME_ACTIONABLE);
    List<InstanceMetaData> actionableStopped = generateInstances(10, 200, InstanceStatus.STOPPED, INSTANCE_GROUP_NAME_ACTIONABLE);
    List<InstanceMetaData> randomNotStopped = generateInstances(3, 300, InstanceStatus.SERVICES_HEALTHY, INSTANCE_GROUP_NAME_RANDOM);
    List<InstanceMetaData> randomStopped = generateInstances(8, 400, InstanceStatus.STOPPED, INSTANCE_GROUP_NAME_RANDOM);

    List<InstanceMetaData> commissioned = actionableStopped.subList(0, commissionedCount);
    List<InstanceMetaData> notCommissioned = actionableStopped.subList(commissionedCount, adjustment);
    Set<String> commissionedFqdns = commissioned.stream()
            .map(InstanceMetaData::getDiscoveryFQDN)
            .collect(Collectors.toUnmodifiableSet());
    List<String> notCommissionedFqdns = notCommissioned.stream()
            .map(InstanceMetaData::getDiscoveryFQDN)
            .collect(Collectors.toList());

    StopStartUpscaleCommissionViaCMRequest request =
            new StopStartUpscaleCommissionViaCMRequest(1L, INSTANCE_GROUP_NAME_ACTIONABLE, commissioned, notCommissioned);
    StopStartUpscaleCommissionViaCMResult payload =
            new StopStartUpscaleCommissionViaCMResult(request, commissionedFqdns, notCommissionedFqdns);

    // Mocks
    mockStackEtc(actionableNotStopped, actionableStopped, randomNotStopped, randomStopped);
    when(reactorEventFactory.createEvent(anyMap(), isNotNull())).thenReturn(event);

    new AbstractActionTestSupport<>(action).doExecute(upscaleContext, payload, Collections.emptyMap());

    verify(stopStartUpscaleFlowService).logInstancesFailedToCommission(eq(STACK_ID), eq(notCommissionedFqdns));
    verify(stopStartUpscaleFlowService)
            .clusterUpscaleFinished(any(), eq(INSTANCE_GROUP_NAME_ACTIONABLE), eq(commissioned), eq(DetailedStackStatus.AVAILABLE));
    ArgumentCaptor<Object> payloadCaptor = ArgumentCaptor.forClass(Object.class);
    verify(reactorEventFactory).createEvent(anyMap(), payloadCaptor.capture());
    verify(eventBus).notify("STOPSTART_UPSCALE_FINALIZED_EVENT", event);
    assertThat(payloadCaptor.getValue()).isInstanceOf(StopStartUpscaleCommissionViaCMResult.class);
}
Aggregations