use of com.sequenceiq.cloudbreak.cloud.event.instance.StopStartUpscaleStartInstancesResult in project cloudbreak by hortonworks.
the class StopStartUpscaleActions method cmCommissionAction.
/**
 * Builds the flow action registered as {@code STOPSTART_UPSCALE_HOSTS_COMMISSION_STATE}.
 *
 * <p>Given the result of the start-instances step, the action records the instances that reached
 * {@code STARTED}, reports the ones that did not, warns when fewer nodes than the requested adjustment
 * are available, and finally sends a {@link StopStartUpscaleCommissionViaCMRequest}.
 *
 * @return the action handling {@link StopStartUpscaleStartInstancesResult} payloads
 */
@Bean(name = "STOPSTART_UPSCALE_HOSTS_COMMISSION_STATE")
public Action<?, ?> cmCommissionAction() {
    return new AbstractStopStartUpscaleActions<>(StopStartUpscaleStartInstancesResult.class) {

        @Override
        protected void doExecute(StopStartUpscaleContext context, StopStartUpscaleStartInstancesResult payload,
                Map<Object, Object> variables) throws Exception {
            List<CloudVmInstanceStatus> instanceStatuses = payload.getAffectedInstanceStatuses();

            // Successful nodes get their metadata updated first; failures are handled / logged afterwards.
            Set<String> startedInstanceIds = collectStartedInstanceIds(instanceStatuses);
            List<InstanceMetaData> startedMetaData = lookupNotDeletedAndNotZombie(context, startedInstanceIds);
            Long stackId = context.getStack().getId();
            clusterUpscaleFlowService.instancesStarted(stackId, startedMetaData);

            handleInstanceUnsuccessfulStart(context, instanceStatuses);

            // Currently always empty. A later flow-step could fill it by asking CM for service health, so that
            // cloud instances which are RUNNING but unused (likely because of earlier failures) get detected.
            List<CloudInstance> runningWithoutServices =
                    payload.getStartInstanceRequest().getStartedInstancesWithServicesNotRunning();
            Set<String> runningWithoutServicesIds = runningWithoutServices.stream()
                    .map(CloudInstance::getInstanceId)
                    .collect(Collectors.toUnmodifiableSet());
            List<InstanceMetaData> runningWithoutServicesMetaData =
                    lookupNotDeletedAndNotZombie(context, runningWithoutServicesIds);

            LOGGER.info("StartedInstancesCount={}, StartedInstancesMetadataCount={},"
                            + " instancesWithServicesNotRunningCount={}, instancesWithServicesNotRunningMetadataCount={}",
                    startedInstanceIds.size(), startedMetaData.size(),
                    runningWithoutServices.size(), runningWithoutServicesMetaData.size());

            String hostGroupName = context.getHostGroupName();
            int availableCount = runningWithoutServicesMetaData.size() + startedMetaData.size();
            if (context.getAdjustment() > availableCount) {
                LOGGER.warn("Not enough nodes found to commission. DesiredCount={}, availableCount={}",
                        context.getAdjustment(), availableCount);
                clusterUpscaleFlowService.warnNotEnoughInstances(stackId, hostGroupName,
                        context.getAdjustment(), availableCount);
            }

            clusterUpscaleFlowService.upscaleCommissioningNodes(stackId, hostGroupName,
                    startedMetaData, runningWithoutServicesMetaData);
            sendEvent(context, new StopStartUpscaleCommissionViaCMRequest(stackId, hostGroupName,
                    startedMetaData, runningWithoutServicesMetaData));
        }

        // Ids of the cloud instances whose reported status is STARTED.
        private Set<String> collectStartedInstanceIds(List<CloudVmInstanceStatus> statuses) {
            return statuses.stream()
                    .filter(status -> InstanceStatus.STARTED == status.getStatus())
                    .map(status -> status.getCloudInstance().getInstanceId())
                    .collect(Collectors.toUnmodifiableSet());
        }

        // Resolves cloud instance ids to instance metadata for the context's stack and host group.
        private List<InstanceMetaData> lookupNotDeletedAndNotZombie(StopStartUpscaleContext context,
                Set<String> cloudInstanceIds) {
            return cloudInstanceIdToInstanceMetaDataConverter.getNotDeletedAndNotZombieInstances(
                    context.getStack(), context.getHostGroupName(), cloudInstanceIds);
        }

        // Best-effort reporting of instances that did not reach STARTED; never fails the flow.
        private void handleInstanceUnsuccessfulStart(StopStartUpscaleContext context,
                List<CloudVmInstanceStatus> statuses) {
            try {
                List<CloudVmInstanceStatus> notStarted = statuses.stream()
                        .filter(status -> status.getStatus() != InstanceStatus.STARTED)
                        .collect(Collectors.toList());
                if (notStarted.isEmpty()) {
                    return;
                }
                // The DB status of these instances is deliberately left untouched: the regular syncer threads
                // handle it, in case they carry extra logic for processing instance state changes.
                LOGGER.warn("Some instances could not be started: count={}, instances={}", notStarted.size(), notStarted);
                clusterUpscaleFlowService.logInstancesFailedToStart(context.getStack().getId(), notStarted);
                // TODO CB-15132: Eventually, we may want to take some corrective action.
            } catch (Exception e) {
                LOGGER.warn("Failed while attempting to log info about instances which did not start. Ignoring, and letting flow proceed", e);
            }
        }
    };
}
use of com.sequenceiq.cloudbreak.cloud.event.instance.StopStartUpscaleStartInstancesResult in project cloudbreak by hortonworks.
the class StopStartUpscaleActions method startInstancesFailedAction.
/**
 * Builds the flow action registered as {@code STOPSTART_UPSCALE_START_INSTANCE_FAILED_STATE}.
 *
 * <p>The action logs a warning, reports the failure together with the stopped instances of the host group
 * taken from the original start request, and then sends {@code STOPSTART_UPSCALE_FAILURE_EVENT} carrying a
 * {@link StackFailureEvent} built from the payload's resource id and error details.
 *
 * @return the action handling {@link StopStartUpscaleStartInstancesResult} payloads
 */
@Bean(name = "STOPSTART_UPSCALE_START_INSTANCE_FAILED_STATE")
public Action<?, ?> startInstancesFailedAction() {
    return new AbstractStopStartUpscaleActions<>(StopStartUpscaleStartInstancesResult.class) {

        @Override
        protected void doExecute(StopStartUpscaleContext context, StopStartUpscaleStartInstancesResult payload,
                Map<Object, Object> variables) throws Exception {
            LOGGER.warn("Failure during startInstancesOnCloudProvider");

            // TODO CB-14929. Should the nodes be put into an ORCHESTRATOR_FAILED state? What are the manual recovery steps from this state.
            StopStartUpscaleStartInstancesRequest startRequest = payload.getStartInstanceRequest();
            clusterUpscaleFlowService.startInstancesFailed(
                    payload.getResourceId(),
                    startRequest.getStoppedCloudInstancesInHg());

            StackFailureEvent failurePayload = new StackFailureEvent(payload.getResourceId(), payload.getErrorDetails());
            sendEvent(context, STOPSTART_UPSCALE_FAILURE_EVENT.event(), failurePayload);
        }
    };
}
use of com.sequenceiq.cloudbreak.cloud.event.instance.StopStartUpscaleStartInstancesResult in project cloudbreak by hortonworks.
the class StopStartUpscaleActionsTest method testCmCommissionAction2.
/**
 * Commission action when only part of the requested instances started: two of the five affected
 * instances are produced by {@code constructVmInstanceStatusWithTerminated} as not started, so the
 * action is expected to report them, warn about the shortfall and commission only the remaining three.
 */
@Test
void testCmCommissionAction2() throws Exception {
    int adjustment = 5;
    int failedToStartCount = 2;
    int expectedCount = adjustment - failedToStartCount;

    AbstractStopStartUpscaleActions<StopStartUpscaleStartInstancesResult> action =
            (AbstractStopStartUpscaleActions<StopStartUpscaleStartInstancesResult>) underTest.cmCommissionAction();
    initActionPrivateFields(action);
    StopStartUpscaleContext stopStartUpscaleContext = createContext(adjustment);

    // Fixture: two instance groups, each with a healthy and a stopped subset.
    List<InstanceMetaData> actionableHealthy =
            generateInstances(5, 100, InstanceStatus.SERVICES_HEALTHY, INSTANCE_GROUP_NAME_ACTIONABLE);
    List<InstanceMetaData> actionableStopped =
            generateInstances(10, 200, InstanceStatus.STOPPED, INSTANCE_GROUP_NAME_ACTIONABLE);
    List<InstanceMetaData> randomHealthy =
            generateInstances(3, 300, InstanceStatus.SERVICES_HEALTHY, INSTANCE_GROUP_NAME_RANDOM);
    List<InstanceMetaData> randomStopped =
            generateInstances(8, 400, InstanceStatus.STOPPED, INSTANCE_GROUP_NAME_RANDOM);

    List<InstanceMetaData> allActionableMetaData = new LinkedList<>(actionableStopped);
    allActionableMetaData.addAll(actionableHealthy);

    List<CloudInstance> stoppedCloudInstances = convertToCloudInstance(actionableStopped);
    List<CloudInstance> allCloudInstances = convertToCloudInstance(allActionableMetaData);

    StopStartUpscaleStartInstancesRequest startInstancesRequest = new StopStartUpscaleStartInstancesRequest(
            cloudContext, cloudCredential, cloudStack, INSTANCE_GROUP_NAME_ACTIONABLE,
            stoppedCloudInstances, allCloudInstances, Collections.emptyList(),
            stopStartUpscaleContext.getAdjustment());
    List<CloudVmInstanceStatus> affectedInstances =
            constructVmInstanceStatusWithTerminated(stoppedCloudInstances, adjustment, failedToStartCount);
    StopStartUpscaleStartInstancesResult payload =
            new StopStartUpscaleStartInstancesResult(STACK_ID, startInstancesRequest, affectedInstances);

    // Mocks
    mockStackEtc(actionableHealthy, actionableStopped, randomHealthy, randomStopped);
    when(reactorEventFactory.createEvent(anyMap(), isNotNull())).thenReturn(event);

    new AbstractActionTestSupport<>(action).doExecute(stopStartUpscaleContext, payload, Collections.emptyMap());

    // Flow-service interactions. The captor is shared, so each size check must directly follow its capture.
    ArgumentCaptor<List> listCaptor = ArgumentCaptor.forClass(List.class);

    verify(stopStartUpscaleFlowService).instancesStarted(eq(STACK_ID), listCaptor.capture());
    Assert.assertEquals(expectedCount, listCaptor.getValue().size());

    verify(stopStartUpscaleFlowService).logInstancesFailedToStart(eq(STACK_ID), listCaptor.capture());
    Assert.assertEquals(failedToStartCount, listCaptor.getValue().size());

    verify(stopStartUpscaleFlowService).warnNotEnoughInstances(
            eq(STACK_ID), eq(INSTANCE_GROUP_NAME_ACTIONABLE), eq(adjustment), eq(expectedCount));

    verify(stopStartUpscaleFlowService).upscaleCommissioningNodes(
            eq(STACK_ID), eq(INSTANCE_GROUP_NAME_ACTIONABLE), listCaptor.capture(), eq(Collections.emptyList()));
    Assert.assertEquals(expectedCount, listCaptor.getValue().size());

    verifyNoMoreInteractions(stopStartUpscaleFlowService);

    // The event that was sent must be a commission request for the started instances only.
    ArgumentCaptor<Object> sentPayloadCaptor = ArgumentCaptor.forClass(Object.class);
    verify(reactorEventFactory).createEvent(anyMap(), sentPayloadCaptor.capture());
    verify(eventBus).notify("STOPSTARTUPSCALECOMMISSIONVIACMREQUEST", event);
    assertThat(sentPayloadCaptor.getValue()).isInstanceOf(StopStartUpscaleCommissionViaCMRequest.class);

    StopStartUpscaleCommissionViaCMRequest commissionRequest =
            (StopStartUpscaleCommissionViaCMRequest) sentPayloadCaptor.getValue();
    Assert.assertEquals(expectedCount, commissionRequest.getStartedInstancesToCommission().size());
}
use of com.sequenceiq.cloudbreak.cloud.event.instance.StopStartUpscaleStartInstancesResult in project cloudbreak by hortonworks.
the class StopStartUpscaleActionsTest method testCmCommissionAction.
/**
 * Commission action in the simple case: every one of the {@code adjustment} affected instances is
 * reported as started, so all of them are recorded, commissioned and placed in the outgoing request,
 * with no failure or shortfall reporting on the flow service.
 */
@Test
void testCmCommissionAction() throws Exception {
    int adjustment = 5;

    AbstractStopStartUpscaleActions<StopStartUpscaleStartInstancesResult> action =
            (AbstractStopStartUpscaleActions<StopStartUpscaleStartInstancesResult>) underTest.cmCommissionAction();
    initActionPrivateFields(action);
    StopStartUpscaleContext stopStartUpscaleContext = createContext(adjustment);

    // Fixture: two instance groups, each with a healthy and a stopped subset.
    List<InstanceMetaData> actionableHealthy =
            generateInstances(5, 100, InstanceStatus.SERVICES_HEALTHY, INSTANCE_GROUP_NAME_ACTIONABLE);
    List<InstanceMetaData> actionableStopped =
            generateInstances(10, 200, InstanceStatus.STOPPED, INSTANCE_GROUP_NAME_ACTIONABLE);
    List<InstanceMetaData> randomHealthy =
            generateInstances(3, 300, InstanceStatus.SERVICES_HEALTHY, INSTANCE_GROUP_NAME_RANDOM);
    List<InstanceMetaData> randomStopped =
            generateInstances(8, 400, InstanceStatus.STOPPED, INSTANCE_GROUP_NAME_RANDOM);

    List<InstanceMetaData> allActionableMetaData = new LinkedList<>(actionableStopped);
    allActionableMetaData.addAll(actionableHealthy);

    List<CloudInstance> stoppedCloudInstances = convertToCloudInstance(actionableStopped);
    List<CloudInstance> allCloudInstances = convertToCloudInstance(allActionableMetaData);

    StopStartUpscaleStartInstancesRequest startInstancesRequest = new StopStartUpscaleStartInstancesRequest(
            cloudContext, cloudCredential, cloudStack, INSTANCE_GROUP_NAME_ACTIONABLE,
            stoppedCloudInstances, allCloudInstances, Collections.emptyList(),
            stopStartUpscaleContext.getAdjustment());
    List<CloudVmInstanceStatus> affectedInstances =
            constructStartedCloudVmInstanceStatus(stoppedCloudInstances, adjustment);
    StopStartUpscaleStartInstancesResult payload =
            new StopStartUpscaleStartInstancesResult(STACK_ID, startInstancesRequest, affectedInstances);

    // Mocks
    mockStackEtc(actionableHealthy, actionableStopped, randomHealthy, randomStopped);
    when(reactorEventFactory.createEvent(anyMap(), isNotNull())).thenReturn(event);

    new AbstractActionTestSupport<>(action).doExecute(stopStartUpscaleContext, payload, Collections.emptyMap());

    // Flow-service interactions. The captor is shared, so each size check must directly follow its capture.
    ArgumentCaptor<List> listCaptor = ArgumentCaptor.forClass(List.class);

    verify(stopStartUpscaleFlowService).instancesStarted(eq(STACK_ID), listCaptor.capture());
    Assert.assertEquals(adjustment, listCaptor.getValue().size());

    verify(stopStartUpscaleFlowService).upscaleCommissioningNodes(
            eq(STACK_ID), eq(INSTANCE_GROUP_NAME_ACTIONABLE), listCaptor.capture(), eq(Collections.emptyList()));
    Assert.assertEquals(adjustment, listCaptor.getValue().size());

    verifyNoMoreInteractions(stopStartUpscaleFlowService);

    // The event that was sent must be a commission request covering every started instance.
    ArgumentCaptor<Object> sentPayloadCaptor = ArgumentCaptor.forClass(Object.class);
    verify(reactorEventFactory).createEvent(anyMap(), sentPayloadCaptor.capture());
    verify(eventBus).notify("STOPSTARTUPSCALECOMMISSIONVIACMREQUEST", event);
    assertThat(sentPayloadCaptor.getValue()).isInstanceOf(StopStartUpscaleCommissionViaCMRequest.class);

    StopStartUpscaleCommissionViaCMRequest commissionRequest =
            (StopStartUpscaleCommissionViaCMRequest) sentPayloadCaptor.getValue();
    Assert.assertEquals(adjustment, commissionRequest.getStartedInstancesToCommission().size());
}
use of com.sequenceiq.cloudbreak.cloud.event.instance.StopStartUpscaleStartInstancesResult in project cloudbreak by hortonworks.
the class StopStartUpscaleStartInstancesHandlerTest method testCloudProviderInstancesInTerminatedEtcStateDuringStartInternal.
/**
 * Shared body for the scenarios where the cloud provider reports some instances in states other than
 * started while they are being started.
 *
 * @param cbStoppedInstanceCount number of stopped cloud instances generated for the host group
 * @param numInstancesToStart    number of instances the request asks to start
 */
private void testCloudProviderInstancesInTerminatedEtcStateDuringStartInternal(int cbStoppedInstanceCount, int numInstancesToStart) {
    // Relies on the CloudConnector API ignoring certain TERMINAL states and still returning the instances
    // with their state set. If the API does not behave that way (to be verified manually), this test is pointless.
    int expectedInstances = Math.min(cbStoppedInstanceCount, numInstancesToStart);

    List<CloudInstance> stoppedInstancesInHg = generateCloudInstances(cbStoppedInstanceCount);
    List<CloudInstance> allInstancesInHg = generateCloudInstances(10);
    List<CloudInstance> startedInstancesWithServicesNotRunning = null;
    StopStartUpscaleStartInstancesRequest request = new StopStartUpscaleStartInstancesRequest(
            cloudContext, cloudCredential, cloudStack, "compute",
            stoppedInstancesInHg, allInstancesInHg, startedInstancesWithServicesNotRunning, numInstancesToStart);

    // Only the first expectedInstances stopped instances are expected to be handed to the connector.
    List<CloudInstance> instancesToStart = stoppedInstancesInHg.subList(0, expectedInstances);
    List<CloudVmInstanceStatus> stoppedStatuses = generateStoppedCloudVmInstanceStatuses(stoppedInstancesInHg);
    List<CloudVmInstanceStatus> startedStatuses = generateStartedCloudVmInstanceStatusesIncludingOtherStates(instancesToStart);

    when(instanceConnector.checkWithoutRetry(any(AuthenticatedContext.class), eq(instancesToStart)))
            .thenReturn(stoppedStatuses);
    when(instanceConnector.startWithLimitedRetry(any(AuthenticatedContext.class), eq(null), eq(instancesToStart), any(Long.class)))
            .thenReturn(startedStatuses);

    Event event = new Event(request);
    underTest.accept(event);

    ArgumentCaptor<Event> resultCaptor = ArgumentCaptor.forClass(Event.class);
    verify(eventBus).notify(any(Object.class), resultCaptor.capture());
    verify(instanceConnector).checkWithoutRetry(any(AuthenticatedContext.class), eq(instancesToStart));
    verify(instanceConnector).startWithLimitedRetry(any(AuthenticatedContext.class), eq(null), eq(instancesToStart), any(Long.class));
    verifyNoMoreInteractions(instanceConnector);

    // Exactly one result event, carrying the start-instances result for the selected instances.
    assertEquals(1, resultCaptor.getAllValues().size());
    Event resultEvent = resultCaptor.getValue();
    assertEquals(StopStartUpscaleStartInstancesResult.class, resultEvent.getData().getClass());

    StopStartUpscaleStartInstancesResult result = (StopStartUpscaleStartInstancesResult) resultEvent.getData();
    verifyAffectedInstancesInResult3(instancesToStart, result.getAffectedInstanceStatuses());
}
Aggregations