use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.
the class StopStartDownscaleActions method decommissionViaCmFailedAction.
@Bean(name = "STOPSTART_DOWNSCALE_DECOMMISSION_VIA_CM_FAILED_STATE")
public Action<?, ?> decommissionViaCmFailedAction() {
return new AbstractStopStartDownscaleActions<>(StopStartDownscaleDecommissionViaCMResult.class) {
@Override
protected void doExecute(StopStartDownscaleContext context, StopStartDownscaleDecommissionViaCMResult payload, Map<Object, Object> variables) throws Exception {
LOGGER.warn("Failure during the decommissionViaCm step");
// TODO CB-14929. Should the nodes be put into an ORCHESTRATOR_FAILED state? What are the manual recovery steps from this state.
Set<String> hostnames = getHostNamesForPrivateIds(payload.getRequest().getInstanceIdsToDecommission(), context.getStack());
stopStartDownscaleFlowService.decommissionViaCmFailed(payload.getResourceId(), hostnames);
sendEvent(context, STOPSTART_DOWNSCALE_FAILURE_EVENT.event(), new StackFailureEvent(payload.getResourceId(), payload.getErrorDetails()));
}
private Set<String> getHostNamesForPrivateIds(Set<Long> hostIdsToRemove, Stack stack) {
return hostIdsToRemove.stream().map(privateId -> {
Optional<InstanceMetaData> instanceMetadata = stackService.getInstanceMetadata(stack.getInstanceMetaDataAsList(), privateId);
return instanceMetadata.map(InstanceMetaData::getDiscoveryFQDN).orElse(null);
}).filter(StringUtils::isNotEmpty).collect(Collectors.toSet());
}
};
}
use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.
the class StopStartUpscaleActions method cmCommissionAction.
@Bean(name = "STOPSTART_UPSCALE_HOSTS_COMMISSION_STATE")
public Action<?, ?> cmCommissionAction() {
return new AbstractStopStartUpscaleActions<>(StopStartUpscaleStartInstancesResult.class) {
@Override
protected void doExecute(StopStartUpscaleContext context, StopStartUpscaleStartInstancesResult payload, Map<Object, Object> variables) throws Exception {
// Update instance metadata for successful nodes before handling / logging info about failures.
List<CloudVmInstanceStatus> cloudVmInstanceStatusList = payload.getAffectedInstanceStatuses();
Set<String> cloudInstanceIdsStarted = cloudVmInstanceStatusList.stream().filter(x -> x.getStatus() == InstanceStatus.STARTED).map(x -> x.getCloudInstance().getInstanceId()).collect(Collectors.toUnmodifiableSet());
List<InstanceMetaData> startedInstancesMetaData = cloudInstanceIdToInstanceMetaDataConverter.getNotDeletedAndNotZombieInstances(context.getStack(), context.getHostGroupName(), cloudInstanceIdsStarted);
clusterUpscaleFlowService.instancesStarted(context.getStack().getId(), startedInstancesMetaData);
handleInstanceUnsuccessfulStart(context, cloudVmInstanceStatusList);
// This list is currently empty. It could be populated later in another flow-step by querying CM to get service health.
// Meant to be a mechanism which detects cloud instances which are RUNNING, but not being utilized (likely due to previous failures)
List<CloudInstance> instancesWithServicesNotRunning = payload.getStartInstanceRequest().getStartedInstancesWithServicesNotRunning();
List<InstanceMetaData> metaDataWithServicesNotRunning = cloudInstanceIdToInstanceMetaDataConverter.getNotDeletedAndNotZombieInstances(context.getStack(), context.getHostGroupName(), instancesWithServicesNotRunning.stream().map(i -> i.getInstanceId()).collect(Collectors.toUnmodifiableSet()));
LOGGER.info("StartedInstancesCount={}, StartedInstancesMetadataCount={}," + " instancesWithServicesNotRunningCount={}, instancesWithServicesNotRunningMetadataCount={}", cloudInstanceIdsStarted.size(), startedInstancesMetaData.size(), instancesWithServicesNotRunning.size(), metaDataWithServicesNotRunning.size());
int toCommissionNodeCount = metaDataWithServicesNotRunning.size() + startedInstancesMetaData.size();
if (toCommissionNodeCount < context.getAdjustment()) {
LOGGER.warn("Not enough nodes found to commission. DesiredCount={}, availableCount={}", context.getAdjustment(), toCommissionNodeCount);
clusterUpscaleFlowService.warnNotEnoughInstances(context.getStack().getId(), context.getHostGroupName(), context.getAdjustment(), toCommissionNodeCount);
}
clusterUpscaleFlowService.upscaleCommissioningNodes(context.getStack().getId(), context.getHostGroupName(), startedInstancesMetaData, metaDataWithServicesNotRunning);
StopStartUpscaleCommissionViaCMRequest commissionRequest = new StopStartUpscaleCommissionViaCMRequest(context.getStack().getId(), context.getHostGroupName(), startedInstancesMetaData, metaDataWithServicesNotRunning);
sendEvent(context, commissionRequest);
}
private void handleInstanceUnsuccessfulStart(StopStartUpscaleContext context, List<CloudVmInstanceStatus> cloudVmInstanceStatusList) {
try {
List<CloudVmInstanceStatus> instancesNotInDesiredState = cloudVmInstanceStatusList.stream().filter(i -> i.getStatus() != InstanceStatus.STARTED).collect(Collectors.toList());
if (instancesNotInDesiredState.size() > 0) {
// Not updating the status of these instances in the DB. Instead letting the regular syncer threads take care of this.
// This is in case there is additional logic in the syncers while processing Instance state changes.
LOGGER.warn("Some instances could not be started: count={}, instances={}", instancesNotInDesiredState.size(), instancesNotInDesiredState);
clusterUpscaleFlowService.logInstancesFailedToStart(context.getStack().getId(), instancesNotInDesiredState);
// TODO CB-15132: Eventually, we may want to take some corrective action.
}
} catch (Exception e) {
LOGGER.warn("Failed while attempting to log info about instances which did not start. Ignoring, and letting flow proceed", e);
}
}
};
}
use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.
the class ChangePrimaryGatewayService method primaryGatewayChanged.
public void primaryGatewayChanged(long stackId, String newPrimaryGatewayFQDN) throws CloudbreakException, TransactionExecutionException {
LOGGER.info("Update primary gateway ip");
Set<InstanceMetaData> imds = instanceMetaDataService.findNotTerminatedAndNotZombieForStack(stackId);
Optional<InstanceMetaData> formerPrimaryGateway = imds.stream().filter(imd -> imd.getInstanceMetadataType() == InstanceMetadataType.GATEWAY_PRIMARY).findFirst();
Optional<InstanceMetaData> newPrimaryGateway = imds.stream().filter(imd -> imd.getDiscoveryFQDN() != null && imd.getDiscoveryFQDN().equals(newPrimaryGatewayFQDN)).findFirst();
if (newPrimaryGateway.isPresent() && formerPrimaryGateway.isPresent()) {
InstanceMetaData fpg = formerPrimaryGateway.get();
fpg.setInstanceMetadataType(InstanceMetadataType.GATEWAY);
fpg.setServer(Boolean.FALSE);
transactionService.required(() -> {
instanceMetaDataService.save(fpg);
InstanceMetaData npg = newPrimaryGateway.get();
npg.setInstanceMetadataType(InstanceMetadataType.GATEWAY_PRIMARY);
npg.setServer(Boolean.TRUE);
instanceMetaDataService.save(npg);
Stack updatedStack = stackService.getByIdWithListsInTransaction(stackId);
String gatewayIp = gatewayConfigService.getPrimaryGatewayIp(updatedStack);
Cluster cluster = updatedStack.getCluster();
cluster.setClusterManagerIp(gatewayIp);
LOGGER.info("Primary gateway IP has been updated to: '{}'", gatewayIp);
clusterService.save(cluster);
clusterPublicEndpointManagementService.changeGateway(updatedStack);
return null;
});
} else {
throw new CloudbreakException("Primary gateway change was not successful.");
}
}
use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.
the class ClusterStartService method updateInstancesToHealthy.
private void updateInstancesToHealthy(StackView stack) {
try {
transactionService.required(() -> {
Set<InstanceMetaData> instances = instanceMetaDataService.findNotTerminatedAndNotZombieForStack(stack.getId());
for (InstanceMetaData metaData : instances) {
metaData.setInstanceStatus(com.sequenceiq.cloudbreak.api.endpoint.v4.stacks.base.InstanceStatus.SERVICES_HEALTHY);
}
instanceMetaDataService.saveAll(instances);
});
} catch (TransactionExecutionException e) {
throw new TransactionRuntimeExecutionException(e);
}
}
use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.
the class ClusterUpscaleFlowService method updateMissingHostMetaDatas.
private int updateMissingHostMetaDatas(Collection<String> successHosts, Iterable<InstanceMetaData> instanceMetaDatas, InstanceStatus instanceStatus) {
int failedHosts = 0;
for (InstanceMetaData metaData : instanceMetaDatas) {
if (!successHosts.contains(metaData.getDiscoveryFQDN())) {
instanceMetaDataService.updateInstanceStatus(metaData, instanceStatus, "Cluster upscale failed. Host does not have fqdn.");
failedHosts++;
}
}
return failedHosts;
}
Aggregations