use of com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status.AVAILABLE in project cloudbreak by hortonworks.
the class ClusterHostServiceRunner method changePrimaryGateway.
/**
 * Hands the primary gateway role over to the first gateway of the stack that is not currently primary.
 *
 * @param stack the stack whose primary gateway should be replaced
 * @return the hostname of the gateway that was chosen as the new primary
 * @throws CloudbreakException if the stack has no non-primary gateway, or if the orchestrator call fails
 */
public String changePrimaryGateway(Stack stack) throws CloudbreakException {
    GatewayConfig currentPrimary = gatewayConfigService.getPrimaryGatewayConfig(stack);
    List<GatewayConfig> allGateways = gatewayConfigService.getAllGatewayConfigs(stack);
    // Guard clause: without a non-primary gateway there is nothing to promote.
    GatewayConfig replacement = allGateways.stream()
            .filter(gateway -> !gateway.isPrimary())
            .findFirst()
            .orElseThrow(() -> new CloudbreakException("Primary gateway change is not possible because there is no available node for the action"));
    Set<Node> nodes = stackUtil.collectNodes(stack);
    try {
        hostOrchestrator.changePrimaryGateway(currentPrimary, replacement, allGateways, nodes,
                clusterDeletionBasedModel(stack.getId(), stack.getCluster().getId()));
    } catch (CloudbreakOrchestratorException ex) {
        // Surface orchestrator failures as the service-level exception, keeping the cause.
        throw new CloudbreakException(ex);
    }
    return replacement.getHostname();
}
use of com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status.AVAILABLE in project cloudbreak by hortonworks.
the class StopStartDownscaleDecommissionViaCMHandler method doAccept.
/**
 * Decommissions the requested instances through the cluster's decommission service and puts the hosts
 * that were actually decommissioned into maintenance mode.
 *
 * Hosts that were requested but not returned by {@code collectHostsToRemove}, and hosts that were
 * submitted for decommission but are absent from the decommission result, are collected and passed to
 * the result as missing hostnames. That list is {@code null} when no host is missing.
 *
 * @param event the handler event carrying the decommission request
 * @return a result holding the decommissioned and missing hostnames, or a result built from an error
 *         message, the exception and the request if anything in the process throws
 */
@Override
protected Selectable doAccept(HandlerEvent<StopStartDownscaleDecommissionViaCMRequest> event) {
    StopStartDownscaleDecommissionViaCMRequest request = event.getData();
    LOGGER.info("StopStartDownscaleDecommissionViaCMHandler for: {}, {}", event.getData().getResourceId(), event.getData());
    try {
        Stack stack = stackService.getByIdWithLists(request.getResourceId());
        Cluster cluster = stack.getCluster();
        ClusterDecomissionService decommissioner = clusterApiConnectors.getConnector(stack).clusterDecomissionService();

        Set<String> requestedHosts = getHostNamesForPrivateIds(request.getInstanceIdsToDecommission(), stack);
        LOGGER.debug("Attempting to decommission hosts. count={}, hostnames={}", requestedHosts.size(), requestedHosts);

        HostGroup hostGroup = hostGroupService.getByClusterIdAndName(cluster.getId(), request.getHostGroupName())
                .orElseThrow(NotFoundException.notFound("hostgroup", request.getHostGroupName()));
        Map<String, InstanceMetaData> removableHosts = decommissioner.collectHostsToRemove(hostGroup, requestedHosts);

        // Requested hosts that the decommission service did not return as removable.
        List<String> hostsUnknownToCm = Collections.emptyList();
        if (requestedHosts.size() != removableHosts.size()) {
            hostsUnknownToCm = requestedHosts.stream()
                    .filter(name -> !removableHosts.containsKey(name))
                    .collect(Collectors.toList());
            LOGGER.info("Found fewer instances in CM to decommission, as compared to initial ask. foundCount={}, initialCount={}, missingHostsInCm={}",
                    removableHosts.size(), requestedHosts.size(), hostsUnknownToCm);
        }

        // TODO CB-14929: Potentially put the nodes into maintenance mode before decommissioning?
        // TODO CB-15132: Eventually, try parsing the results of the CM decommission, and see if a partial decommission went through in the
        // timebound specified.
        final Set<String> decommissionedHosts;
        if (removableHosts.isEmpty()) {
            decommissionedHosts = Collections.emptySet();
        } else {
            decommissionedHosts = decommissioner.decommissionClusterNodesStopStart(removableHosts, POLL_FOR_10_MINUTES);
            updateInstanceStatuses(removableHosts, decommissionedHosts, InstanceStatus.DECOMMISSIONED, "decommission requested for instances");
        }

        // This doesn't handle failures. It handles scenarios where CM list APIs don't have the necessary hosts available.
        // The list stays null unless at least one hostname turns out to be missing.
        List<String> allMissingHostnames = hostsUnknownToCm.isEmpty() ? null : new LinkedList<>(hostsUnknownToCm);
        if (removableHosts.size() != decommissionedHosts.size()) {
            List<String> notDecommissioned = removableHosts.keySet().stream()
                    .filter(name -> !decommissionedHosts.contains(name))
                    .collect(Collectors.toList());
            LOGGER.info("Decommissioned fewer instances than requested. decommissionedCount={}, expectedCount={}, initialCount={}, notDecommissioned=[{}]",
                    decommissionedHosts.size(), removableHosts.size(), requestedHosts.size(), notDecommissioned);
            if (allMissingHostnames == null) {
                allMissingHostnames = new LinkedList<>();
            }
            allMissingHostnames.addAll(notDecommissioned);
        }

        LOGGER.info("hostsDecommissioned: count={}, hostNames={}", decommissionedHosts.size(), decommissionedHosts);
        if (decommissionedHosts.isEmpty()) {
            LOGGER.debug("No nodes decommissioned, hence no nodes being put into maintenance mode");
        } else {
            LOGGER.debug("Attempting to put decommissioned hosts into maintenance mode. count={}", decommissionedHosts.size());
            flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(), CLUSTER_SCALING_STOPSTART_DOWNSCALE_ENTERINGCMMAINTMODE,
                    String.valueOf(decommissionedHosts.size()));
            decommissioner.enterMaintenanceMode(decommissionedHosts);
            flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(), CLUSTER_SCALING_STOPSTART_DOWNSCALE_ENTEREDCMMAINTMODE,
                    String.valueOf(decommissionedHosts.size()));
            LOGGER.debug("Successfully put decommissioned hosts into maintenance mode. count={}", decommissionedHosts.size());
        }
        return new StopStartDownscaleDecommissionViaCMResult(request, decommissionedHosts, allMissingHostnames);
    } catch (Exception e) {
        // TODO CB-15132: This can be improved based on where and when the Exception occurred to potentially rollback certain aspects.
        // ClusterClientInitException is one which is explicitly thrown.
        String message = "Failed while attempting to decommission nodes via CM";
        LOGGER.error(message, e);
        return new StopStartDownscaleDecommissionViaCMResult(message, e, request);
    }
}
use of com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status.AVAILABLE in project cloudbreak by hortonworks.
the class DistroXService method validate.
/**
 * Validates that the environment named in the request is ready to host a new Data Hub.
 *
 * The checks run in this order, and the first one that fails throws:
 * <ol>
 *   <li>the environment can be fetched by the name given in the request;</li>
 *   <li>a FreeIPA response exists for the environment and reports an available status;</li>
 *   <li>at least one SDX CRN is listed for the environment;</li>
 *   <li>every listed SDX CRN has the {@code AVAILABLE} status check result.</li>
 * </ol>
 *
 * @param request the Data Hub creation request whose environment is validated
 * @throws BadRequestException if any of the checks above fails
 */
private void validate(DistroXV1Request request) {
    // orElseThrow guarantees a non-null environment from here on, so no separate null check is needed.
    DetailedEnvironmentResponse environment = Optional.ofNullable(environmentClientService.getByName(request.getEnvironmentName()))
            .orElseThrow(() -> new BadRequestException("No environment name provided hence unable to obtain some important data"));

    DescribeFreeIpaResponse freeipa = freeipaClientService.getByEnvironmentCrn(environment.getCrn());
    if (freeipa == null || freeipa.getAvailabilityStatus() == null || !freeipa.getAvailabilityStatus().isAvailable()) {
        throw new BadRequestException(format("If you want to provision a Data Hub then the FreeIPA instance must be running in the '%s' Environment.",
                environment.getName()));
    }

    Set<String> sdxCrns = platformAwareSdxConnector.listSdxCrns(environment.getName(), environment.getCrn());
    if (sdxCrns.isEmpty()) {
        throw new BadRequestException(format("Data Lake stack cannot be found for environment CRN: %s (%s)",
                environment.getName(), environment.getCrn()));
    }

    Set<Pair<String, StatusCheckResult>> sdxCrnsWithAvailability =
            platformAwareSdxConnector.listSdxCrnsWithAvailability(environment.getName(), environment.getCrn(), sdxCrns);
    boolean everyDataLakeAvailable = sdxCrnsWithAvailability.stream()
            .map(Pair::getValue)
            .allMatch(StatusCheckResult.AVAILABLE::equals);
    if (!everyDataLakeAvailable) {
        throw new BadRequestException("Data Lake stacks of environment should be available.");
    }
}
use of com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status.AVAILABLE in project cloudbreak by hortonworks.
the class SdxRuntimeUpgradeServiceTest method testCheckForUpgradeByCrnWhenDisabledAndPatchUpdatesAvailable.
/**
 * With the runtime upgrade entitlement stubbed to {@code false} and two candidates returned by the
 * stack endpoint (one on the matching runtime, one on 7.3.0), the response handed to the converter
 * must have an empty reason and exactly one candidate, whose CDP version is the matching runtime.
 */
@Test
@DisplayName("Test checkForUpgradeByCrn() when Runtime Upgrade is disabled and patch updates are available")
public void testCheckForUpgradeByCrnWhenDisabledAndPatchUpdatesAvailable() {
    // Image the cluster is currently running on.
    ImageInfoV4Response runningImage = new ImageInfoV4Response();
    runningImage.setImageId(IMAGE_ID);
    runningImage.setCreated(1);
    runningImage.setComponentVersions(creatImageComponentVersions(MATCHING_TARGET_RUNTIME, MATCHING_TARGET_RUNTIME));

    // Candidate on the same runtime as the running image.
    ImageInfoV4Response sameRuntimeCandidate = new ImageInfoV4Response();
    sameRuntimeCandidate.setImageId(IMAGE_ID);
    sameRuntimeCandidate.setCreated(1);
    sameRuntimeCandidate.setComponentVersions(creatImageComponentVersions(MATCHING_TARGET_RUNTIME, MATCHING_TARGET_RUNTIME));

    // Candidate on a different runtime version.
    ImageInfoV4Response otherRuntimeCandidate = new ImageInfoV4Response();
    otherRuntimeCandidate.setImageId(IMAGE_ID_LAST);
    otherRuntimeCandidate.setCreated(2);
    otherRuntimeCandidate.setComponentVersions(creatImageComponentVersions("7.3.0", "7.3.0"));

    List<ImageInfoV4Response> candidates = List.of(sameRuntimeCandidate, otherRuntimeCandidate);
    response.setCurrent(runningImage);
    response.setUpgradeCandidates(candidates);
    SdxUpgradeResponse expectedResponse =
            new SdxUpgradeResponse(response.getCurrent(), candidates, response.getReason(), response.getFlowIdentifier());

    ArgumentCaptor<UpgradeV4Response> converterInput = ArgumentCaptor.forClass(UpgradeV4Response.class);
    when(sdxService.getByCrn(anyString(), anyString())).thenReturn(sdxCluster);
    when(entitlementService.runtimeUpgradeEnabled(any())).thenReturn(false);
    when(stackV4Endpoint.checkForClusterUpgradeByName(anyLong(), anyString(), any(), anyString())).thenReturn(response);
    when(regionAwareInternalCrnGenerator.getInternalCrnForServiceAsString()).thenReturn("crn");
    when(regionAwareInternalCrnGeneratorFactory.iam()).thenReturn(regionAwareInternalCrnGenerator);
    when(sdxUpgradeClusterConverter.upgradeResponseToSdxUpgradeResponse(converterInput.capture())).thenReturn(expectedResponse);

    SdxUpgradeResponse actualResponse = underTest.checkForUpgradeByCrn(USER_CRN, STACK_CRN, new SdxUpgradeRequest(), ACCOUNT_ID);

    UpgradeV4Response passedToConverter = converterInput.getValue();
    assertEquals(expectedResponse, actualResponse);
    assertTrue(StringUtils.isEmpty(passedToConverter.getReason()));
    assertEquals(1, passedToConverter.getUpgradeCandidates().size());
    assertEquals(MATCHING_TARGET_RUNTIME, passedToConverter.getUpgradeCandidates().get(0).getComponentVersions().getCdp());
}
use of com.sequenceiq.cloudbreak.api.endpoint.v4.common.Status.AVAILABLE in project cloudbreak by hortonworks.
the class DistroXRepairTests method testEphemeralDistroXMasterRepairWithTerminatedEC2Instances.
/**
 * Creates a DistroX cluster, deletes the instances of its MASTER host group through the cloud provider,
 * repairs that host group, and then compares the volume ids of the MASTER instances after the repair
 * with the ones recorded before the deletion.
 *
 * @param testContext the test context supplied by the data provider
 */
@Test(dataProvider = TEST_CONTEXT)
@UseSpotInstances
@Description(given = "there is a running Cloudbreak, and an environment with SDX and DistroX cluster in available state", when = "recovery called on the MASTER host group of DistroX cluster, where the EC2 instance had been terminated", then = "DistroX recovery should be successful, the cluster should be up and running")
public void testEphemeralDistroXMasterRepairWithTerminatedEC2Instances(TestContext testContext) {
    String distrox = resourcePropertyProvider().getName();
    List<String> volumeIdsAfterRepair = new ArrayList<>();
    List<String> volumeIdsBeforeDeletion = new ArrayList<>();
    String workloadUser = SanitizerUtil.sanitizeWorkloadUsername(testContext.getActingUserCrn().getResource());

    testContext
            .given(distrox, DistroXTestDto.class)
            .withInstanceGroupsEntity(new DistroXInstanceGroupsBuilder(testContext)
                    .defaultHostGroup()
                    .withStorageOptimizedInstancetype()
                    .build())
            .when(distroXTestClient.create(), key(distrox))
            .await(STACK_AVAILABLE)
            .awaitForHealthyInstances()
            .then(this::verifyMountedDisks)
            .then((context, dto, client) -> {
                // Record the MASTER volume ids first, then delete those instances on the provider side.
                CloudFunctionality provider = context.getCloudProvider().getCloudFunctionality();
                List<String> masterInstanceIds = distroxUtil.getInstanceIds(dto, client, MASTER.getName());
                volumeIdsBeforeDeletion.addAll(provider.listInstanceVolumeIds(dto.getName(), masterInstanceIds));
                provider.deleteInstances(dto.getName(), masterInstanceIds);
                return dto;
            })
            .awaitForHostGroup(MASTER.getName(), InstanceStatus.DELETED_ON_PROVIDER_SIDE)
            .when(distroXTestClient.repair(MASTER), key(distrox))
            .await(STACK_AVAILABLE, key(distrox))
            .awaitForHealthyInstances()
            .then(this::verifyMountedDisks)
            .then((context, dto, client) ->
                    clouderaManagerUtil.checkClouderaManagerYarnNodemanagerRoleConfigGroups(dto, workloadUser, MOCK_UMS_PASSWORD))
            .then((context, dto, client) -> {
                // Collect the MASTER volume ids again once the repair has finished.
                CloudFunctionality provider = context.getCloudProvider().getCloudFunctionality();
                List<String> masterInstanceIds = distroxUtil.getInstanceIds(dto, client, MASTER.getName());
                volumeIdsAfterRepair.addAll(provider.listInstanceVolumeIds(dto.getName(), masterInstanceIds));
                return dto;
            })
            .then((context, dto, client) -> VolumeUtils.compareVolumeIdsAfterRepair(dto, volumeIdsAfterRepair, volumeIdsBeforeDeletion))
            .validate();
}
Aggregations