use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.
the class StopStartDownscaleDecommissionViaCMHandler method updateInstanceStatuses.
private void updateInstanceStatuses(Map<String, InstanceMetaData> instanceMetadataMap, Set<String> fqdnsRemoved, InstanceStatus instanceStatus, String statusReason) {
for (String fqdn : fqdnsRemoved) {
InstanceMetaData instanceMetaData = instanceMetadataMap.get(fqdn);
if (instanceMetaData == null) {
throw new RuntimeException(String.format("Unexpected fqdn decommissioned. Not present in requsted list. unexpected fqdn=[%s], ExpectedSet=[%s]", fqdn, instanceMetadataMap));
}
instanceMetaDataService.updateInstanceStatus(instanceMetaData, instanceStatus, statusReason);
}
}
use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.
the class StopStartDownscaleDecommissionViaCMHandler method doAccept.
@Override
protected Selectable doAccept(HandlerEvent<StopStartDownscaleDecommissionViaCMRequest> event) {
StopStartDownscaleDecommissionViaCMRequest request = event.getData();
LOGGER.info("StopStartDownscaleDecommissionViaCMHandler for: {}, {}", event.getData().getResourceId(), event.getData());
try {
Stack stack = stackService.getByIdWithLists(request.getResourceId());
Cluster cluster = stack.getCluster();
ClusterDecomissionService clusterDecomissionService = clusterApiConnectors.getConnector(stack).clusterDecomissionService();
Set<String> hostNames = getHostNamesForPrivateIds(request.getInstanceIdsToDecommission(), stack);
LOGGER.debug("Attempting to decommission hosts. count={}, hostnames={}", hostNames.size(), hostNames);
HostGroup hostGroup = hostGroupService.getByClusterIdAndName(cluster.getId(), request.getHostGroupName()).orElseThrow(NotFoundException.notFound("hostgroup", request.getHostGroupName()));
Map<String, InstanceMetaData> hostsToRemove = clusterDecomissionService.collectHostsToRemove(hostGroup, hostNames);
List<String> missingHostsInCm = Collections.emptyList();
if (hostNames.size() != hostsToRemove.size()) {
missingHostsInCm = hostNames.stream().filter(h -> !hostsToRemove.containsKey(h)).collect(Collectors.toList());
LOGGER.info("Found fewer instances in CM to decommission, as compared to initial ask. foundCount={}, initialCount={}, missingHostsInCm={}", hostsToRemove.size(), hostNames.size(), missingHostsInCm);
}
// TODO CB-14929: Potentially put the nodes into maintenance mode before decommissioning?
// TODO CB-15132: Eventually, try parsing the results of the CM decommission, and see if a partial decommission went through in the
// timebound specified.
Set<String> decommissionedHostNames = Collections.emptySet();
if (hostsToRemove.size() > 0) {
decommissionedHostNames = clusterDecomissionService.decommissionClusterNodesStopStart(hostsToRemove, POLL_FOR_10_MINUTES);
updateInstanceStatuses(hostsToRemove, decommissionedHostNames, InstanceStatus.DECOMMISSIONED, "decommission requested for instances");
}
// This doesn't handle failures. It handles scenarios where CM list APIs don't have the necessary hosts available.
List<String> allMissingHostnames = null;
if (missingHostsInCm.size() > 0) {
allMissingHostnames = new LinkedList<>(missingHostsInCm);
}
if (hostsToRemove.size() != decommissionedHostNames.size()) {
Set<String> finalDecommissionedHostnames = decommissionedHostNames;
List<String> additionalMissingDecommissionHostnames = hostsToRemove.keySet().stream().filter(h -> !finalDecommissionedHostnames.contains(h)).collect(Collectors.toList());
LOGGER.info("Decommissioned fewer instances than requested. decommissionedCount={}, expectedCount={}, initialCount={}, notDecommissioned=[{}]", decommissionedHostNames.size(), hostsToRemove.size(), hostNames.size(), additionalMissingDecommissionHostnames);
if (allMissingHostnames == null) {
allMissingHostnames = new LinkedList<>();
}
allMissingHostnames.addAll(additionalMissingDecommissionHostnames);
}
LOGGER.info("hostsDecommissioned: count={}, hostNames={}", decommissionedHostNames.size(), decommissionedHostNames);
if (decommissionedHostNames.size() > 0) {
LOGGER.debug("Attempting to put decommissioned hosts into maintenance mode. count={}", decommissionedHostNames.size());
flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(), CLUSTER_SCALING_STOPSTART_DOWNSCALE_ENTERINGCMMAINTMODE, String.valueOf(decommissionedHostNames.size()));
clusterDecomissionService.enterMaintenanceMode(decommissionedHostNames);
flowMessageService.fireEventAndLog(stack.getId(), UPDATE_IN_PROGRESS.name(), CLUSTER_SCALING_STOPSTART_DOWNSCALE_ENTEREDCMMAINTMODE, String.valueOf(decommissionedHostNames.size()));
LOGGER.debug("Successfully put decommissioned hosts into maintenance mode. count={}", decommissionedHostNames.size());
} else {
LOGGER.debug("No nodes decommissioned, hence no nodes being put into maintenance mode");
}
return new StopStartDownscaleDecommissionViaCMResult(request, decommissionedHostNames, allMissingHostnames);
} catch (Exception e) {
// TODO CB-15132: This can be improved based on where and when the Exception occurred to potentially rollback certain aspects.
// ClusterClientInitException is one which is explicitly thrown.
String message = "Failed while attempting to decommission nodes via CM";
LOGGER.error(message, e);
return new StopStartDownscaleDecommissionViaCMResult(message, e, request);
}
}
use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.
the class RestartClusterManagerServerHandler method restartCMServer.
private void restartCMServer(Stack stack) throws Exception {
Cluster cluster = stack.getCluster();
InstanceMetaData gatewayInstance = stack.getPrimaryGatewayInstance();
GatewayConfig gatewayConfig = gatewayConfigService.getGatewayConfig(stack, gatewayInstance, cluster.hasGateway());
Set<String> gatewayFQDN = Collections.singleton(gatewayInstance.getDiscoveryFQDN());
ExitCriteriaModel exitModel = ClusterDeletionBasedExitCriteriaModel.clusterDeletionBasedModel(stack.getId(), cluster.getId());
Set<Node> nodes = stackUtil.collectReachableNodes(stack);
Set<String> agentTargets = nodes.stream().map(Node::getHostname).collect(Collectors.toSet());
hostOrchestrator.updateAgentCertDirectoryPermission(gatewayConfig, agentTargets, nodes, exitModel);
hostOrchestrator.restartClusterManagerOnMaster(gatewayConfig, gatewayFQDN, nodes, exitModel);
}
use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.
the class DatabaseBackupHandler method doAccept.
@Override
protected Selectable doAccept(HandlerEvent<DatabaseBackupRequest> event) {
DatabaseBackupRequest request = event.getData();
Selectable result;
Long stackId = request.getResourceId();
LOGGER.debug("Backing up database on stack {}, backup id {}", stackId, request.getBackupId());
try {
Stack stack = stackService.getByIdWithListsInTransaction(stackId);
Cluster cluster = stack.getCluster();
InstanceMetaData gatewayInstance = stack.getPrimaryGatewayInstance();
GatewayConfig gatewayConfig = gatewayConfigService.getGatewayConfig(stack, gatewayInstance, cluster.hasGateway());
Set<String> gatewayFQDN = Collections.singleton(gatewayInstance.getDiscoveryFQDN());
ExitCriteriaModel exitModel = ClusterDeletionBasedExitCriteriaModel.clusterDeletionBasedModel(stackId, cluster.getId());
String rangerAdminGroup = rangerVirtualGroupService.getRangerVirtualGroup(stack);
SaltConfig saltConfig = saltConfigGenerator.createSaltConfig(request.getBackupLocation(), request.getBackupId(), rangerAdminGroup, request.getCloseConnections(), stack);
hostOrchestrator.backupDatabase(gatewayConfig, gatewayFQDN, stackUtil.collectReachableNodes(stack), saltConfig, exitModel);
result = new DatabaseBackupSuccess(stackId);
} catch (Exception e) {
LOGGER.error("Database backup event failed", e);
result = new DatabaseBackupFailedEvent(stackId, e, DetailedStackStatus.DATABASE_BACKUP_FAILED);
}
return result;
}
use of com.sequenceiq.cloudbreak.domain.stack.instance.InstanceMetaData in project cloudbreak by hortonworks.
the class RegisterPublicDnsHandler method doAccept.
@Override
protected Selectable doAccept(HandlerEvent<RegisterPublicDnsRequest> event) {
RegisterPublicDnsRequest request = event.getData();
Stack stack = request.getStack();
if (gatewayPublicEndpointManagementService.isCertRenewalTriggerable(stack)) {
try {
LOGGER.debug("Fetching instance group and instance metadata for stack.");
InstanceGroup instanceGroup = instanceGroupService.getPrimaryGatewayInstanceGroupByStackId(stack.getId());
List<InstanceMetaData> instanceMetaData = instanceMetaDataService.findAliveInstancesInInstanceGroup(instanceGroup.getId());
if (!instanceMetaData.isEmpty()) {
stack.getInstanceGroups().stream().filter(ig -> ig.getId().equals(instanceGroup.getId())).forEach(ig -> ig.setInstanceMetaData(Set.copyOf(instanceMetaData)));
LOGGER.debug("Registering load balancer public DNS entry");
boolean success = clusterPublicEndpointManagementService.provisionLoadBalancer(stack);
if (!success) {
throw new CloudbreakException("Public DNS registration resulted in failed state. Please consult DNS registration logs.");
}
LOGGER.debug("Load balancer public DNS registration was successful");
return new RegisterPublicDnsSuccess(stack);
} else {
throw new CloudbreakException("Unable to find instance metadata for primary instance group. Certificates cannot " + "be updated.");
}
} catch (Exception e) {
LOGGER.warn("Failed to register load balancer public DNS entries.", e);
return new RegisterPublicDnsFailure(request.getResourceId(), e);
}
} else {
LOGGER.info("Certificates and DNS are not managed by PEM for stack {}. Skipping public DNS registration.", stack.getName());
return new RegisterPublicDnsSuccess(stack);
}
}
Aggregations