Use of com.cloud.dc.HostPodVO in project cosmic by MissionCriticalCloud: class FirstFitPlanner, method orderClusters.
@Override
public List<Long> orderClusters(final VirtualMachineProfile vmProfile, final DeploymentPlan plan, final ExcludeList avoid) throws InsufficientServerCapacityException {
    final VirtualMachine vm = vmProfile.getVirtualMachine();
    final Zone zone = zoneRepository.findOne(vm.getDataCenterId());
    // check if datacenter is in avoid set
    if (avoid.shouldAvoid(zone)) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("DataCenter id = '" + zone.getId() + "' provided is in avoid set, DeploymentPlanner cannot allocate the VM, returning.");
        }
        return null;
    }
    List<Long> clusterList = new ArrayList<>();
    if (plan.getClusterId() != null) {
        final Long clusterIdSpecified = plan.getClusterId();
        s_logger.debug("Searching resources only under specified Cluster: " + clusterIdSpecified);
        final ClusterVO cluster = clusterDao.findById(plan.getClusterId());
        if (cluster != null) {
            if (avoid.shouldAvoid(cluster)) {
                s_logger.debug("The specified cluster is in avoid set, returning.");
            } else {
                clusterList.add(clusterIdSpecified);
                removeClustersCrossingThreshold(clusterList, avoid, vmProfile, plan);
            }
        } else {
            s_logger.debug("The specified cluster cannot be found, returning.");
            avoid.addCluster(plan.getClusterId());
            return null;
        }
    } else if (plan.getPodId() != null) {
        // consider clusters under this pod only
        final Long podIdSpecified = plan.getPodId();
        s_logger.debug("Searching resources only under specified Pod: " + podIdSpecified);
        final HostPodVO pod = podDao.findById(podIdSpecified);
        if (pod != null) {
            if (avoid.shouldAvoid(pod)) {
                s_logger.debug("The specified pod is in avoid set, returning.");
            } else {
                clusterList = scanClustersForDestinationInZoneOrPod(podIdSpecified, false, vmProfile, plan, avoid);
                if (clusterList == null) {
                    avoid.addPod(plan.getPodId());
                }
            }
        } else {
            s_logger.debug("The specified Pod cannot be found, returning.");
            avoid.addPod(plan.getPodId());
            return null;
        }
    } else {
        s_logger.debug("Searching all possible resources under this Zone: " + plan.getDataCenterId());
        final boolean applyAllocationAtPods = Boolean.parseBoolean(configDao.getValue(Config.ApplyAllocationAlgorithmToPods.key()));
        if (applyAllocationAtPods) {
            // start scan at all pods under this zone.
            clusterList = scanPodsForDestination(vmProfile, plan, avoid);
        } else {
            // start scan at clusters under this zone.
            clusterList = scanClustersForDestinationInZoneOrPod(plan.getDataCenterId(), true, vmProfile, plan, avoid);
        }
    }
    if (clusterList != null && !clusterList.isEmpty()) {
        final ServiceOffering offering = vmProfile.getServiceOffering();
        // In case of non-GPU VMs, protect GPU enabled Hosts and prefer VM deployment on non-GPU Hosts.
        if ((serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString()) == null) && !(hostGpuGroupsDao.listHostIds().isEmpty())) {
            final int requiredCpu = offering.getCpu();
            final long requiredRam = offering.getRamSize() * 1024L * 1024L;
            reorderClustersBasedOnImplicitTags(clusterList, requiredCpu, requiredRam);
        }
    }
    return clusterList;
}
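The HostPodVO usage in this planner is the pod-scoped branch: the pod is resolved by id, checked against the avoid set, and only then are its clusters scanned. A condensed sketch of that pattern follows; it assumes podDao, avoid, vmProfile, plan and a podIdSpecified are wired as in the method above, and it simplifies the control flow (it is not a drop-in replacement).

// Sketch only: condensed from the pod branch of orderClusters above.
final HostPodVO pod = podDao.findById(podIdSpecified);
if (pod == null) {
    avoid.addPod(podIdSpecified);       // unknown pod: exclude it from further planning
    return null;
}
if (avoid.shouldAvoid(pod)) {
    return null;                        // pod already excluded, nothing to scan
}
List<Long> clusterList = scanClustersForDestinationInZoneOrPod(podIdSpecified, false, vmProfile, plan, avoid);
if (clusterList == null) {
    avoid.addPod(podIdSpecified);       // no usable cluster under this pod
}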
Use of com.cloud.dc.HostPodVO in project cosmic by MissionCriticalCloud: class HighAvailabilityManagerImpl, method restart.
protected Long restart(final HaWorkVO work) {
    List<HaWorkVO> items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId());
    if (items.size() > 0) {
        final StringBuilder str = new StringBuilder("Cancelling this work item because newer ones have been scheduled. Work Ids = [");
        for (final HaWorkVO item : items) {
            str.append(item.getId()).append(", ");
        }
        str.delete(str.length() - 2, str.length()).append("]");
        s_logger.info(str.toString());
        return null;
    }
    items = _haDao.listRunningHaWorkForVm(work.getInstanceId());
    if (items.size() > 0) {
        final StringBuilder str = new StringBuilder("Waiting because there's HA work being executed on an item currently. Work Ids =[");
        for (final HaWorkVO item : items) {
            str.append(item.getId()).append(", ");
        }
        str.delete(str.length() - 2, str.length()).append("]");
        s_logger.info(str.toString());
        return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
    }
    final long vmId = work.getInstanceId();
    VirtualMachine vm = _itMgr.findById(work.getInstanceId());
    if (vm == null) {
        s_logger.info("Unable to find vm: " + vmId);
        return null;
    }
    s_logger.info("HA on " + vm);
    if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) {
        s_logger.info("VM " + vm + " has been changed. Current State = " + vm.getState() + " Previous State = " + work.getPreviousState() + " last updated = " + vm.getUpdated() + " previous updated = " + work.getUpdateTime());
        return null;
    }
    AlertManager.AlertType alertType = AlertManager.AlertType.ALERT_TYPE_USERVM;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY;
    } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_SSVM;
    }
    HostVO host = _hostDao.findById(work.getHostId());
    boolean isHostRemoved = false;
    if (host == null) {
        host = _hostDao.findByIdIncludingRemoved(work.getHostId());
        if (host != null) {
            s_logger.debug("VM " + vm.toString() + " is now no longer on host " + work.getHostId() + " as the host is removed");
            isHostRemoved = true;
        }
    }
    final Zone zone = zoneRepository.findOne(host.getDataCenterId());
    final HostPodVO podVO = _podDao.findById(host.getPodId());
    final String hostDesc = "name: " + host.getName() + "(id:" + host.getId() + "), availability zone: " + zone.getName() + ", pod: " + podVO.getName();
    Boolean alive = null;
    if (work.getStep() == Step.Investigating) {
        if (!isHostRemoved) {
            if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
                s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId());
                return null;
            }
            Investigator investigator = null;
            for (final Investigator it : investigators) {
                investigator = it;
                try {
                    alive = investigator.isVmAlive(vm, host);
                    s_logger.info(investigator.getName() + " found " + vm + " to be alive? " + alive);
                    break;
                } catch (final UnknownVM e) {
                    s_logger.info(investigator.getName() + " could not find " + vm);
                }
            }
            boolean fenced = false;
            if (alive == null) {
                s_logger.debug("Fencing off VM that we don't know the state of");
                for (final FenceBuilder fb : fenceBuilders) {
                    final Boolean result = fb.fenceOff(vm, host);
                    s_logger.info("Fencer " + fb.getName() + " returned " + result);
                    if (result != null && result) {
                        fenced = true;
                        break;
                    }
                }
            } else if (!alive) {
                fenced = true;
            } else {
                s_logger.debug("VM " + vm.getInstanceName() + " is found to be alive by " + investigator.getName());
                if (host.getStatus() == Status.Up) {
                    s_logger.info(vm + " is alive and host is up. No need to restart it.");
                    return null;
                } else {
                    s_logger.debug("Rescheduling because the host is not up but the vm is alive");
                    return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
                }
            }
            if (!fenced) {
                s_logger.debug("We were unable to fence off the VM " + vm);
                _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
                return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
            }
            try {
                _itMgr.advanceStop(vm.getUuid(), true);
            } catch (final ResourceUnavailableException e) {
                assert false : "How do we hit this when force is true?";
                throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
            } catch (final OperationTimedoutException e) {
                assert false : "How do we hit this when force is true?";
                throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
            } catch (final ConcurrentOperationException e) {
                assert false : "How do we hit this when force is true?";
                throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
            }
            work.setStep(Step.Scheduled);
            _haDao.update(work.getId(), work);
        } else {
            s_logger.debug("How come that HA step is Investigating and the host is removed? Calling forced Stop on Vm anyways");
            try {
                _itMgr.advanceStop(vm.getUuid(), true);
            } catch (final ResourceUnavailableException e) {
                assert false : "How do we hit this when force is true?";
                throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
            } catch (final OperationTimedoutException e) {
                assert false : "How do we hit this when force is true?";
                throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
            } catch (final ConcurrentOperationException e) {
                assert false : "How do we hit this when force is true?";
                throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
            }
        }
    }
    vm = _itMgr.findById(vm.getId());
    if (!_forceHA && !vm.isHaEnabled()) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("VM is not HA enabled so we're done.");
        }
        // VM doesn't require HA
        return null;
    }
    if ((host == null || host.getRemoved() != null || host.getState() != Status.Up) && !volumeMgr.canVmRestartOnAnotherServer(vm.getId())) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("VM can not restart on another server.");
        }
        return null;
    }
    try {
        final HashMap<VirtualMachineProfile.Param, Object> params = new HashMap<>();
        if (_haTag != null) {
            params.put(VirtualMachineProfile.Param.HaTag, _haTag);
        }
        final WorkType wt = work.getWorkType();
        if (wt.equals(WorkType.HA)) {
            params.put(VirtualMachineProfile.Param.HaOperation, true);
        }
        try {
            // First try starting the vm with its original planner, if it doesn't succeed send HAPlanner as its an emergency.
            _itMgr.advanceStart(vm.getUuid(), params, null);
        } catch (final InsufficientCapacityException e) {
            s_logger.warn("Failed to deploy vm " + vmId + " with original planner, sending HAPlanner");
            _itMgr.advanceStart(vm.getUuid(), params, _haPlanners.get(0));
        }
        final VMInstanceVO started = _instanceDao.findById(vm.getId());
        if (started != null && started.getState() == VirtualMachine.State.Running) {
            s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId());
            return null;
        }
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval);
        }
    } catch (final InsufficientCapacityException e) {
        s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
    } catch (final ResourceUnavailableException e) {
        s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
    } catch (final ConcurrentOperationException e) {
        s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
    } catch (final OperationTimedoutException e) {
        s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
    }
    vm = _itMgr.findById(vm.getId());
    work.setUpdateTime(vm.getUpdated());
    work.setPreviousState(vm.getState());
    return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
}
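HostPodVO only appears in this method to enrich alert text: the pod of the failed host is looked up so its name can be included in the host description. A minimal sketch of that lookup, assuming _podDao and zoneRepository are injected as in HighAvailabilityManagerImpl above; the null guards are an addition for illustration and are not in the snippet itself.

// Sketch: build the host description used in HA alerts.
final Zone zone = zoneRepository.findOne(host.getDataCenterId());
final HostPodVO podVO = _podDao.findById(host.getPodId());
final String hostDesc = "name: " + host.getName() + " (id:" + host.getId()
        + "), availability zone: " + (zone != null ? zone.getName() : "unknown")
        + ", pod: " + (podVO != null ? podVO.getName() : "unknown");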
Use of com.cloud.dc.HostPodVO in project cosmic by MissionCriticalCloud: class HighAvailabilityManagerImpl, method scheduleRestartForVmsOnHost.
@Override
public void scheduleRestartForVmsOnHost(final HostVO host, final boolean investigate) {
    if (host.getType() != Host.Type.Routing) {
        return;
    }
    s_logger.warn("Scheduling restart for VMs on host " + host.getId() + "-" + host.getName());
    final List<VMInstanceVO> vms = _instanceDao.listByHostId(host.getId());
    final Zone zone = zoneRepository.findOne(host.getDataCenterId());
    // send an email alert that the host is down
    StringBuilder sb = null;
    final List<VMInstanceVO> reorderedVMList = new ArrayList<>();
    if (vms != null && !vms.isEmpty()) {
        sb = new StringBuilder();
        sb.append(" Starting HA on the following VMs:");
        // collect list of vm names for the alert email
        for (int i = 0; i < vms.size(); i++) {
            final VMInstanceVO vm = vms.get(i);
            if (vm.getType() == VirtualMachine.Type.User) {
                reorderedVMList.add(vm);
            } else {
                reorderedVMList.add(0, vm);
            }
            if (vm.isHaEnabled()) {
                sb.append(" " + vm.getHostName());
            }
        }
    }
    // send an email alert that the host is down, include VMs
    final HostPodVO podVO = _podDao.findById(host.getPodId());
    final String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + zone.getName() + ", pod: " + podVO.getName();
    _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host is down, " + hostDesc, "Host [" + hostDesc + "] is down." + (sb != null ? sb.toString() : ""));
    for (VMInstanceVO vm : reorderedVMList) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Notifying HA Mgr of to restart vm " + vm.getId() + "-" + vm.getInstanceName());
        }
        vm = _instanceDao.findByUuid(vm.getUuid());
        final Long hostId = vm.getHostId();
        if (hostId != null && !hostId.equals(host.getId())) {
            s_logger.debug("VM " + vm.getInstanceName() + " is not on down host " + host.getId() + " it is on other host " + hostId + " VM HA is done");
            continue;
        }
        scheduleRestart(vm, investigate);
    }
}
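The loop above does two things at once: it moves system VMs (routers, console proxies, SSVMs) to the front of the restart list and collects HA-enabled VM names for the alert email. A small sketch of just the ordering, assuming vms is the non-null list returned by _instanceDao.listByHostId as in the snippet.

// Sketch: system VMs are restarted before user VMs.
final List<VMInstanceVO> reordered = new ArrayList<>();
for (final VMInstanceVO vm : vms) {
    if (vm.getType() == VirtualMachine.Type.User) {
        reordered.add(vm);        // user VMs keep their relative order at the tail
    } else {
        reordered.add(0, vm);     // system VMs go to the head of the list
    }
}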
Use of com.cloud.dc.HostPodVO in project cosmic by MissionCriticalCloud: class ManagementServerImpl, method searchForPods.
@Override
public Pair<List<? extends Pod>, Integer> searchForPods(final ListPodsByCmd cmd) {
    final String podName = cmd.getPodName();
    final Long id = cmd.getId();
    Long zoneId = cmd.getZoneId();
    final Object keyword = cmd.getKeyword();
    final Object allocationState = cmd.getAllocationState();
    zoneId = _accountMgr.checkAccessAndSpecifyAuthority(CallContext.current().getCallingAccount(), zoneId);
    final Filter searchFilter = new Filter(HostPodVO.class, "dataCenterId", true, cmd.getStartIndex(), cmd.getPageSizeVal());
    final SearchBuilder<HostPodVO> sb = _hostPodDao.createSearchBuilder();
    sb.and("id", sb.entity().getId(), SearchCriteria.Op.EQ);
    sb.and("name", sb.entity().getName(), SearchCriteria.Op.LIKE);
    sb.and("dataCenterId", sb.entity().getDataCenterId(), SearchCriteria.Op.EQ);
    sb.and("allocationState", sb.entity().getAllocationState(), SearchCriteria.Op.EQ);
    final SearchCriteria<HostPodVO> sc = sb.create();
    if (keyword != null) {
        final SearchCriteria<HostPodVO> ssc = _hostPodDao.createSearchCriteria();
        ssc.addOr("name", SearchCriteria.Op.LIKE, "%" + keyword + "%");
        ssc.addOr("description", SearchCriteria.Op.LIKE, "%" + keyword + "%");
        sc.addAnd("name", SearchCriteria.Op.SC, ssc);
    }
    if (id != null) {
        sc.setParameters("id", id);
    }
    if (podName != null) {
        sc.setParameters("name", "%" + podName + "%");
    }
    if (zoneId != null) {
        sc.setParameters("dataCenterId", zoneId);
    }
    if (allocationState != null) {
        sc.setParameters("allocationState", allocationState);
    }
    final Pair<List<HostPodVO>, Integer> result = _hostPodDao.searchAndCount(sc, searchFilter);
    return new Pair<>(result.first(), result.second());
}
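The HostPodVO search follows the usual GenericDao pattern in this codebase: declare every possible condition on a SearchBuilder up front, then bind only the parameters the caller actually supplied. A reduced sketch showing just the zone filter, assuming _hostPodDao as in ManagementServerImpl; startIndex and pageSize stand in for cmd.getStartIndex() and cmd.getPageSizeVal().

// Sketch: the full method also binds id, name, keyword and allocationState the same way.
final Filter searchFilter = new Filter(HostPodVO.class, "dataCenterId", true, startIndex, pageSize);
final SearchBuilder<HostPodVO> sb = _hostPodDao.createSearchBuilder();
sb.and("dataCenterId", sb.entity().getDataCenterId(), SearchCriteria.Op.EQ);
final SearchCriteria<HostPodVO> sc = sb.create();
if (zoneId != null) {
    sc.setParameters("dataCenterId", zoneId);   // only bind filters the caller supplied
}
final Pair<List<HostPodVO>, Integer> pods = _hostPodDao.searchAndCount(sc, searchFilter);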
Use of com.cloud.dc.HostPodVO in project cosmic by MissionCriticalCloud: class ResourceManagerImpl, method createHostVO.
protected HostVO createHostVO(final StartupCommand[] cmds, final ServerResource resource, final Map<String, String> details, List<String> hostTags, final ResourceStateAdapter.Event stateEvent) {
    final StartupCommand startup = cmds[0];
    HostVO host = findHostByGuid(startup.getGuid());
    boolean isNew = false;
    if (host == null) {
        host = findHostByGuid(startup.getGuidWithoutResource());
    }
    if (host == null) {
        host = new HostVO(startup.getGuid());
        isNew = true;
    }
    String dataCenter = startup.getDataCenter();
    String pod = startup.getPod();
    final String cluster = startup.getCluster();
    if (pod != null && dataCenter != null && pod.equalsIgnoreCase("default") && dataCenter.equalsIgnoreCase("default")) {
        final List<HostPodVO> pods = _podDao.listAllIncludingRemoved();
        for (final HostPodVO hpv : pods) {
            if (checkCIDR(hpv, startup.getPrivateIpAddress(), startup.getPrivateNetmask())) {
                pod = hpv.getName();
                dataCenter = _dcDao.findById(hpv.getDataCenterId()).getName();
                break;
            }
        }
    }
    long dcId;
    DataCenterVO dc = _dcDao.findByName(dataCenter);
    if (dc == null) {
        try {
            dcId = Long.parseLong(dataCenter);
            dc = _dcDao.findById(dcId);
        } catch (final NumberFormatException e) {
            s_logger.debug("Cannot parse " + dataCenter + " into Long.");
        }
    }
    if (dc == null) {
        throw new IllegalArgumentException("Host " + startup.getPrivateIpAddress() + " sent incorrect data center: " + dataCenter);
    }
    dcId = dc.getId();
    HostPodVO p = _podDao.findByName(pod, dcId);
    if (p == null) {
        try {
            final long podId = Long.parseLong(pod);
            p = _podDao.findById(podId);
        } catch (final NumberFormatException e) {
            s_logger.debug("Cannot parse " + pod + " into Long.");
        }
    }
    /*
     * The ResourceStateAdapter is responsible for throwing an Exception if the Pod is
     * null but a non-null value is required, for example XcpServerDiscoverer.
     */
    final Long podId = p == null ? null : p.getId();
    Long clusterId = null;
    if (cluster != null) {
        try {
            clusterId = Long.valueOf(cluster);
        } catch (final NumberFormatException e) {
            if (podId != null) {
                ClusterVO c = _clusterDao.findBy(cluster, podId.longValue());
                if (c == null) {
                    c = new ClusterVO(dcId, podId.longValue(), cluster);
                    c = _clusterDao.persist(c);
                }
                clusterId = c.getId();
            }
        }
    }
    if (startup instanceof StartupRoutingCommand) {
        final StartupRoutingCommand ssCmd = (StartupRoutingCommand) startup;
        final List<String> implicitHostTags = ssCmd.getHostTags();
        if (!implicitHostTags.isEmpty()) {
            if (hostTags == null) {
                hostTags = _hostTagsDao.gethostTags(host.getId());
            }
            if (hostTags != null) {
                implicitHostTags.removeAll(hostTags);
                hostTags.addAll(implicitHostTags);
            } else {
                hostTags = implicitHostTags;
            }
        }
    }
    host.setDataCenterId(dc.getId());
    host.setPodId(podId);
    host.setClusterId(clusterId);
    host.setPrivateIpAddress(startup.getPrivateIpAddress());
    host.setPrivateNetmask(startup.getPrivateNetmask());
    host.setPrivateMacAddress(startup.getPrivateMacAddress());
    host.setPublicIpAddress(startup.getPublicIpAddress());
    host.setPublicMacAddress(startup.getPublicMacAddress());
    host.setPublicNetmask(startup.getPublicNetmask());
    host.setStorageIpAddress(startup.getStorageIpAddress());
    host.setStorageMacAddress(startup.getStorageMacAddress());
    host.setStorageNetmask(startup.getStorageNetmask());
    host.setVersion(startup.getVersion());
    host.setName(startup.getName());
    host.setManagementServerId(_nodeId);
    host.setStorageUrl(startup.getIqn());
    host.setLastPinged(System.currentTimeMillis() >> 10);
    host.setHostTags(hostTags);
    host.setDetails(details);
    if (startup.getStorageIpAddressDeux() != null) {
        host.setStorageIpAddressDeux(startup.getStorageIpAddressDeux());
        host.setStorageMacAddressDeux(startup.getStorageMacAddressDeux());
        host.setStorageNetmaskDeux(startup.getStorageNetmaskDeux());
    }
    if (resource != null) {
        /* null when agent is connected agent */
        host.setResource(resource.getClass().getName());
    }
    host = (HostVO) dispatchToStateAdapters(stateEvent, true, host, cmds, resource, details, hostTags);
    if (host == null) {
        throw new CloudRuntimeException("No resource state adapter response");
    }
    if (isNew) {
        host = _hostDao.persist(host);
    } else {
        _hostDao.update(host.getId(), host);
    }
    try {
        resourceStateTransitTo(host, ResourceState.Event.InternalCreated, _nodeId);
        /* Agent goes to Connecting status */
        _agentMgr.agentStatusTransitTo(host, Status.Event.AgentConnected, _nodeId);
    } catch (final Exception e) {
        s_logger.debug("Cannot transmit host " + host.getId() + " to Creating state", e);
        _agentMgr.agentStatusTransitTo(host, Status.Event.Error, _nodeId);
        try {
            resourceStateTransitTo(host, ResourceState.Event.Error, _nodeId);
        } catch (final NoTransitionException e1) {
            s_logger.debug("Cannot transmit host " + host.getId() + " to Error state", e);
        }
    }
    return host;
}
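The pod resolution in createHostVO is name-first with a numeric-id fallback, and a HostPodVO is also matched by CIDR when the agent reports the "default" pod. A condensed sketch of the name/id fallback, assuming _podDao, s_logger and the dcId resolved above as in ResourceManagerImpl; a null result is legitimate and is handled later by the ResourceStateAdapter.

// Sketch: resolve the pod the agent reported, by name first, then as an id.
HostPodVO p = _podDao.findByName(pod, dcId);
if (p == null) {
    try {
        p = _podDao.findById(Long.parseLong(pod));
    } catch (final NumberFormatException e) {
        s_logger.debug("Cannot parse " + pod + " into Long.");
    }
}
final Long podId = (p == null) ? null : p.getId();   // may legitimately stay null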