Search in sources :

Example 61 with VirtualMachine

use of com.cloud.vm.VirtualMachine in project cosmic by MissionCriticalCloud.

the class FirstFitPlanner method orderClusters.

/**
 * Produces the list of candidate cluster ids for deploying the VM described by {@code vmProfile}.
 * <p>
 * The search scope follows the plan: a specific cluster when the plan names one, otherwise the
 * clusters of the pod the plan names, otherwise the whole zone of the plan. For a VM whose service
 * offering carries no vGPU type detail, and while GPU host groups exist, the resulting list is
 * passed to {@code reorderClustersBasedOnImplicitTags} before being returned.
 *
 * @param vmProfile profile of the VM to place
 * @param plan      deployment plan carrying the optional cluster / pod restriction and the zone id
 * @param avoid     exclusion list; a cluster or pod that cannot be found is added to it, as is a pod
 *                  whose scan returned {@code null}
 * @return the candidate cluster ids; {@code null} when the VM's zone is in the avoid set or the
 *         cluster / pod named by the plan cannot be found (a scan helper may also return
 *         {@code null}); an empty list when the named cluster or pod is itself in the avoid set
 * @throws InsufficientServerCapacityException declared by the planner contract
 */
@Override
public List<Long> orderClusters(final VirtualMachineProfile vmProfile, final DeploymentPlan plan, final ExcludeList avoid) throws InsufficientServerCapacityException {
    final VirtualMachine vm = vmProfile.getVirtualMachine();
    final Zone zone = zoneRepository.findOne(vm.getDataCenterId());

    // Nothing can be planned inside a zone that is excluded.
    if (avoid.shouldAvoid(zone)) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("DataCenter id = '" + zone.getId() + "' provided is in avoid set, DeploymentPlanner cannot allocate the VM, returning.");
        }
        return null;
    }

    List<Long> candidates = new ArrayList<>();
    final Long requestedClusterId = plan.getClusterId();

    if (requestedClusterId != null) {
        // Scope: exactly one cluster.
        s_logger.debug("Searching resources only under specified Cluster: " + requestedClusterId);
        final ClusterVO cluster = clusterDao.findById(requestedClusterId);
        if (cluster == null) {
            s_logger.debug("The specified cluster cannot be found, returning.");
            avoid.addCluster(requestedClusterId);
            return null;
        }
        if (avoid.shouldAvoid(cluster)) {
            // Falls through with an empty candidate list.
            s_logger.debug("The specified cluster is in avoid set, returning.");
        } else {
            candidates.add(requestedClusterId);
            removeClustersCrossingThreshold(candidates, avoid, vmProfile, plan);
        }
    } else if (plan.getPodId() != null) {
        // Scope: the clusters of one pod.
        final Long requestedPodId = plan.getPodId();
        s_logger.debug("Searching resources only under specified Pod: " + requestedPodId);
        final HostPodVO pod = podDao.findById(requestedPodId);
        if (pod == null) {
            s_logger.debug("The specified Pod cannot be found, returning.");
            avoid.addPod(requestedPodId);
            return null;
        }
        if (avoid.shouldAvoid(pod)) {
            // Falls through with an empty candidate list.
            s_logger.debug("The specified pod is in avoid set, returning.");
        } else {
            candidates = scanClustersForDestinationInZoneOrPod(requestedPodId, false, vmProfile, plan, avoid);
            if (candidates == null) {
                avoid.addPod(requestedPodId);
            }
        }
    } else {
        // Scope: the whole zone, scanned pod by pod or cluster by cluster depending on configuration.
        s_logger.debug("Searching all possible resources under this Zone: " + plan.getDataCenterId());
        final String applyToPodsSetting = configDao.getValue(Config.ApplyAllocationAlgorithmToPods.key());
        candidates = Boolean.parseBoolean(applyToPodsSetting)
                ? scanPodsForDestination(vmProfile, plan, avoid)
                : scanClustersForDestinationInZoneOrPod(plan.getDataCenterId(), true, vmProfile, plan, avoid);
    }

    if (candidates == null || candidates.isEmpty()) {
        return candidates;
    }

    // In case of non-GPU VMs, protect GPU enabled Hosts and prefer VM deployment on non-GPU Hosts.
    final ServiceOffering offering = vmProfile.getServiceOffering();
    final boolean offeringHasNoVgpu = serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString()) == null;
    if (offeringHasNoVgpu && !hostGpuGroupsDao.listHostIds().isEmpty()) {
        final int requiredCpu = offering.getCpu();
        final long requiredRam = offering.getRamSize() * 1024L * 1024L;
        reorderClustersBasedOnImplicitTags(candidates, requiredCpu, requiredRam);
    }
    return candidates;
}
Also used : ClusterVO(com.cloud.dc.ClusterVO) ServiceOffering(com.cloud.offering.ServiceOffering) Zone(com.cloud.db.model.Zone) ArrayList(java.util.ArrayList) HostPodVO(com.cloud.dc.HostPodVO) VirtualMachine(com.cloud.vm.VirtualMachine)

Example 62 with VirtualMachine

use of com.cloud.vm.VirtualMachine in project cosmic by MissionCriticalCloud.

the class HighAvailabilityManagerImpl method stopVM.

/**
 * Executes a stop-type HA work item ({@code Stop}, {@code CheckStop} or {@code ForceStop}).
 * <p>
 * {@code Stop} stops the VM unconditionally (non-forced). {@code CheckStop} and {@code ForceStop}
 * first verify that the VM still has the state, update counter and host recorded in the work item
 * and give up if anything differs; they then stop the VM non-forced and forced respectively.
 *
 * @param work the HA work item to execute; its step is set to {@code Done} when the VM is gone
 * @return {@code null} when the item needs no retry from this method (VM missing, VM changed, or
 *         stop succeeded); otherwise {@code (System.currentTimeMillis() >> 10) + _stopRetryInterval}
 *         -- presumably the time of the next attempt; confirm against the caller
 * @throws ConcurrentOperationException propagated from {@code advanceStop}
 */
protected Long stopVM(final HaWorkVO work) throws ConcurrentOperationException {
    final VirtualMachine vm = _itMgr.findById(work.getInstanceId());
    if (vm == null) {
        s_logger.info("No longer can find VM " + work.getInstanceId() + ". Throwing away " + work);
        work.setStep(Step.Done);
        return null;
    }
    s_logger.info("Stopping " + vm);
    try {
        final WorkType workType = work.getWorkType();
        if (workType == WorkType.Stop) {
            _itMgr.advanceStop(vm.getUuid(), false);
            s_logger.info("Successfully stopped " + vm);
            return null;
        } else if (workType == WorkType.CheckStop || workType == WorkType.ForceStop) {
            // Both variants only act when the VM is still exactly as it was when the work was scheduled.
            if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime() || vm.getHostId() == null || vm.getHostId().longValue() != work.getHostId()) {
                s_logger.info(vm + " is different now.  Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? vm.getHostId() : "none") + " State: " + vm.getState());
                return null;
            }
            // The only difference between the two work types is whether the stop is forced.
            _itMgr.advanceStop(vm.getUuid(), workType == WorkType.ForceStop);
            s_logger.info("Stop for " + vm + " was successful");
            return null;
        } else {
            assert false : "Who decided there's other steps but didn't modify the guy who does the work?";
        }
    } catch (final ResourceUnavailableException e) {
        s_logger.debug("Agent is not available: " + e.getMessage());
    } catch (final OperationTimedoutException e) {
        s_logger.debug("operation timed out: " + e.getMessage());
    }
    // Reached on a failed stop attempt (or, with assertions disabled, on an unknown work type).
    return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
}
Also used : OperationTimedoutException(com.cloud.exception.OperationTimedoutException) ResourceUnavailableException(com.cloud.exception.ResourceUnavailableException) VirtualMachine(com.cloud.vm.VirtualMachine)

Example 63 with VirtualMachine

use of com.cloud.vm.VirtualMachine in project cosmic by MissionCriticalCloud.

the class HighAvailabilityManagerImpl method restart.

/**
 * Runs one attempt of the HA restart procedure for the VM referenced by {@code work}.
 * <p>
 * In order, the method:
 * <ol>
 * <li>drops the item when newer HA work exists for the VM, or postpones it while other HA work for
 * the VM is running;</li>
 * <li>drops the item when the VM is gone or its state / update counter no longer match the item;</li>
 * <li>when the item is in the {@code Investigating} step, asks the investigators whether the VM is
 * alive, fences it if that is unknown, and force-stops it;</li>
 * <li>starts the VM again, first with its original planner and, on insufficient capacity, with the
 * first HA planner; failures raise an alert.</li>
 * </ol>
 * If the host record cannot be found at all (not even among removed hosts), the host description
 * used in alerts falls back to the host id and the investigation is skipped exactly as for a
 * removed host.
 *
 * @param work the HA work item; its step, update time and previous state may be modified
 * @return {@code null} when this method schedules no further attempt; otherwise
 *         {@code (System.currentTimeMillis() >> 10)} plus the investigate or restart retry interval
 *         -- presumably the time of the next attempt; confirm against the caller
 */
protected Long restart(final HaWorkVO work) {
    List<HaWorkVO> items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId());
    if (items.size() > 0) {
        final StringBuilder str = new StringBuilder("Cancelling this work item because newer ones have been scheduled.  Work Ids = [");
        for (final HaWorkVO item : items) {
            str.append(item.getId()).append(", ");
        }
        str.delete(str.length() - 2, str.length()).append("]");
        s_logger.info(str.toString());
        return null;
    }
    items = _haDao.listRunningHaWorkForVm(work.getInstanceId());
    if (items.size() > 0) {
        final StringBuilder str = new StringBuilder("Waiting because there's HA work being executed on an item currently.  Work Ids =[");
        for (final HaWorkVO item : items) {
            str.append(item.getId()).append(", ");
        }
        str.delete(str.length() - 2, str.length()).append("]");
        s_logger.info(str.toString());
        return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
    }
    final long vmId = work.getInstanceId();
    VirtualMachine vm = _itMgr.findById(work.getInstanceId());
    if (vm == null) {
        s_logger.info("Unable to find vm: " + vmId);
        return null;
    }
    s_logger.info("HA on " + vm);
    if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) {
        s_logger.info("VM " + vm + " has been changed.  Current State = " + vm.getState() + " Previous State = " + work.getPreviousState() + " last updated = " + vm.getUpdated() + " previous updated = " + work.getUpdateTime());
        return null;
    }
    // Pick the alert category matching the kind of VM; user VMs are the default.
    AlertManager.AlertType alertType = AlertManager.AlertType.ALERT_TYPE_USERVM;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY;
    } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_SSVM;
    }
    HostVO host = _hostDao.findById(work.getHostId());
    boolean isHostRemoved = false;
    if (host == null) {
        host = _hostDao.findByIdIncludingRemoved(work.getHostId());
        if (host != null) {
            s_logger.debug("VM " + vm.toString() + " is now no longer on host " + work.getHostId() + " as the host is removed");
            isHostRemoved = true;
        } else {
            s_logger.warn("Unable to find host " + work.getHostId() + " of VM " + vm + ", not even among removed hosts");
        }
    }
    // Build the host description defensively: the host, its zone or its pod may be missing.
    final String hostDesc;
    if (host == null) {
        hostDesc = "id: " + work.getHostId() + " (host record not found)";
    } else {
        final Zone zone = zoneRepository.findOne(host.getDataCenterId());
        final HostPodVO podVO = _podDao.findById(host.getPodId());
        hostDesc = "name: " + host.getName() + "(id:" + host.getId() + "), availability zone: " + (zone != null ? zone.getName() : "unknown") + ", pod: " + (podVO != null ? podVO.getName() : "unknown");
    }
    Boolean alive = null;
    if (work.getStep() == Step.Investigating) {
        // Investigation and fencing need a live host record; otherwise go straight to the forced stop.
        final boolean canInvestigate = !isHostRemoved && host != null;
        if (canInvestigate) {
            // Compare host ids by value; '!=' on two boxed ids would compare object identity.
            if (vm.getHostId() == null || !vm.getHostId().equals(work.getHostId())) {
                s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId());
                return null;
            }
            // The first investigator that recognises the VM decides; 'alive' stays null if none does.
            Investigator investigator = null;
            for (final Investigator it : investigators) {
                investigator = it;
                try {
                    alive = investigator.isVmAlive(vm, host);
                    s_logger.info(investigator.getName() + " found " + vm + " to be alive? " + alive);
                    break;
                } catch (final UnknownVM e) {
                    s_logger.info(investigator.getName() + " could not find " + vm);
                }
            }
            boolean fenced = false;
            if (alive == null) {
                s_logger.debug("Fencing off VM that we don't know the state of");
                for (final FenceBuilder fb : fenceBuilders) {
                    final Boolean result = fb.fenceOff(vm, host);
                    s_logger.info("Fencer " + fb.getName() + " returned " + result);
                    if (result != null && result) {
                        fenced = true;
                        break;
                    }
                }
            } else if (!alive) {
                // A VM known to be down needs no fencing.
                fenced = true;
            } else {
                s_logger.debug("VM " + vm.getInstanceName() + " is found to be alive by " + investigator.getName());
                if (host.getStatus() == Status.Up) {
                    s_logger.info(vm + " is alive and host is up. No need to restart it.");
                    return null;
                } else {
                    s_logger.debug("Rescheduling because the host is not up but the vm is alive");
                    return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
                }
            }
            if (!fenced) {
                s_logger.debug("We were unable to fence off the VM " + vm);
                _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
                return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
            }
        } else {
            s_logger.debug("How come that HA step is Investigating and the host is removed? Calling forced Stop on Vm anyways");
        }
        // Forced stop, shared by both paths above.
        try {
            _itMgr.advanceStop(vm.getUuid(), true);
        } catch (final ResourceUnavailableException e) {
            assert false : "How do we hit this when force is true?";
            throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (final OperationTimedoutException e) {
            assert false : "How do we hit this when force is true?";
            throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (final ConcurrentOperationException e) {
            assert false : "How do we hit this when force is true?";
            throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        }
        if (canInvestigate) {
            // Only the investigated path advances and persists the work step.
            work.setStep(Step.Scheduled);
            _haDao.update(work.getId(), work);
        }
    }
    // Re-read the VM: the forced stop above may have changed it, or it may be gone by now.
    vm = _itMgr.findById(vm.getId());
    if (vm == null) {
        s_logger.info("Unable to find vm: " + vmId);
        return null;
    }
    if (!_forceHA && !vm.isHaEnabled()) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("VM is not HA enabled so we're done.");
        }
        // VM doesn't require HA
        return null;
    }
    if ((host == null || host.getRemoved() != null || host.getState() != Status.Up) && !volumeMgr.canVmRestartOnAnotherServer(vm.getId())) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("VM can not restart on another server.");
        }
        return null;
    }
    try {
        final HashMap<VirtualMachineProfile.Param, Object> params = new HashMap<>();
        if (_haTag != null) {
            params.put(VirtualMachineProfile.Param.HaTag, _haTag);
        }
        final WorkType wt = work.getWorkType();
        if (wt.equals(WorkType.HA)) {
            params.put(VirtualMachineProfile.Param.HaOperation, true);
        }
        try {
            // First try starting the vm with its original planner, if it doesn't succeed send HAPlanner as its an emergency.
            _itMgr.advanceStart(vm.getUuid(), params, null);
        } catch (final InsufficientCapacityException e) {
            s_logger.warn("Failed to deploy vm " + vmId + " with original planner, sending HAPlanner");
            _itMgr.advanceStart(vm.getUuid(), params, _haPlanners.get(0));
        }
        final VMInstanceVO started = _instanceDao.findById(vm.getId());
        if (started != null && started.getState() == VirtualMachine.State.Running) {
            s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId());
            return null;
        }
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval);
        }
    } catch (final InsufficientCapacityException e) {
        s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
    } catch (final ResourceUnavailableException e) {
        s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
    } catch (final ConcurrentOperationException e) {
        s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
    } catch (final OperationTimedoutException e) {
        s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
    }
    // Record the VM's current state on the work item so the next attempt's change check passes.
    final VirtualMachine refreshed = _itMgr.findById(vm.getId());
    if (refreshed == null) {
        s_logger.info("Unable to find vm: " + vmId);
        return null;
    }
    work.setUpdateTime(refreshed.getUpdated());
    work.setPreviousState(refreshed.getState());
    return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
}
Also used : AlertManager(com.cloud.alert.AlertManager) OperationTimedoutException(com.cloud.exception.OperationTimedoutException) HashMap(java.util.HashMap) HostPodVO(com.cloud.dc.HostPodVO) CloudRuntimeException(com.cloud.utils.exception.CloudRuntimeException) UnknownVM(com.cloud.ha.Investigator.UnknownVM) InsufficientCapacityException(com.cloud.exception.InsufficientCapacityException) WorkType(com.cloud.ha.HaWork.WorkType) Zone(com.cloud.db.model.Zone) VMInstanceVO(com.cloud.vm.VMInstanceVO) ConcurrentOperationException(com.cloud.exception.ConcurrentOperationException) HostVO(com.cloud.host.HostVO) ResourceUnavailableException(com.cloud.exception.ResourceUnavailableException) VirtualMachine(com.cloud.vm.VirtualMachine)

Example 64 with VirtualMachine

use of com.cloud.vm.VirtualMachine in project cosmic by MissionCriticalCloud.

the class HypervisorGuruBase method toVirtualMachineTO.

/**
 * Builds the {@link VirtualMachineTO} transfer object for the VM described by {@code vmProfile}.
 * <p>
 * The result carries the VM's identity and sizing (CPU count, minimum and maximum memory in bytes),
 * boot arguments, NICs, disks, architecture string, stored VM details, an optional GPU device, the
 * dynamic-scaling flag, the VM uuid, VM data / config-drive settings and the owning domain's uuid.
 *
 * @param vmProfile the profile to convert
 * @return the populated transfer object
 */
protected VirtualMachineTO toVirtualMachineTO(final VirtualMachineProfile vmProfile) {
    final ServiceOffering offering = _serviceOfferingDao.findById(vmProfile.getId(), vmProfile.getServiceOfferingId());
    final VirtualMachine vm = vmProfile.getVirtualMachine();

    // Minimum memory is the offering's RAM size divided by the memory overcommit ratio, truncated.
    final long minRamSize = (long) (offering.getRamSize() / vmProfile.getMemoryOvercommitRatio());
    final long minRamBytes = minRamSize * 1024L * 1024L;
    final long maxRamBytes = offering.getRamSize() * 1024L * 1024L;

    final VirtualMachineTO vmTo = new VirtualMachineTO(vm.getId(), vm.getInstanceName(), vm.getType(), offering.getCpu(), minRamBytes, maxRamBytes, null, null, vm.isHaEnabled(), vm.limitCpuUse(), vm.getVncPassword());
    vmTo.setBootArgs(vmProfile.getBootArgs());

    // Convert every NIC profile, keeping the profile order.
    final List<NicProfile> nicProfiles = vmProfile.getNics();
    final NicTO[] nicTos = new NicTO[nicProfiles.size()];
    for (int idx = 0; idx < nicTos.length; idx++) {
        nicTos[idx] = toNicTO(nicProfiles.get(idx));
    }
    vmTo.setNics(nicTos);

    vmTo.setDisks(vmProfile.getDisks().toArray(new DiskTO[vmProfile.getDisks().size()]));

    // 32-bit templates map to i686, everything else to x86_64.
    vmTo.setArch(vmProfile.getTemplate().getBits() == 32 ? "i686" : "x86_64");

    final Map<String, String> vmDetails = _userVmDetailsDao.listDetailsKeyPairs(vm.getId());
    if (vmDetails != null) {
        vmTo.setDetails(vmDetails);
    }

    // A GPU device is attached only when the offering defines a vGPU type.
    final ServiceOfferingDetailsVO vgpuTypeDetail = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString());
    if (vgpuTypeDetail != null) {
        final ServiceOfferingDetailsVO pciDeviceDetail = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.pciDevice.toString());
        vmTo.setGpuDevice(_resourceMgr.getGPUDevice(vm.getHostId(), pciDeviceDetail.getValue(), vgpuTypeDetail.getValue()));
    }

    // The uuid and the per-VM dynamic-scaling flag are read from the persisted instance record.
    final VMInstanceVO persistedVm = _virtualMachineDao.findById(vmTo.getId());
    // Dynamic scaling needs both the VM flag and the setting evaluated for the VM's zone.
    final boolean dynamicallyScalable = persistedVm.isDynamicallyScalable() && UserVmManager.EnableDynamicallyScaleVm.valueIn(vm.getDataCenterId());
    vmTo.setEnableDynamicallyScaleVm(dynamicallyScalable);
    vmTo.setUuid(persistedVm.getUuid());

    vmTo.setVmData(vmProfile.getVmData());
    vmTo.setConfigDriveLabel(vmProfile.getConfigDriveLabel());
    vmTo.setConfigDriveIsoRootFolder(vmProfile.getConfigDriveIsoRootFolder());
    vmTo.setConfigDriveIsoFile(vmProfile.getConfigDriveIsoFile());

    // Metadata currently consists of the owning domain's uuid only.
    final DomainVO owningDomain = _domainDao.findById(vm.getDomainId());
    final MetadataTO metadata = new MetadataTO();
    metadata.setDomainUuid(owningDomain.getUuid());
    vmTo.setMetadata(metadata);

    return vmTo;
}
Also used : ServiceOffering(com.cloud.offering.ServiceOffering) MetadataTO(com.cloud.agent.api.to.MetadataTO) ServiceOfferingDetailsVO(com.cloud.service.ServiceOfferingDetailsVO) VMInstanceVO(com.cloud.vm.VMInstanceVO) NicProfile(com.cloud.vm.NicProfile) VirtualMachineTO(com.cloud.agent.api.to.VirtualMachineTO) DomainVO(com.cloud.domain.DomainVO) VirtualMachine(com.cloud.vm.VirtualMachine) NicTO(com.cloud.agent.api.to.NicTO) DiskTO(com.cloud.agent.api.to.DiskTO)

Example 65 with VirtualMachine

use of com.cloud.vm.VirtualMachine in project cosmic by MissionCriticalCloud.

the class ManagementServerImpl method upgradeSystemVM.

/**
 * Changes the service offering of a system VM.
 * <p>
 * The request is rejected when the VM cannot be found, and when the VM is running on XenServer.
 * Otherwise the upgrade is delegated to {@code _userVmMgr.upgradeVirtualMachine}.
 *
 * @param cmd the scale command carrying the VM id, the new service offering id and details
 * @return the VM as re-read from the database after a successful upgrade
 * @throws InvalidParameterValueException when no VM exists for the given id, or the VM is a
 *                                        running XenServer VM
 * @throws CloudRuntimeException          when the upgrade reports failure
 */
@Override
@ActionEvent(eventType = EventTypes.EVENT_VM_UPGRADE, eventDescription = "Upgrading system VM", async = true)
public VirtualMachine upgradeSystemVM(final ScaleSystemVMCmd cmd) throws ResourceUnavailableException, ManagementServerException, VirtualMachineMigrationException, ConcurrentOperationException {
    final VMInstanceVO vmInstance = _vmInstanceDao.findById(cmd.getId());
    // Report an unknown id as a parameter error instead of failing with a NullPointerException below.
    if (vmInstance == null) {
        throw new InvalidParameterValueException("Unable to find system VM with id " + cmd.getId());
    }
    if (vmInstance.getHypervisorType() == HypervisorType.XenServer && vmInstance.getState().equals(State.Running)) {
        throw new InvalidParameterValueException("Dynamic Scaling operation is not permitted for this hypervisor on system vm");
    }
    final boolean upgraded = _userVmMgr.upgradeVirtualMachine(cmd.getId(), cmd.getServiceOfferingId(), cmd.getDetails());
    if (!upgraded) {
        throw new CloudRuntimeException("Failed to upgrade System VM");
    }
    // Re-read so the caller sees the VM as persisted after the upgrade.
    return _vmInstanceDao.findById(cmd.getId());
}
Also used : InvalidParameterValueException(com.cloud.utils.exception.InvalidParameterValueException) CloudRuntimeException(com.cloud.utils.exception.CloudRuntimeException) VMInstanceVO(com.cloud.vm.VMInstanceVO) VirtualMachine(com.cloud.vm.VirtualMachine) ActionEvent(com.cloud.event.ActionEvent)

Aggregations

VirtualMachine (com.cloud.vm.VirtualMachine)141 HostVO (com.cloud.host.HostVO)38 ArrayList (java.util.ArrayList)35 CloudRuntimeException (com.cloud.utils.exception.CloudRuntimeException)26 HashMap (java.util.HashMap)25 List (java.util.List)23 InvalidParameterValueException (com.cloud.exception.InvalidParameterValueException)21 VMInstanceVO (com.cloud.vm.VMInstanceVO)20 ConcurrentOperationException (com.cloud.exception.ConcurrentOperationException)19 ResourceUnavailableException (com.cloud.exception.ResourceUnavailableException)19 DataCenter (com.cloud.dc.DataCenter)17 Host (com.cloud.host.Host)17 ServiceOffering (com.cloud.offering.ServiceOffering)17 Test (org.junit.Test)17 ServerApiException (com.cloud.api.ServerApiException)16 SystemVmResponse (com.cloud.api.response.SystemVmResponse)14 InvalidParameterValueException (com.cloud.utils.exception.InvalidParameterValueException)14 OperationTimedoutException (com.cloud.exception.OperationTimedoutException)13 Account (com.cloud.user.Account)13 UserVm (com.cloud.uservm.UserVm)13