use of com.cloud.agent.api.to.GPUDeviceTO in project cosmic by MissionCriticalCloud.
the class VirtualMachineManagerImpl method orchestrateStart.
@Override
public void orchestrateStart(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params, final DeploymentPlan planToDeploy, final DeploymentPlanner planner) throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException {
final CallContext cctxt = CallContext.current();
final Account account = cctxt.getCallingAccount();
final User caller = cctxt.getCallingUser();
VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final Ternary<VMInstanceVO, ReservationContext, ItWorkVO> start = changeToStartState(vm, caller, account);
if (start == null) {
return;
}
vm = start.first();
final ReservationContext ctx = start.second();
ItWorkVO work = start.third();
VMInstanceVO startedVm = null;
final ServiceOfferingVO offering = _offeringDao.findById(vm.getId(), vm.getServiceOfferingId());
final VirtualMachineTemplate template = _entityMgr.findByIdIncludingRemoved(VirtualMachineTemplate.class, vm.getTemplateId());
s_logger.debug("Trying to deploy VM, vm has dcId: " + vm.getDataCenterId() + " and podId: " + vm.getPodIdToDeployIn());
DataCenterDeployment plan = new DataCenterDeployment(vm.getDataCenterId(), vm.getPodIdToDeployIn(), null, null, null, null, ctx);
if (planToDeploy != null && planToDeploy.getDataCenterId() != 0) {
s_logger.debug("advanceStart: DeploymentPlan is provided, using dcId:" + planToDeploy.getDataCenterId() + ", podId: " + planToDeploy.getPodId() + ", clusterId: " + planToDeploy.getClusterId() + ", hostId: " + planToDeploy.getHostId() + ", poolId: " + planToDeploy.getPoolId());
plan = new DataCenterDeployment(planToDeploy.getDataCenterId(), planToDeploy.getPodId(), planToDeploy.getClusterId(), planToDeploy.getHostId(), planToDeploy.getPoolId(), planToDeploy.getPhysicalNetworkId(), ctx);
}
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
final VirtualMachineGuru vmGuru = getVmGuru(vm);
boolean canRetry = true;
ExcludeList avoids = null;
try {
final Journal journal = start.second().getJournal();
if (planToDeploy != null) {
avoids = planToDeploy.getAvoids();
}
if (avoids == null) {
avoids = new ExcludeList();
}
s_logger.debug("VM start orchestration will {}", avoids.toString());
boolean planChangedByVolume = false;
boolean reuseVolume = true;
final DataCenterDeployment originalPlan = plan;
int retry = StartRetry.value();
while (retry-- != 0) {
if (reuseVolume) {
// edit plan if this vm's ROOT volume is in READY state already
final List<VolumeVO> vols = _volsDao.findReadyRootVolumesByInstance(vm.getId());
for (final VolumeVO vol : vols) {
// make sure if the templateId is unchanged. If it is changed,
// let planner
// reassign pool for the volume even if it ready.
final Long volTemplateId = vol.getTemplateId();
if (volTemplateId != null && volTemplateId != template.getId()) {
s_logger.debug(vol + " of " + vm + " is READY, but template ids don't match, let the planner reassign a new pool");
continue;
}
final StoragePool pool = (StoragePool) dataStoreMgr.getPrimaryDataStore(vol.getPoolId());
if (!pool.isInMaintenance()) {
s_logger.debug("Root volume is ready, need to place VM in volume's cluster");
final long rootVolDcId = pool.getDataCenterId();
final Long rootVolPodId = pool.getPodId();
final Long rootVolClusterId = pool.getClusterId();
if (planToDeploy != null && planToDeploy.getDataCenterId() != 0) {
final Long clusterIdSpecified = planToDeploy.getClusterId();
if (clusterIdSpecified != null && rootVolClusterId != null) {
checkIfPlanIsDeployable(vm, rootVolClusterId, clusterIdSpecified);
}
plan = new DataCenterDeployment(planToDeploy.getDataCenterId(), planToDeploy.getPodId(), planToDeploy.getClusterId(), planToDeploy.getHostId(), vol.getPoolId(), null, ctx);
} else {
plan = new DataCenterDeployment(rootVolDcId, rootVolPodId, rootVolClusterId, null, vol.getPoolId(), null, ctx);
s_logger.debug(vol + " is READY, changing deployment plan to use this pool's dcId: " + rootVolDcId + " , podId: " + rootVolPodId + " , and clusterId: " + rootVolClusterId);
planChangedByVolume = true;
}
}
}
}
final Account owner = _entityMgr.findById(Account.class, vm.getAccountId());
final VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vm, template, offering, owner, params);
DeployDestination dest;
try {
dest = _dpMgr.planDeployment(vmProfile, plan, avoids, planner);
} catch (final AffinityConflictException e2) {
s_logger.warn("Unable to create deployment, affinity rules associted to the VM conflict", e2);
throw new CloudRuntimeException("Unable to create deployment, affinity rules associted to the VM conflict");
}
if (dest == null) {
if (planChangedByVolume) {
plan = originalPlan;
planChangedByVolume = false;
// do not enter volume reuse for next retry, since we want to look for resources outside the volume's cluster
reuseVolume = false;
continue;
}
throw new InsufficientServerCapacityException("Unable to create a deployment for " + vmProfile, DataCenter.class, plan.getDataCenterId(), areAffinityGroupsAssociated(vmProfile));
}
if (dest != null) {
avoids.addHost(dest.getHost().getId());
journal.record("Deployment found ", vmProfile, dest);
}
long destHostId = dest.getHost().getId();
vm.setPodIdToDeployIn(dest.getPod().getId());
final Long cluster_id = dest.getCluster().getId();
final ClusterDetailsVO cluster_detail_cpu = _clusterDetailsDao.findDetail(cluster_id, "cpuOvercommitRatio");
final ClusterDetailsVO cluster_detail_ram = _clusterDetailsDao.findDetail(cluster_id, "memoryOvercommitRatio");
// storing the value of overcommit in the vm_details table for doing a capacity check in case the cluster overcommit ratio is changed.
if (_uservmDetailsDao.findDetail(vm.getId(), "cpuOvercommitRatio") == null && (Float.parseFloat(cluster_detail_cpu.getValue()) > 1f || Float.parseFloat(cluster_detail_ram.getValue()) > 1f)) {
_uservmDetailsDao.addDetail(vm.getId(), "cpuOvercommitRatio", cluster_detail_cpu.getValue(), true);
_uservmDetailsDao.addDetail(vm.getId(), "memoryOvercommitRatio", cluster_detail_ram.getValue(), true);
} else if (_uservmDetailsDao.findDetail(vm.getId(), "cpuOvercommitRatio") != null) {
_uservmDetailsDao.addDetail(vm.getId(), "cpuOvercommitRatio", cluster_detail_cpu.getValue(), true);
_uservmDetailsDao.addDetail(vm.getId(), "memoryOvercommitRatio", cluster_detail_ram.getValue(), true);
}
vmProfile.setCpuOvercommitRatio(Float.parseFloat(cluster_detail_cpu.getValue()));
vmProfile.setMemoryOvercommitRatio(Float.parseFloat(cluster_detail_ram.getValue()));
StartAnswer startAnswer = null;
try {
if (!changeState(vm, Event.OperationRetry, destHostId, work, Step.Prepare)) {
throw new ConcurrentOperationException("Unable to update the state of the Virtual Machine " + vm.getUuid() + " oldstate: " + vm.getState() + "Event :" + Event.OperationRetry);
}
} catch (final NoTransitionException e1) {
throw new ConcurrentOperationException(e1.getMessage());
}
try {
s_logger.debug("VM is being created in podId: " + vm.getPodIdToDeployIn());
_networkMgr.prepare(vmProfile, dest, ctx);
volumeMgr.prepare(vmProfile, dest);
// since StorageMgr succeeded in volume creation, reuse Volume for further tries until current cluster has capacity
if (!reuseVolume) {
reuseVolume = true;
}
vmGuru.finalizeVirtualMachineProfile(vmProfile, dest, ctx);
final VirtualMachineTO vmTO = hvGuru.implement(vmProfile);
handlePath(vmTO.getDisks(), vm.getHypervisorType());
final Commands cmds = new Commands(Command.OnError.Stop);
cmds.addCommand(new StartCommand(vmTO, dest.getHost(), getExecuteInSequence(vm.getHypervisorType())));
vmGuru.finalizeDeployment(cmds, vmProfile, dest, ctx);
work = _workDao.findById(work.getId());
if (work == null || work.getStep() != Step.Prepare) {
throw new ConcurrentOperationException("Work steps have been changed: " + work);
}
_workDao.updateStep(work, Step.Starting);
_agentMgr.send(destHostId, cmds);
_workDao.updateStep(work, Step.Started);
startAnswer = cmds.getAnswer(StartAnswer.class);
if (startAnswer != null && startAnswer.getResult()) {
handlePath(vmTO.getDisks(), startAnswer.getIqnToPath());
final String host_guid = startAnswer.getHost_guid();
if (host_guid != null) {
final HostVO finalHost = _resourceMgr.findHostByGuid(host_guid);
if (finalHost == null) {
throw new CloudRuntimeException("Host Guid " + host_guid + " doesn't exist in DB, something went wrong while processing start answer: " + startAnswer);
}
destHostId = finalHost.getId();
}
if (vmGuru.finalizeStart(vmProfile, destHostId, cmds, ctx)) {
syncDiskChainChange(startAnswer);
if (!changeState(vm, Event.OperationSucceeded, destHostId, work, Step.Done)) {
s_logger.error("Unable to transition to a new state. VM uuid: " + vm.getUuid() + "VM oldstate:" + vm.getState() + "Event:" + Event.OperationSucceeded);
throw new ConcurrentOperationException("Failed to deploy VM" + vm.getUuid());
}
// Update GPU device capacity
final GPUDeviceTO gpuDevice = startAnswer.getVirtualMachine().getGpuDevice();
if (gpuDevice != null) {
_resourceMgr.updateGPUDetails(destHostId, gpuDevice.getGroupDetails());
}
startedVm = vm;
s_logger.debug("Start completed for VM " + vm);
return;
} else {
s_logger.info("The guru did not like the answers so stopping " + vm);
final StopCommand cmd = new StopCommand(vm, getExecuteInSequence(vm.getHypervisorType()), false);
final Answer answer = _agentMgr.easySend(destHostId, cmd);
if (answer != null && answer instanceof StopAnswer) {
final StopAnswer stopAns = (StopAnswer) answer;
if (vm.getType() == VirtualMachine.Type.User) {
final String platform = stopAns.getPlatform();
if (platform != null) {
final Map<String, String> vmmetadata = new HashMap<>();
vmmetadata.put(vm.getInstanceName(), platform);
syncVMMetaData(vmmetadata);
}
}
}
if (answer == null || !answer.getResult()) {
s_logger.warn("Unable to stop " + vm + " due to " + (answer != null ? answer.getDetails() : "no answers"));
_haMgr.scheduleStop(vm, destHostId, WorkType.ForceStop);
throw new ExecutionException("Unable to stop this VM, " + vm.getUuid() + " so we are unable to retry the start operation");
}
throw new ExecutionException("Unable to start VM:" + vm.getUuid() + " due to error in finalizeStart, not retrying");
}
}
s_logger.info("Unable to start VM on " + dest.getHost() + " due to " + (startAnswer == null ? " no start answer" : startAnswer.getDetails()));
if (startAnswer != null && startAnswer.getContextParam("stopRetry") != null) {
break;
}
} catch (final OperationTimedoutException e) {
s_logger.debug("Unable to send the start command to host " + dest.getHost() + " failed to start VM: " + vm.getUuid());
if (e.isActive()) {
_haMgr.scheduleStop(vm, destHostId, WorkType.CheckStop);
}
canRetry = false;
throw new AgentUnavailableException("Unable to start " + vm.getHostName(), destHostId, e);
} catch (final ResourceUnavailableException e) {
s_logger.info("Unable to contact resource.", e);
if (!avoids.add(e)) {
if (e.getScope() == Volume.class || e.getScope() == Nic.class) {
throw e;
} else {
s_logger.warn("unexpected ResourceUnavailableException : " + e.getScope().getName(), e);
throw e;
}
}
} catch (final InsufficientCapacityException e) {
s_logger.info("Insufficient capacity ", e);
if (!avoids.add(e)) {
if (e.getScope() == Volume.class || e.getScope() == Nic.class) {
throw e;
} else {
s_logger.warn("unexpected InsufficientCapacityException : " + e.getScope().getName(), e);
}
}
} catch (final ExecutionException | NoTransitionException e) {
s_logger.error("Failed to start instance " + vm, e);
throw new AgentUnavailableException("Unable to start instance due to " + e.getMessage(), destHostId, e);
} finally {
if (startedVm == null && canRetry) {
final Step prevStep = work.getStep();
_workDao.updateStep(work, Step.Release);
// If previous step was started/ing && we got a valid answer
if ((prevStep == Step.Started || prevStep == Step.Starting) && startAnswer != null && startAnswer.getResult()) {
// TODO check the response of cleanup
// and record it in DB for retry
cleanup(vmGuru, vmProfile, work, Event.OperationFailed, false);
} else {
// if step is not starting/started, send cleanup command with force=true
cleanup(vmGuru, vmProfile, work, Event.OperationFailed, true);
}
}
}
}
} finally {
if (startedVm == null) {
if (canRetry) {
try {
changeState(vm, Event.OperationFailed, null, work, Step.Done);
} catch (final NoTransitionException e) {
throw new ConcurrentOperationException(e.getMessage());
}
}
}
if (planToDeploy != null) {
planToDeploy.setAvoids(avoids);
}
}
if (startedVm == null) {
throw new CloudRuntimeException("Unable to start instance '" + vm.getHostName() + "' (" + vm.getUuid() + "), see management server log for details");
}
}
use of com.cloud.agent.api.to.GPUDeviceTO in project cosmic by MissionCriticalCloud.
the class VirtualMachineManagerImpl method sendStop.
protected boolean sendStop(final VirtualMachineGuru guru, final VirtualMachineProfile profile, final boolean force, final boolean checkBeforeCleanup) {
final VirtualMachine vm = profile.getVirtualMachine();
final StopCommand stop = new StopCommand(vm, getExecuteInSequence(vm.getHypervisorType()), checkBeforeCleanup);
try {
final Answer answer = _agentMgr.send(vm.getHostId(), stop);
if (answer != null && answer instanceof StopAnswer) {
final StopAnswer stopAns = (StopAnswer) answer;
if (vm.getType() == VirtualMachine.Type.User) {
final String platform = stopAns.getPlatform();
if (platform != null) {
final UserVmVO userVm = _userVmDao.findById(vm.getId());
_userVmDao.loadDetails(userVm);
userVm.setDetail("platform", platform);
_userVmDao.saveDetails(userVm);
}
}
final GPUDeviceTO gpuDevice = stop.getGpuDevice();
if (gpuDevice != null) {
_resourceMgr.updateGPUDetails(vm.getHostId(), gpuDevice.getGroupDetails());
}
if (!answer.getResult()) {
final String details = answer.getDetails();
s_logger.debug("Unable to stop VM due to " + details);
return false;
}
guru.finalizeStop(profile, answer);
} else {
s_logger.error("Invalid answer received in response to a StopCommand for " + vm.getInstanceName());
return false;
}
} catch (final AgentUnavailableException e) {
if (!force) {
return false;
}
} catch (final OperationTimedoutException e) {
if (!force) {
return false;
}
}
return true;
}
use of com.cloud.agent.api.to.GPUDeviceTO in project cosmic by MissionCriticalCloud.
the class CitrixStartCommandWrapper method execute.
@Override
public Answer execute(final StartCommand command, final CitrixResourceBase citrixResourceBase) {
final Connection conn = citrixResourceBase.getConnection();
final VirtualMachineTO vmSpec = command.getVirtualMachine();
final String vmName = vmSpec.getName();
VmPowerState state = VmPowerState.HALTED;
VM vm = null;
// if a VDI is created, record its UUID to send back to the CS MS
final Map<String, String> iqnToPath = new HashMap<>();
try {
final Set<VM> vms = VM.getByNameLabel(conn, vmName);
if (vms != null) {
for (final VM v : vms) {
final VM.Record vRec = v.getRecord(conn);
if (vRec.powerState == VmPowerState.HALTED) {
v.destroy(conn);
} else if (vRec.powerState == VmPowerState.RUNNING) {
final String host = vRec.residentOn.getUuid(conn);
final String msg = "VM " + vmName + " is runing on host " + host;
s_logger.debug(msg);
return new StartAnswer(command, msg, host);
} else {
final String msg = "There is already a VM having the same name " + vmName + " vm record " + vRec.toString();
s_logger.warn(msg);
return new StartAnswer(command, msg);
}
}
}
s_logger.debug("1. The VM " + vmName + " is in Starting state.");
final Host host = Host.getByUuid(conn, citrixResourceBase.getHost().getUuid());
vm = citrixResourceBase.createVmFromTemplate(conn, vmSpec, host);
final GPUDeviceTO gpuDevice = vmSpec.getGpuDevice();
if (gpuDevice != null) {
s_logger.debug("Creating VGPU for of VGPU type: " + gpuDevice.getVgpuType() + " in GPU group " + gpuDevice.getGpuGroup() + " for VM " + vmName);
citrixResourceBase.createVGPU(conn, command, vm, gpuDevice);
}
if (vmSpec.getType() != VirtualMachine.Type.User) {
citrixResourceBase.createPatchVbd(conn, vmName, vm);
}
// put cdrom at the first place in the list
final List<DiskTO> disks = new ArrayList<>(vmSpec.getDisks().length);
int index = 0;
for (final DiskTO disk : vmSpec.getDisks()) {
if (Volume.Type.ISO.equals(disk.getType())) {
disks.add(0, disk);
} else {
disks.add(index, disk);
}
index++;
}
for (final DiskTO disk : disks) {
final VDI newVdi = citrixResourceBase.prepareManagedDisk(conn, disk, vmName);
if (newVdi != null) {
final String path = newVdi.getUuid(conn);
iqnToPath.put(disk.getDetails().get(DiskTO.IQN), path);
}
citrixResourceBase.createVbd(conn, disk, vmName, vm, vmSpec.getBootloader(), newVdi);
}
for (final NicTO nic : vmSpec.getNics()) {
citrixResourceBase.createVif(conn, vmName, vm, vmSpec, nic);
}
citrixResourceBase.startVM(conn, host, vm, vmName);
state = VmPowerState.RUNNING;
final StartAnswer startAnswer = new StartAnswer(command);
startAnswer.setIqnToPath(iqnToPath);
return startAnswer;
} catch (final Exception e) {
s_logger.warn("Catch Exception: " + e.getClass().toString() + " due to " + e.toString(), e);
final String msg = citrixResourceBase.handleVmStartFailure(conn, vmName, vm, "", e);
final StartAnswer startAnswer = new StartAnswer(command, msg);
startAnswer.setIqnToPath(iqnToPath);
return startAnswer;
} finally {
if (state != VmPowerState.HALTED) {
s_logger.debug("2. The VM " + vmName + " is in " + state + " state.");
} else {
s_logger.debug("The VM is in stopped state, detected problem during startup : " + vmName);
}
}
}
use of com.cloud.agent.api.to.GPUDeviceTO in project cosmic by MissionCriticalCloud.
the class CitrixStopCommandWrapper method execute.
@Override
public Answer execute(final StopCommand command, final CitrixResourceBase citrixResourceBase) {
final String vmName = command.getVmName();
String platformstring = null;
try {
final Connection conn = citrixResourceBase.getConnection();
final Set<VM> vms = VM.getByNameLabel(conn, vmName);
// stop vm which is running on this host or is in halted state
final Iterator<VM> iter = vms.iterator();
while (iter.hasNext()) {
final VM vm = iter.next();
final VM.Record vmr = vm.getRecord(conn);
if (vmr.powerState != VmPowerState.RUNNING) {
continue;
}
if (citrixResourceBase.isRefNull(vmr.residentOn)) {
continue;
}
if (vmr.residentOn.getUuid(conn).equals(citrixResourceBase.getHost().getUuid())) {
continue;
}
iter.remove();
}
if (vms.size() == 0) {
return new StopAnswer(command, "VM does not exist", true);
}
for (final VM vm : vms) {
final VM.Record vmr = vm.getRecord(conn);
platformstring = StringUtils.mapToString(vmr.platform);
if (vmr.isControlDomain) {
final String msg = "Tring to Shutdown control domain";
s_logger.warn(msg);
return new StopAnswer(command, msg, false);
}
if (vmr.powerState == VmPowerState.RUNNING && !citrixResourceBase.isRefNull(vmr.residentOn) && !vmr.residentOn.getUuid(conn).equals(citrixResourceBase.getHost().getUuid())) {
final String msg = "Stop Vm " + vmName + " failed due to this vm is not running on this host: " + citrixResourceBase.getHost().getUuid() + " but host:" + vmr.residentOn.getUuid(conn);
s_logger.warn(msg);
return new StopAnswer(command, msg, platformstring, false);
}
if (command.checkBeforeCleanup() && vmr.powerState == VmPowerState.RUNNING) {
final String msg = "Vm " + vmName + " is running on host and checkBeforeCleanup flag is set, so bailing out";
s_logger.debug(msg);
return new StopAnswer(command, msg, false);
}
s_logger.debug("9. The VM " + vmName + " is in Stopping state");
try {
if (vmr.powerState == VmPowerState.RUNNING) {
/* when stop a vm, set affinity to current xenserver */
vm.setAffinity(conn, vm.getResidentOn(conn));
if (citrixResourceBase.canBridgeFirewall()) {
final String result = citrixResourceBase.callHostPlugin(conn, "vmops", "destroy_network_rules_for_vm", "vmName", command.getVmName());
if (result == null || result.isEmpty() || !Boolean.parseBoolean(result)) {
s_logger.warn("Failed to remove network rules for vm " + command.getVmName());
} else {
s_logger.info("Removed network rules for vm " + command.getVmName());
}
}
citrixResourceBase.shutdownVM(conn, vm, vmName, command.isForceStop());
}
} catch (final Exception e) {
final String msg = "Catch exception " + e.getClass().getName() + " when stop VM:" + command.getVmName() + " due to " + e.toString();
s_logger.debug(msg);
return new StopAnswer(command, msg, platformstring, false);
} finally {
try {
if (vm.getPowerState(conn) == VmPowerState.HALTED) {
Set<VGPU> vGPUs = null;
// Get updated GPU details
try {
vGPUs = vm.getVGPUs(conn);
} catch (final XenAPIException e2) {
s_logger.debug("VM " + vmName + " does not have GPU support.");
}
if (vGPUs != null && !vGPUs.isEmpty()) {
final HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails = citrixResourceBase.getGPUGroupDetails(conn);
command.setGpuDevice(new GPUDeviceTO(null, null, groupDetails));
}
final Set<VIF> vifs = vm.getVIFs(conn);
final List<Network> networks = new ArrayList<>();
for (final VIF vif : vifs) {
networks.add(vif.getNetwork(conn));
}
vm.destroy(conn);
final SR sr = citrixResourceBase.getISOSRbyVmName(conn, command.getVmName());
citrixResourceBase.removeSR(conn, sr);
// anymore
for (final Network network : networks) {
try {
if (network.getNameLabel(conn).startsWith("VLAN")) {
citrixResourceBase.disableVlanNetwork(conn, network);
}
} catch (final Exception e) {
// network might be destroyed by other host
}
}
return new StopAnswer(command, "Stop VM " + vmName + " Succeed", platformstring, true);
}
} catch (final Exception e) {
final String msg = "VM destroy failed in Stop " + vmName + " Command due to " + e.getMessage();
s_logger.warn(msg, e);
} finally {
s_logger.debug("10. The VM " + vmName + " is in Stopped state");
}
}
}
} catch (final Exception e) {
final String msg = "Stop Vm " + vmName + " fail due to " + e.toString();
s_logger.warn(msg, e);
return new StopAnswer(command, msg, platformstring, false);
}
return new StopAnswer(command, "Stop VM failed", platformstring, false);
}
use of com.cloud.agent.api.to.GPUDeviceTO in project cloudstack by apache.
the class VirtualMachineManagerImpl method orchestrateStart.
@Override
public void orchestrateStart(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params, final DeploymentPlan planToDeploy, final DeploymentPlanner planner) throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException {
final CallContext cctxt = CallContext.current();
final Account account = cctxt.getCallingAccount();
final User caller = cctxt.getCallingUser();
VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
final VirtualMachineGuru vmGuru = getVmGuru(vm);
final Ternary<VMInstanceVO, ReservationContext, ItWorkVO> start = changeToStartState(vmGuru, vm, caller, account);
if (start == null) {
return;
}
vm = start.first();
final ReservationContext ctx = start.second();
ItWorkVO work = start.third();
VMInstanceVO startedVm = null;
final ServiceOfferingVO offering = _offeringDao.findById(vm.getId(), vm.getServiceOfferingId());
final VirtualMachineTemplate template = _entityMgr.findByIdIncludingRemoved(VirtualMachineTemplate.class, vm.getTemplateId());
DataCenterDeployment plan = new DataCenterDeployment(vm.getDataCenterId(), vm.getPodIdToDeployIn(), null, null, null, null, ctx);
if (planToDeploy != null && planToDeploy.getDataCenterId() != 0) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("advanceStart: DeploymentPlan is provided, using dcId:" + planToDeploy.getDataCenterId() + ", podId: " + planToDeploy.getPodId() + ", clusterId: " + planToDeploy.getClusterId() + ", hostId: " + planToDeploy.getHostId() + ", poolId: " + planToDeploy.getPoolId());
}
plan = new DataCenterDeployment(planToDeploy.getDataCenterId(), planToDeploy.getPodId(), planToDeploy.getClusterId(), planToDeploy.getHostId(), planToDeploy.getPoolId(), planToDeploy.getPhysicalNetworkId(), ctx);
}
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
// check resource count if ResourceCountRunningVMsonly.value() = true
final Account owner = _entityMgr.findById(Account.class, vm.getAccountId());
if (VirtualMachine.Type.User.equals(vm.type) && ResourceCountRunningVMsonly.value()) {
resourceCountIncrement(owner.getAccountId(), new Long(offering.getCpu()), new Long(offering.getRamSize()));
}
boolean canRetry = true;
ExcludeList avoids = null;
try {
final Journal journal = start.second().getJournal();
if (planToDeploy != null) {
avoids = planToDeploy.getAvoids();
}
if (avoids == null) {
avoids = new ExcludeList();
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Deploy avoids pods: " + avoids.getPodsToAvoid() + ", clusters: " + avoids.getClustersToAvoid() + ", hosts: " + avoids.getHostsToAvoid());
}
boolean planChangedByVolume = false;
boolean reuseVolume = true;
final DataCenterDeployment originalPlan = plan;
int retry = StartRetry.value();
while (retry-- != 0) {
s_logger.debug("VM start attempt #" + (StartRetry.value() - retry));
if (reuseVolume) {
final List<VolumeVO> vols = _volsDao.findReadyRootVolumesByInstance(vm.getId());
for (final VolumeVO vol : vols) {
final Long volTemplateId = vol.getTemplateId();
if (volTemplateId != null && volTemplateId != template.getId()) {
if (s_logger.isDebugEnabled()) {
s_logger.debug(vol + " of " + vm + " is READY, but template ids don't match, let the planner reassign a new pool");
}
continue;
}
final StoragePool pool = (StoragePool) dataStoreMgr.getPrimaryDataStore(vol.getPoolId());
if (!pool.isInMaintenance()) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Root volume is ready, need to place VM in volume's cluster");
}
final long rootVolDcId = pool.getDataCenterId();
final Long rootVolPodId = pool.getPodId();
final Long rootVolClusterId = pool.getClusterId();
if (planToDeploy != null && planToDeploy.getDataCenterId() != 0) {
final Long clusterIdSpecified = planToDeploy.getClusterId();
if (clusterIdSpecified != null && rootVolClusterId != null) {
if (!rootVolClusterId.equals(clusterIdSpecified)) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Cannot satisfy the deployment plan passed in since the ready Root volume is in different cluster. volume's cluster: " + rootVolClusterId + ", cluster specified: " + clusterIdSpecified);
}
throw new ResourceUnavailableException("Root volume is ready in different cluster, Deployment plan provided cannot be satisfied, unable to create a deployment for " + vm, Cluster.class, clusterIdSpecified);
}
}
plan = new DataCenterDeployment(planToDeploy.getDataCenterId(), planToDeploy.getPodId(), planToDeploy.getClusterId(), planToDeploy.getHostId(), vol.getPoolId(), null, ctx);
} else {
plan = new DataCenterDeployment(rootVolDcId, rootVolPodId, rootVolClusterId, null, vol.getPoolId(), null, ctx);
if (s_logger.isDebugEnabled()) {
s_logger.debug(vol + " is READY, changing deployment plan to use this pool's dcId: " + rootVolDcId + " , podId: " + rootVolPodId + " , and clusterId: " + rootVolClusterId);
}
planChangedByVolume = true;
}
}
}
}
final VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vm, template, offering, owner, params);
logBootModeParameters(params);
DeployDestination dest = null;
try {
dest = _dpMgr.planDeployment(vmProfile, plan, avoids, planner);
} catch (final AffinityConflictException e2) {
s_logger.warn("Unable to create deployment, affinity rules associted to the VM conflict", e2);
throw new CloudRuntimeException("Unable to create deployment, affinity rules associted to the VM conflict");
}
if (dest == null) {
if (planChangedByVolume) {
plan = originalPlan;
planChangedByVolume = false;
reuseVolume = false;
continue;
}
throw new InsufficientServerCapacityException("Unable to create a deployment for " + vmProfile, DataCenter.class, plan.getDataCenterId(), areAffinityGroupsAssociated(vmProfile));
}
avoids.addHost(dest.getHost().getId());
if (!template.isDeployAsIs()) {
journal.record("Deployment found - Attempt #" + (StartRetry.value() - retry), vmProfile, dest);
}
long destHostId = dest.getHost().getId();
vm.setPodIdToDeployIn(dest.getPod().getId());
final Long cluster_id = dest.getCluster().getId();
final ClusterDetailsVO cluster_detail_cpu = _clusterDetailsDao.findDetail(cluster_id, VmDetailConstants.CPU_OVER_COMMIT_RATIO);
final ClusterDetailsVO cluster_detail_ram = _clusterDetailsDao.findDetail(cluster_id, VmDetailConstants.MEMORY_OVER_COMMIT_RATIO);
if (userVmDetailsDao.findDetail(vm.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO) == null && (Float.parseFloat(cluster_detail_cpu.getValue()) > 1f || Float.parseFloat(cluster_detail_ram.getValue()) > 1f)) {
userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO, cluster_detail_cpu.getValue(), true);
userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.MEMORY_OVER_COMMIT_RATIO, cluster_detail_ram.getValue(), true);
} else if (userVmDetailsDao.findDetail(vm.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO) != null) {
userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO, cluster_detail_cpu.getValue(), true);
userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.MEMORY_OVER_COMMIT_RATIO, cluster_detail_ram.getValue(), true);
}
vmProfile.setCpuOvercommitRatio(Float.parseFloat(cluster_detail_cpu.getValue()));
vmProfile.setMemoryOvercommitRatio(Float.parseFloat(cluster_detail_ram.getValue()));
StartAnswer startAnswer = null;
try {
if (!changeState(vm, Event.OperationRetry, destHostId, work, Step.Prepare)) {
throw new ConcurrentOperationException("Unable to update the state of the Virtual Machine " + vm.getUuid() + " oldstate: " + vm.getState() + "Event :" + Event.OperationRetry);
}
} catch (final NoTransitionException e1) {
throw new ConcurrentOperationException(e1.getMessage());
}
try {
resetVmNicsDeviceId(vm.getId());
_networkMgr.prepare(vmProfile, dest, ctx);
if (vm.getHypervisorType() != HypervisorType.BareMetal) {
volumeMgr.prepare(vmProfile, dest);
}
if (!reuseVolume) {
reuseVolume = true;
}
vmGuru.finalizeVirtualMachineProfile(vmProfile, dest, ctx);
final VirtualMachineTO vmTO = hvGuru.implement(vmProfile);
checkAndSetEnterSetupMode(vmTO, params);
handlePath(vmTO.getDisks(), vm.getHypervisorType());
Commands cmds = new Commands(Command.OnError.Stop);
cmds.addCommand(new StartCommand(vmTO, dest.getHost(), getExecuteInSequence(vm.getHypervisorType())));
vmGuru.finalizeDeployment(cmds, vmProfile, dest, ctx);
addExtraConfig(vmTO);
work = _workDao.findById(work.getId());
if (work == null || work.getStep() != Step.Prepare) {
throw new ConcurrentOperationException("Work steps have been changed: " + work);
}
_workDao.updateStep(work, Step.Starting);
_agentMgr.send(destHostId, cmds);
_workDao.updateStep(work, Step.Started);
startAnswer = cmds.getAnswer(StartAnswer.class);
if (startAnswer != null && startAnswer.getResult()) {
handlePath(vmTO.getDisks(), startAnswer.getIqnToData());
final String host_guid = startAnswer.getHost_guid();
if (host_guid != null) {
final HostVO finalHost = _resourceMgr.findHostByGuid(host_guid);
if (finalHost == null) {
throw new CloudRuntimeException("Host Guid " + host_guid + " doesn't exist in DB, something went wrong while processing start answer: " + startAnswer);
}
destHostId = finalHost.getId();
}
if (vmGuru.finalizeStart(vmProfile, destHostId, cmds, ctx)) {
syncDiskChainChange(startAnswer);
if (!changeState(vm, Event.OperationSucceeded, destHostId, work, Step.Done)) {
s_logger.error("Unable to transition to a new state. VM uuid: " + vm.getUuid() + "VM oldstate:" + vm.getState() + "Event:" + Event.OperationSucceeded);
throw new ConcurrentOperationException("Failed to deploy VM" + vm.getUuid());
}
final GPUDeviceTO gpuDevice = startAnswer.getVirtualMachine().getGpuDevice();
if (gpuDevice != null) {
_resourceMgr.updateGPUDetails(destHostId, gpuDevice.getGroupDetails());
}
if (userVmDetailsDao.findDetail(vm.getId(), VmDetailConstants.DEPLOY_VM) != null) {
userVmDetailsDao.removeDetail(vm.getId(), VmDetailConstants.DEPLOY_VM);
}
startedVm = vm;
if (s_logger.isDebugEnabled()) {
s_logger.debug("Start completed for VM " + vm);
}
final Host vmHost = _hostDao.findById(destHostId);
if (vmHost != null && (VirtualMachine.Type.ConsoleProxy.equals(vm.getType()) || VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) && caManager.canProvisionCertificates()) {
final Map<String, String> sshAccessDetails = _networkMgr.getSystemVMAccessDetails(vm);
for (int retries = 3; retries > 0; retries--) {
try {
setupAgentSecurity(vmHost, sshAccessDetails, vm);
return;
} catch (final AgentUnavailableException | OperationTimedoutException e) {
s_logger.error("Retrying after catching exception while trying to secure agent for systemvm id=" + vm.getId(), e);
}
}
throw new CloudRuntimeException("Failed to setup and secure agent for systemvm id=" + vm.getId());
}
return;
} else {
if (s_logger.isDebugEnabled()) {
s_logger.info("The guru did not like the answers so stopping " + vm);
}
StopCommand stopCmd = new StopCommand(vm, getExecuteInSequence(vm.getHypervisorType()), false);
stopCmd.setControlIp(getControlNicIpForVM(vm));
Map<String, Boolean> vlanToPersistenceMap = getVlanToPersistenceMapForVM(vm.getId());
if (MapUtils.isNotEmpty(vlanToPersistenceMap)) {
stopCmd.setVlanToPersistenceMap(vlanToPersistenceMap);
}
final StopCommand cmd = stopCmd;
final Answer answer = _agentMgr.easySend(destHostId, cmd);
if (answer != null && answer instanceof StopAnswer) {
final StopAnswer stopAns = (StopAnswer) answer;
if (vm.getType() == VirtualMachine.Type.User) {
final String platform = stopAns.getPlatform();
if (platform != null) {
final Map<String, String> vmmetadata = new HashMap<>();
vmmetadata.put(vm.getInstanceName(), platform);
syncVMMetaData(vmmetadata);
}
}
}
if (answer == null || !answer.getResult()) {
s_logger.warn("Unable to stop " + vm + " due to " + (answer != null ? answer.getDetails() : "no answers"));
_haMgr.scheduleStop(vm, destHostId, WorkType.ForceStop);
throw new ExecutionException("Unable to stop this VM, " + vm.getUuid() + " so we are unable to retry the start operation");
}
throw new ExecutionException("Unable to start VM:" + vm.getUuid() + " due to error in finalizeStart, not retrying");
}
}
s_logger.info("Unable to start VM on " + dest.getHost() + " due to " + (startAnswer == null ? " no start answer" : startAnswer.getDetails()));
if (startAnswer != null && startAnswer.getContextParam("stopRetry") != null) {
break;
}
} catch (OperationTimedoutException e) {
s_logger.debug("Unable to send the start command to host " + dest.getHost() + " failed to start VM: " + vm.getUuid());
if (e.isActive()) {
_haMgr.scheduleStop(vm, destHostId, WorkType.CheckStop);
}
canRetry = false;
throw new AgentUnavailableException("Unable to start " + vm.getHostName(), destHostId, e);
} catch (final ResourceUnavailableException e) {
s_logger.warn("Unable to contact resource.", e);
if (!avoids.add(e)) {
if (e.getScope() == Volume.class || e.getScope() == Nic.class) {
throw e;
} else {
s_logger.warn("unexpected ResourceUnavailableException : " + e.getScope().getName(), e);
throw e;
}
}
} catch (final InsufficientCapacityException e) {
s_logger.warn("Insufficient capacity ", e);
if (!avoids.add(e)) {
if (e.getScope() == Volume.class || e.getScope() == Nic.class) {
throw e;
} else {
s_logger.warn("unexpected InsufficientCapacityException : " + e.getScope().getName(), e);
}
}
} catch (ExecutionException | NoTransitionException e) {
s_logger.error("Failed to start instance " + vm, e);
throw new AgentUnavailableException("Unable to start instance due to " + e.getMessage(), destHostId, e);
} catch (final StorageAccessException e) {
s_logger.warn("Unable to access storage on host", e);
} finally {
if (startedVm == null && canRetry) {
final Step prevStep = work.getStep();
_workDao.updateStep(work, Step.Release);
if ((prevStep == Step.Started || prevStep == Step.Starting) && startAnswer != null && startAnswer.getResult()) {
cleanup(vmGuru, vmProfile, work, Event.OperationFailed, false);
} else {
cleanup(vmGuru, vmProfile, work, Event.OperationFailed, true);
}
}
}
}
} finally {
if (startedVm == null) {
if (VirtualMachine.Type.User.equals(vm.type) && ResourceCountRunningVMsonly.value()) {
resourceCountDecrement(owner.getAccountId(), new Long(offering.getCpu()), new Long(offering.getRamSize()));
}
if (canRetry) {
try {
changeState(vm, Event.OperationFailed, null, work, Step.Done);
} catch (final NoTransitionException e) {
throw new ConcurrentOperationException(e.getMessage());
}
}
}
if (planToDeploy != null) {
planToDeploy.setAvoids(avoids);
}
}
if (startedVm == null) {
throw new CloudRuntimeException("Unable to start instance '" + vm.getHostName() + "' (" + vm.getUuid() + "), see management server log for details");
}
}
Aggregations