Search in sources :

Example 1 with Step

use of com.cloud.vm.ItWorkVO.Step in project cloudstack by apache.

the class VirtualMachineManagerImpl method orchestrateStart.

@Override
public void orchestrateStart(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params, final DeploymentPlan planToDeploy, final DeploymentPlanner planner) throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException {
    final CallContext cctxt = CallContext.current();
    final Account account = cctxt.getCallingAccount();
    final User caller = cctxt.getCallingUser();
    VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
    final VirtualMachineGuru vmGuru = getVmGuru(vm);
    final Ternary<VMInstanceVO, ReservationContext, ItWorkVO> start = changeToStartState(vmGuru, vm, caller, account);
    if (start == null) {
        return;
    }
    vm = start.first();
    final ReservationContext ctx = start.second();
    ItWorkVO work = start.third();
    VMInstanceVO startedVm = null;
    final ServiceOfferingVO offering = _offeringDao.findById(vm.getId(), vm.getServiceOfferingId());
    final VirtualMachineTemplate template = _entityMgr.findByIdIncludingRemoved(VirtualMachineTemplate.class, vm.getTemplateId());
    DataCenterDeployment plan = new DataCenterDeployment(vm.getDataCenterId(), vm.getPodIdToDeployIn(), null, null, null, null, ctx);
    if (planToDeploy != null && planToDeploy.getDataCenterId() != 0) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("advanceStart: DeploymentPlan is provided, using dcId:" + planToDeploy.getDataCenterId() + ", podId: " + planToDeploy.getPodId() + ", clusterId: " + planToDeploy.getClusterId() + ", hostId: " + planToDeploy.getHostId() + ", poolId: " + planToDeploy.getPoolId());
        }
        plan = new DataCenterDeployment(planToDeploy.getDataCenterId(), planToDeploy.getPodId(), planToDeploy.getClusterId(), planToDeploy.getHostId(), planToDeploy.getPoolId(), planToDeploy.getPhysicalNetworkId(), ctx);
    }
    final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
    // check resource count if ResourceCountRunningVMsonly.value() = true
    final Account owner = _entityMgr.findById(Account.class, vm.getAccountId());
    if (VirtualMachine.Type.User.equals(vm.type) && ResourceCountRunningVMsonly.value()) {
        resourceCountIncrement(owner.getAccountId(), new Long(offering.getCpu()), new Long(offering.getRamSize()));
    }
    boolean canRetry = true;
    ExcludeList avoids = null;
    try {
        final Journal journal = start.second().getJournal();
        if (planToDeploy != null) {
            avoids = planToDeploy.getAvoids();
        }
        if (avoids == null) {
            avoids = new ExcludeList();
        }
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Deploy avoids pods: " + avoids.getPodsToAvoid() + ", clusters: " + avoids.getClustersToAvoid() + ", hosts: " + avoids.getHostsToAvoid());
        }
        boolean planChangedByVolume = false;
        boolean reuseVolume = true;
        final DataCenterDeployment originalPlan = plan;
        int retry = StartRetry.value();
        while (retry-- != 0) {
            s_logger.debug("VM start attempt #" + (StartRetry.value() - retry));
            if (reuseVolume) {
                final List<VolumeVO> vols = _volsDao.findReadyRootVolumesByInstance(vm.getId());
                for (final VolumeVO vol : vols) {
                    final Long volTemplateId = vol.getTemplateId();
                    if (volTemplateId != null && volTemplateId != template.getId()) {
                        if (s_logger.isDebugEnabled()) {
                            s_logger.debug(vol + " of " + vm + " is READY, but template ids don't match, let the planner reassign a new pool");
                        }
                        continue;
                    }
                    final StoragePool pool = (StoragePool) dataStoreMgr.getPrimaryDataStore(vol.getPoolId());
                    if (!pool.isInMaintenance()) {
                        if (s_logger.isDebugEnabled()) {
                            s_logger.debug("Root volume is ready, need to place VM in volume's cluster");
                        }
                        final long rootVolDcId = pool.getDataCenterId();
                        final Long rootVolPodId = pool.getPodId();
                        final Long rootVolClusterId = pool.getClusterId();
                        if (planToDeploy != null && planToDeploy.getDataCenterId() != 0) {
                            final Long clusterIdSpecified = planToDeploy.getClusterId();
                            if (clusterIdSpecified != null && rootVolClusterId != null) {
                                if (!rootVolClusterId.equals(clusterIdSpecified)) {
                                    if (s_logger.isDebugEnabled()) {
                                        s_logger.debug("Cannot satisfy the deployment plan passed in since the ready Root volume is in different cluster. volume's cluster: " + rootVolClusterId + ", cluster specified: " + clusterIdSpecified);
                                    }
                                    throw new ResourceUnavailableException("Root volume is ready in different cluster, Deployment plan provided cannot be satisfied, unable to create a deployment for " + vm, Cluster.class, clusterIdSpecified);
                                }
                            }
                            plan = new DataCenterDeployment(planToDeploy.getDataCenterId(), planToDeploy.getPodId(), planToDeploy.getClusterId(), planToDeploy.getHostId(), vol.getPoolId(), null, ctx);
                        } else {
                            plan = new DataCenterDeployment(rootVolDcId, rootVolPodId, rootVolClusterId, null, vol.getPoolId(), null, ctx);
                            if (s_logger.isDebugEnabled()) {
                                s_logger.debug(vol + " is READY, changing deployment plan to use this pool's dcId: " + rootVolDcId + " , podId: " + rootVolPodId + " , and clusterId: " + rootVolClusterId);
                            }
                            planChangedByVolume = true;
                        }
                    }
                }
            }
            final VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vm, template, offering, owner, params);
            logBootModeParameters(params);
            DeployDestination dest = null;
            try {
                dest = _dpMgr.planDeployment(vmProfile, plan, avoids, planner);
            } catch (final AffinityConflictException e2) {
                s_logger.warn("Unable to create deployment, affinity rules associted to the VM conflict", e2);
                throw new CloudRuntimeException("Unable to create deployment, affinity rules associted to the VM conflict");
            }
            if (dest == null) {
                if (planChangedByVolume) {
                    plan = originalPlan;
                    planChangedByVolume = false;
                    reuseVolume = false;
                    continue;
                }
                throw new InsufficientServerCapacityException("Unable to create a deployment for " + vmProfile, DataCenter.class, plan.getDataCenterId(), areAffinityGroupsAssociated(vmProfile));
            }
            avoids.addHost(dest.getHost().getId());
            if (!template.isDeployAsIs()) {
                journal.record("Deployment found - Attempt #" + (StartRetry.value() - retry), vmProfile, dest);
            }
            long destHostId = dest.getHost().getId();
            vm.setPodIdToDeployIn(dest.getPod().getId());
            final Long cluster_id = dest.getCluster().getId();
            final ClusterDetailsVO cluster_detail_cpu = _clusterDetailsDao.findDetail(cluster_id, VmDetailConstants.CPU_OVER_COMMIT_RATIO);
            final ClusterDetailsVO cluster_detail_ram = _clusterDetailsDao.findDetail(cluster_id, VmDetailConstants.MEMORY_OVER_COMMIT_RATIO);
            if (userVmDetailsDao.findDetail(vm.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO) == null && (Float.parseFloat(cluster_detail_cpu.getValue()) > 1f || Float.parseFloat(cluster_detail_ram.getValue()) > 1f)) {
                userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO, cluster_detail_cpu.getValue(), true);
                userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.MEMORY_OVER_COMMIT_RATIO, cluster_detail_ram.getValue(), true);
            } else if (userVmDetailsDao.findDetail(vm.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO) != null) {
                userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO, cluster_detail_cpu.getValue(), true);
                userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.MEMORY_OVER_COMMIT_RATIO, cluster_detail_ram.getValue(), true);
            }
            vmProfile.setCpuOvercommitRatio(Float.parseFloat(cluster_detail_cpu.getValue()));
            vmProfile.setMemoryOvercommitRatio(Float.parseFloat(cluster_detail_ram.getValue()));
            StartAnswer startAnswer = null;
            try {
                if (!changeState(vm, Event.OperationRetry, destHostId, work, Step.Prepare)) {
                    throw new ConcurrentOperationException("Unable to update the state of the Virtual Machine " + vm.getUuid() + " oldstate: " + vm.getState() + "Event :" + Event.OperationRetry);
                }
            } catch (final NoTransitionException e1) {
                throw new ConcurrentOperationException(e1.getMessage());
            }
            try {
                resetVmNicsDeviceId(vm.getId());
                _networkMgr.prepare(vmProfile, dest, ctx);
                if (vm.getHypervisorType() != HypervisorType.BareMetal) {
                    volumeMgr.prepare(vmProfile, dest);
                }
                if (!reuseVolume) {
                    reuseVolume = true;
                }
                vmGuru.finalizeVirtualMachineProfile(vmProfile, dest, ctx);
                final VirtualMachineTO vmTO = hvGuru.implement(vmProfile);
                checkAndSetEnterSetupMode(vmTO, params);
                handlePath(vmTO.getDisks(), vm.getHypervisorType());
                Commands cmds = new Commands(Command.OnError.Stop);
                cmds.addCommand(new StartCommand(vmTO, dest.getHost(), getExecuteInSequence(vm.getHypervisorType())));
                vmGuru.finalizeDeployment(cmds, vmProfile, dest, ctx);
                addExtraConfig(vmTO);
                work = _workDao.findById(work.getId());
                if (work == null || work.getStep() != Step.Prepare) {
                    throw new ConcurrentOperationException("Work steps have been changed: " + work);
                }
                _workDao.updateStep(work, Step.Starting);
                _agentMgr.send(destHostId, cmds);
                _workDao.updateStep(work, Step.Started);
                startAnswer = cmds.getAnswer(StartAnswer.class);
                if (startAnswer != null && startAnswer.getResult()) {
                    handlePath(vmTO.getDisks(), startAnswer.getIqnToData());
                    final String host_guid = startAnswer.getHost_guid();
                    if (host_guid != null) {
                        final HostVO finalHost = _resourceMgr.findHostByGuid(host_guid);
                        if (finalHost == null) {
                            throw new CloudRuntimeException("Host Guid " + host_guid + " doesn't exist in DB, something went wrong while processing start answer: " + startAnswer);
                        }
                        destHostId = finalHost.getId();
                    }
                    if (vmGuru.finalizeStart(vmProfile, destHostId, cmds, ctx)) {
                        syncDiskChainChange(startAnswer);
                        if (!changeState(vm, Event.OperationSucceeded, destHostId, work, Step.Done)) {
                            s_logger.error("Unable to transition to a new state. VM uuid: " + vm.getUuid() + "VM oldstate:" + vm.getState() + "Event:" + Event.OperationSucceeded);
                            throw new ConcurrentOperationException("Failed to deploy VM" + vm.getUuid());
                        }
                        final GPUDeviceTO gpuDevice = startAnswer.getVirtualMachine().getGpuDevice();
                        if (gpuDevice != null) {
                            _resourceMgr.updateGPUDetails(destHostId, gpuDevice.getGroupDetails());
                        }
                        if (userVmDetailsDao.findDetail(vm.getId(), VmDetailConstants.DEPLOY_VM) != null) {
                            userVmDetailsDao.removeDetail(vm.getId(), VmDetailConstants.DEPLOY_VM);
                        }
                        startedVm = vm;
                        if (s_logger.isDebugEnabled()) {
                            s_logger.debug("Start completed for VM " + vm);
                        }
                        final Host vmHost = _hostDao.findById(destHostId);
                        if (vmHost != null && (VirtualMachine.Type.ConsoleProxy.equals(vm.getType()) || VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) && caManager.canProvisionCertificates()) {
                            final Map<String, String> sshAccessDetails = _networkMgr.getSystemVMAccessDetails(vm);
                            for (int retries = 3; retries > 0; retries--) {
                                try {
                                    setupAgentSecurity(vmHost, sshAccessDetails, vm);
                                    return;
                                } catch (final AgentUnavailableException | OperationTimedoutException e) {
                                    s_logger.error("Retrying after catching exception while trying to secure agent for systemvm id=" + vm.getId(), e);
                                }
                            }
                            throw new CloudRuntimeException("Failed to setup and secure agent for systemvm id=" + vm.getId());
                        }
                        return;
                    } else {
                        if (s_logger.isDebugEnabled()) {
                            s_logger.info("The guru did not like the answers so stopping " + vm);
                        }
                        StopCommand stopCmd = new StopCommand(vm, getExecuteInSequence(vm.getHypervisorType()), false);
                        stopCmd.setControlIp(getControlNicIpForVM(vm));
                        Map<String, Boolean> vlanToPersistenceMap = getVlanToPersistenceMapForVM(vm.getId());
                        if (MapUtils.isNotEmpty(vlanToPersistenceMap)) {
                            stopCmd.setVlanToPersistenceMap(vlanToPersistenceMap);
                        }
                        final StopCommand cmd = stopCmd;
                        final Answer answer = _agentMgr.easySend(destHostId, cmd);
                        if (answer != null && answer instanceof StopAnswer) {
                            final StopAnswer stopAns = (StopAnswer) answer;
                            if (vm.getType() == VirtualMachine.Type.User) {
                                final String platform = stopAns.getPlatform();
                                if (platform != null) {
                                    final Map<String, String> vmmetadata = new HashMap<>();
                                    vmmetadata.put(vm.getInstanceName(), platform);
                                    syncVMMetaData(vmmetadata);
                                }
                            }
                        }
                        if (answer == null || !answer.getResult()) {
                            s_logger.warn("Unable to stop " + vm + " due to " + (answer != null ? answer.getDetails() : "no answers"));
                            _haMgr.scheduleStop(vm, destHostId, WorkType.ForceStop);
                            throw new ExecutionException("Unable to stop this VM, " + vm.getUuid() + " so we are unable to retry the start operation");
                        }
                        throw new ExecutionException("Unable to start  VM:" + vm.getUuid() + " due to error in finalizeStart, not retrying");
                    }
                }
                s_logger.info("Unable to start VM on " + dest.getHost() + " due to " + (startAnswer == null ? " no start answer" : startAnswer.getDetails()));
                if (startAnswer != null && startAnswer.getContextParam("stopRetry") != null) {
                    break;
                }
            } catch (OperationTimedoutException e) {
                s_logger.debug("Unable to send the start command to host " + dest.getHost() + " failed to start VM: " + vm.getUuid());
                if (e.isActive()) {
                    _haMgr.scheduleStop(vm, destHostId, WorkType.CheckStop);
                }
                canRetry = false;
                throw new AgentUnavailableException("Unable to start " + vm.getHostName(), destHostId, e);
            } catch (final ResourceUnavailableException e) {
                s_logger.warn("Unable to contact resource.", e);
                if (!avoids.add(e)) {
                    if (e.getScope() == Volume.class || e.getScope() == Nic.class) {
                        throw e;
                    } else {
                        s_logger.warn("unexpected ResourceUnavailableException : " + e.getScope().getName(), e);
                        throw e;
                    }
                }
            } catch (final InsufficientCapacityException e) {
                s_logger.warn("Insufficient capacity ", e);
                if (!avoids.add(e)) {
                    if (e.getScope() == Volume.class || e.getScope() == Nic.class) {
                        throw e;
                    } else {
                        s_logger.warn("unexpected InsufficientCapacityException : " + e.getScope().getName(), e);
                    }
                }
            } catch (ExecutionException | NoTransitionException e) {
                s_logger.error("Failed to start instance " + vm, e);
                throw new AgentUnavailableException("Unable to start instance due to " + e.getMessage(), destHostId, e);
            } catch (final StorageAccessException e) {
                s_logger.warn("Unable to access storage on host", e);
            } finally {
                if (startedVm == null && canRetry) {
                    final Step prevStep = work.getStep();
                    _workDao.updateStep(work, Step.Release);
                    if ((prevStep == Step.Started || prevStep == Step.Starting) && startAnswer != null && startAnswer.getResult()) {
                        cleanup(vmGuru, vmProfile, work, Event.OperationFailed, false);
                    } else {
                        cleanup(vmGuru, vmProfile, work, Event.OperationFailed, true);
                    }
                }
            }
        }
    } finally {
        if (startedVm == null) {
            if (VirtualMachine.Type.User.equals(vm.type) && ResourceCountRunningVMsonly.value()) {
                resourceCountDecrement(owner.getAccountId(), new Long(offering.getCpu()), new Long(offering.getRamSize()));
            }
            if (canRetry) {
                try {
                    changeState(vm, Event.OperationFailed, null, work, Step.Done);
                } catch (final NoTransitionException e) {
                    throw new ConcurrentOperationException(e.getMessage());
                }
            }
        }
        if (planToDeploy != null) {
            planToDeploy.setAvoids(avoids);
        }
    }
    if (startedVm == null) {
        throw new CloudRuntimeException("Unable to start instance '" + vm.getHostName() + "' (" + vm.getUuid() + "), see management server log for details");
    }
}
Also used : Account(com.cloud.user.Account) StoragePool(com.cloud.storage.StoragePool) StartAnswer(com.cloud.agent.api.StartAnswer) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) Journal(com.cloud.utils.Journal) AffinityConflictException(com.cloud.exception.AffinityConflictException) ServiceOfferingVO(com.cloud.service.ServiceOfferingVO) VirtualMachineTO(com.cloud.agent.api.to.VirtualMachineTO) HypervisorGuru(com.cloud.hypervisor.HypervisorGuru) VolumeVO(com.cloud.storage.VolumeVO) AgentUnavailableException(com.cloud.exception.AgentUnavailableException) DataCenterDeployment(com.cloud.deploy.DataCenterDeployment) VirtualMachineTemplate(com.cloud.template.VirtualMachineTemplate) InsufficientServerCapacityException(com.cloud.exception.InsufficientServerCapacityException) CallContext(org.apache.cloudstack.context.CallContext) HostVO(com.cloud.host.HostVO) StopCommand(com.cloud.agent.api.StopCommand) DeployDestination(com.cloud.deploy.DeployDestination) NoTransitionException(com.cloud.utils.fsm.NoTransitionException) ResourceUnavailableException(com.cloud.exception.ResourceUnavailableException) OperationTimedoutException(com.cloud.exception.OperationTimedoutException) User(com.cloud.user.User) StartCommand(com.cloud.agent.api.StartCommand) Step(com.cloud.vm.ItWorkVO.Step) CloudRuntimeException(com.cloud.utils.exception.CloudRuntimeException) Commands(com.cloud.agent.manager.Commands) ExecutionException(com.cloud.utils.exception.ExecutionException) InsufficientCapacityException(com.cloud.exception.InsufficientCapacityException) StopAnswer(com.cloud.agent.api.StopAnswer) ExcludeList(com.cloud.deploy.DeploymentPlanner.ExcludeList) Host(com.cloud.host.Host) StorageAccessException(com.cloud.exception.StorageAccessException) GPUDeviceTO(com.cloud.agent.api.to.GPUDeviceTO) ConcurrentOperationException(com.cloud.exception.ConcurrentOperationException) MigrateVmToPoolAnswer(com.cloud.agent.api.MigrateVmToPoolAnswer) StopAnswer(com.cloud.agent.api.StopAnswer) Answer(com.cloud.agent.api.Answer) UnPlugNicAnswer(com.cloud.agent.api.UnPlugNicAnswer) AgentControlAnswer(com.cloud.agent.api.AgentControlAnswer) RebootAnswer(com.cloud.agent.api.RebootAnswer) StartAnswer(com.cloud.agent.api.StartAnswer) ReplugNicAnswer(com.cloud.agent.api.ReplugNicAnswer) RestoreVMSnapshotAnswer(com.cloud.agent.api.RestoreVMSnapshotAnswer) PlugNicAnswer(com.cloud.agent.api.PlugNicAnswer) ClusterVMMetaDataSyncAnswer(com.cloud.agent.api.ClusterVMMetaDataSyncAnswer) CheckVirtualMachineAnswer(com.cloud.agent.api.CheckVirtualMachineAnswer) PrepareForMigrationAnswer(com.cloud.agent.api.PrepareForMigrationAnswer) ClusterDetailsVO(com.cloud.dc.ClusterDetailsVO)

Example 2 with Step

use of com.cloud.vm.ItWorkVO.Step in project cloudstack by apache.

the class VirtualMachineManagerImpl method changeState.

protected <T extends VMInstanceVO> boolean changeState(final T vm, final Event event, final Long hostId, final ItWorkVO work, final Step step) throws NoTransitionException {
    Step previousStep = null;
    if (work != null) {
        previousStep = work.getStep();
        _workDao.updateStep(work, step);
    }
    boolean result = false;
    try {
        result = stateTransitTo(vm, event, hostId);
        return result;
    } finally {
        if (!result && work != null) {
            _workDao.updateStep(work, previousStep);
        }
    }
}
Also used : Step(com.cloud.vm.ItWorkVO.Step)

Example 3 with Step

use of com.cloud.vm.ItWorkVO.Step in project cloudstack by apache.

the class VirtualMachineManagerImpl method cleanup.

protected boolean cleanup(final VirtualMachineGuru guru, final VirtualMachineProfile profile, final ItWorkVO work, final Event event, final boolean cleanUpEvenIfUnableToStop) {
    final VirtualMachine vm = profile.getVirtualMachine();
    final State state = vm.getState();
    s_logger.debug("Cleaning up resources for the vm " + vm + " in " + state + " state");
    try {
        if (state == State.Starting) {
            if (work != null) {
                final Step step = work.getStep();
                if (step == Step.Starting && !cleanUpEvenIfUnableToStop) {
                    s_logger.warn("Unable to cleanup vm " + vm + "; work state is incorrect: " + step);
                    return false;
                }
                if (step == Step.Started || step == Step.Starting || step == Step.Release) {
                    if (vm.getHostId() != null) {
                        if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
                            s_logger.warn("Failed to stop vm " + vm + " in " + State.Starting + " state as a part of cleanup process");
                            return false;
                        }
                    }
                }
                if (step != Step.Release && step != Step.Prepare && step != Step.Started && step != Step.Starting) {
                    s_logger.debug("Cleanup is not needed for vm " + vm + "; work state is incorrect: " + step);
                    return true;
                }
            } else {
                if (vm.getHostId() != null) {
                    if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
                        s_logger.warn("Failed to stop vm " + vm + " in " + State.Starting + " state as a part of cleanup process");
                        return false;
                    }
                }
            }
        } else if (state == State.Stopping) {
            if (vm.getHostId() != null) {
                if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
                    s_logger.warn("Failed to stop vm " + vm + " in " + State.Stopping + " state as a part of cleanup process");
                    return false;
                }
            }
        } else if (state == State.Migrating) {
            if (vm.getHostId() != null) {
                if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
                    s_logger.warn("Failed to stop vm " + vm + " in " + State.Migrating + " state as a part of cleanup process");
                    return false;
                }
            }
            if (vm.getLastHostId() != null) {
                if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
                    s_logger.warn("Failed to stop vm " + vm + " in " + State.Migrating + " state as a part of cleanup process");
                    return false;
                }
            }
        } else if (state == State.Running) {
            if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
                s_logger.warn("Failed to stop vm " + vm + " in " + State.Running + " state as a part of cleanup process");
                return false;
            }
        }
    } finally {
        releaseVmResources(profile, cleanUpEvenIfUnableToStop);
    }
    return true;
}
Also used : ResourceState(com.cloud.resource.ResourceState) State(com.cloud.vm.VirtualMachine.State) PowerState(com.cloud.vm.VirtualMachine.PowerState) Step(com.cloud.vm.ItWorkVO.Step)

Example 4 with Step

use of com.cloud.vm.ItWorkVO.Step in project cosmic by MissionCriticalCloud.

the class VirtualMachineManagerImpl method orchestrateStart.

@Override
public void orchestrateStart(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params, final DeploymentPlan planToDeploy, final DeploymentPlanner planner) throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException {
    final CallContext cctxt = CallContext.current();
    final Account account = cctxt.getCallingAccount();
    final User caller = cctxt.getCallingUser();
    VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
    final Ternary<VMInstanceVO, ReservationContext, ItWorkVO> start = changeToStartState(vm, caller, account);
    if (start == null) {
        return;
    }
    vm = start.first();
    final ReservationContext ctx = start.second();
    ItWorkVO work = start.third();
    VMInstanceVO startedVm = null;
    final ServiceOfferingVO offering = _offeringDao.findById(vm.getId(), vm.getServiceOfferingId());
    final VirtualMachineTemplate template = _entityMgr.findByIdIncludingRemoved(VirtualMachineTemplate.class, vm.getTemplateId());
    s_logger.debug("Trying to deploy VM, vm has dcId: " + vm.getDataCenterId() + " and podId: " + vm.getPodIdToDeployIn());
    DataCenterDeployment plan = new DataCenterDeployment(vm.getDataCenterId(), vm.getPodIdToDeployIn(), null, null, null, null, ctx);
    if (planToDeploy != null && planToDeploy.getDataCenterId() != 0) {
        s_logger.debug("advanceStart: DeploymentPlan is provided, using dcId:" + planToDeploy.getDataCenterId() + ", podId: " + planToDeploy.getPodId() + ", clusterId: " + planToDeploy.getClusterId() + ", hostId: " + planToDeploy.getHostId() + ", poolId: " + planToDeploy.getPoolId());
        plan = new DataCenterDeployment(planToDeploy.getDataCenterId(), planToDeploy.getPodId(), planToDeploy.getClusterId(), planToDeploy.getHostId(), planToDeploy.getPoolId(), planToDeploy.getPhysicalNetworkId(), ctx);
    }
    final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
    final VirtualMachineGuru vmGuru = getVmGuru(vm);
    boolean canRetry = true;
    ExcludeList avoids = null;
    try {
        final Journal journal = start.second().getJournal();
        if (planToDeploy != null) {
            avoids = planToDeploy.getAvoids();
        }
        if (avoids == null) {
            avoids = new ExcludeList();
        }
        s_logger.debug("VM start orchestration will {}", avoids.toString());
        boolean planChangedByVolume = false;
        boolean reuseVolume = true;
        final DataCenterDeployment originalPlan = plan;
        int retry = StartRetry.value();
        while (retry-- != 0) {
            if (reuseVolume) {
                // edit plan if this vm's ROOT volume is in READY state already
                final List<VolumeVO> vols = _volsDao.findReadyRootVolumesByInstance(vm.getId());
                for (final VolumeVO vol : vols) {
                    // make sure if the templateId is unchanged. If it is changed,
                    // let planner
                    // reassign pool for the volume even if it ready.
                    final Long volTemplateId = vol.getTemplateId();
                    if (volTemplateId != null && volTemplateId != template.getId()) {
                        s_logger.debug(vol + " of " + vm + " is READY, but template ids don't match, let the planner reassign a new pool");
                        continue;
                    }
                    final StoragePool pool = (StoragePool) dataStoreMgr.getPrimaryDataStore(vol.getPoolId());
                    if (!pool.isInMaintenance()) {
                        s_logger.debug("Root volume is ready, need to place VM in volume's cluster");
                        final long rootVolDcId = pool.getDataCenterId();
                        final Long rootVolPodId = pool.getPodId();
                        final Long rootVolClusterId = pool.getClusterId();
                        if (planToDeploy != null && planToDeploy.getDataCenterId() != 0) {
                            final Long clusterIdSpecified = planToDeploy.getClusterId();
                            if (clusterIdSpecified != null && rootVolClusterId != null) {
                                checkIfPlanIsDeployable(vm, rootVolClusterId, clusterIdSpecified);
                            }
                            plan = new DataCenterDeployment(planToDeploy.getDataCenterId(), planToDeploy.getPodId(), planToDeploy.getClusterId(), planToDeploy.getHostId(), vol.getPoolId(), null, ctx);
                        } else {
                            plan = new DataCenterDeployment(rootVolDcId, rootVolPodId, rootVolClusterId, null, vol.getPoolId(), null, ctx);
                            s_logger.debug(vol + " is READY, changing deployment plan to use this pool's dcId: " + rootVolDcId + " , podId: " + rootVolPodId + " , and clusterId: " + rootVolClusterId);
                            planChangedByVolume = true;
                        }
                    }
                }
            }
            final Account owner = _entityMgr.findById(Account.class, vm.getAccountId());
            final VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vm, template, offering, owner, params);
            DeployDestination dest;
            try {
                dest = _dpMgr.planDeployment(vmProfile, plan, avoids, planner);
            } catch (final AffinityConflictException e2) {
                s_logger.warn("Unable to create deployment, affinity rules associted to the VM conflict", e2);
                throw new CloudRuntimeException("Unable to create deployment, affinity rules associted to the VM conflict");
            }
            if (dest == null) {
                if (planChangedByVolume) {
                    plan = originalPlan;
                    planChangedByVolume = false;
                    // do not enter volume reuse for next retry, since we want to look for resources outside the volume's cluster
                    reuseVolume = false;
                    continue;
                }
                throw new InsufficientServerCapacityException("Unable to create a deployment for " + vmProfile, DataCenter.class, plan.getDataCenterId(), areAffinityGroupsAssociated(vmProfile));
            }
            if (dest != null) {
                avoids.addHost(dest.getHost().getId());
                journal.record("Deployment found ", vmProfile, dest);
            }
            long destHostId = dest.getHost().getId();
            vm.setPodIdToDeployIn(dest.getPod().getId());
            final Long cluster_id = dest.getCluster().getId();
            final ClusterDetailsVO cluster_detail_cpu = _clusterDetailsDao.findDetail(cluster_id, "cpuOvercommitRatio");
            final ClusterDetailsVO cluster_detail_ram = _clusterDetailsDao.findDetail(cluster_id, "memoryOvercommitRatio");
            // storing the value of overcommit in the vm_details table for doing a capacity check in case the cluster overcommit ratio is changed.
            if (_uservmDetailsDao.findDetail(vm.getId(), "cpuOvercommitRatio") == null && (Float.parseFloat(cluster_detail_cpu.getValue()) > 1f || Float.parseFloat(cluster_detail_ram.getValue()) > 1f)) {
                _uservmDetailsDao.addDetail(vm.getId(), "cpuOvercommitRatio", cluster_detail_cpu.getValue(), true);
                _uservmDetailsDao.addDetail(vm.getId(), "memoryOvercommitRatio", cluster_detail_ram.getValue(), true);
            } else if (_uservmDetailsDao.findDetail(vm.getId(), "cpuOvercommitRatio") != null) {
                _uservmDetailsDao.addDetail(vm.getId(), "cpuOvercommitRatio", cluster_detail_cpu.getValue(), true);
                _uservmDetailsDao.addDetail(vm.getId(), "memoryOvercommitRatio", cluster_detail_ram.getValue(), true);
            }
            vmProfile.setCpuOvercommitRatio(Float.parseFloat(cluster_detail_cpu.getValue()));
            vmProfile.setMemoryOvercommitRatio(Float.parseFloat(cluster_detail_ram.getValue()));
            StartAnswer startAnswer = null;
            try {
                if (!changeState(vm, Event.OperationRetry, destHostId, work, Step.Prepare)) {
                    throw new ConcurrentOperationException("Unable to update the state of the Virtual Machine " + vm.getUuid() + " oldstate: " + vm.getState() + "Event :" + Event.OperationRetry);
                }
            } catch (final NoTransitionException e1) {
                throw new ConcurrentOperationException(e1.getMessage());
            }
            try {
                s_logger.debug("VM is being created in podId: " + vm.getPodIdToDeployIn());
                _networkMgr.prepare(vmProfile, dest, ctx);
                volumeMgr.prepare(vmProfile, dest);
                // since StorageMgr succeeded in volume creation, reuse Volume for further tries until current cluster has capacity
                if (!reuseVolume) {
                    reuseVolume = true;
                }
                vmGuru.finalizeVirtualMachineProfile(vmProfile, dest, ctx);
                final VirtualMachineTO vmTO = hvGuru.implement(vmProfile);
                handlePath(vmTO.getDisks(), vm.getHypervisorType());
                final Commands cmds = new Commands(Command.OnError.Stop);
                cmds.addCommand(new StartCommand(vmTO, dest.getHost(), getExecuteInSequence(vm.getHypervisorType())));
                vmGuru.finalizeDeployment(cmds, vmProfile, dest, ctx);
                work = _workDao.findById(work.getId());
                if (work == null || work.getStep() != Step.Prepare) {
                    throw new ConcurrentOperationException("Work steps have been changed: " + work);
                }
                _workDao.updateStep(work, Step.Starting);
                _agentMgr.send(destHostId, cmds);
                _workDao.updateStep(work, Step.Started);
                startAnswer = cmds.getAnswer(StartAnswer.class);
                if (startAnswer != null && startAnswer.getResult()) {
                    handlePath(vmTO.getDisks(), startAnswer.getIqnToPath());
                    final String host_guid = startAnswer.getHost_guid();
                    if (host_guid != null) {
                        final HostVO finalHost = _resourceMgr.findHostByGuid(host_guid);
                        if (finalHost == null) {
                            throw new CloudRuntimeException("Host Guid " + host_guid + " doesn't exist in DB, something went wrong while processing start answer: " + startAnswer);
                        }
                        destHostId = finalHost.getId();
                    }
                    if (vmGuru.finalizeStart(vmProfile, destHostId, cmds, ctx)) {
                        syncDiskChainChange(startAnswer);
                        if (!changeState(vm, Event.OperationSucceeded, destHostId, work, Step.Done)) {
                            s_logger.error("Unable to transition to a new state. VM uuid: " + vm.getUuid() + "VM oldstate:" + vm.getState() + "Event:" + Event.OperationSucceeded);
                            throw new ConcurrentOperationException("Failed to deploy VM" + vm.getUuid());
                        }
                        // Update GPU device capacity
                        final GPUDeviceTO gpuDevice = startAnswer.getVirtualMachine().getGpuDevice();
                        if (gpuDevice != null) {
                            _resourceMgr.updateGPUDetails(destHostId, gpuDevice.getGroupDetails());
                        }
                        startedVm = vm;
                        s_logger.debug("Start completed for VM " + vm);
                        return;
                    } else {
                        s_logger.info("The guru did not like the answers so stopping " + vm);
                        final StopCommand cmd = new StopCommand(vm, getExecuteInSequence(vm.getHypervisorType()), false);
                        final Answer answer = _agentMgr.easySend(destHostId, cmd);
                        if (answer != null && answer instanceof StopAnswer) {
                            final StopAnswer stopAns = (StopAnswer) answer;
                            if (vm.getType() == VirtualMachine.Type.User) {
                                final String platform = stopAns.getPlatform();
                                if (platform != null) {
                                    final Map<String, String> vmmetadata = new HashMap<>();
                                    vmmetadata.put(vm.getInstanceName(), platform);
                                    syncVMMetaData(vmmetadata);
                                }
                            }
                        }
                        if (answer == null || !answer.getResult()) {
                            s_logger.warn("Unable to stop " + vm + " due to " + (answer != null ? answer.getDetails() : "no answers"));
                            _haMgr.scheduleStop(vm, destHostId, WorkType.ForceStop);
                            throw new ExecutionException("Unable to stop this VM, " + vm.getUuid() + " so we are unable to retry the start operation");
                        }
                        throw new ExecutionException("Unable to start  VM:" + vm.getUuid() + " due to error in finalizeStart, not retrying");
                    }
                }
                s_logger.info("Unable to start VM on " + dest.getHost() + " due to " + (startAnswer == null ? " no start answer" : startAnswer.getDetails()));
                if (startAnswer != null && startAnswer.getContextParam("stopRetry") != null) {
                    break;
                }
            } catch (final OperationTimedoutException e) {
                s_logger.debug("Unable to send the start command to host " + dest.getHost() + " failed to start VM: " + vm.getUuid());
                if (e.isActive()) {
                    _haMgr.scheduleStop(vm, destHostId, WorkType.CheckStop);
                }
                canRetry = false;
                throw new AgentUnavailableException("Unable to start " + vm.getHostName(), destHostId, e);
            } catch (final ResourceUnavailableException e) {
                s_logger.info("Unable to contact resource.", e);
                if (!avoids.add(e)) {
                    if (e.getScope() == Volume.class || e.getScope() == Nic.class) {
                        throw e;
                    } else {
                        s_logger.warn("unexpected ResourceUnavailableException : " + e.getScope().getName(), e);
                        throw e;
                    }
                }
            } catch (final InsufficientCapacityException e) {
                s_logger.info("Insufficient capacity ", e);
                if (!avoids.add(e)) {
                    if (e.getScope() == Volume.class || e.getScope() == Nic.class) {
                        throw e;
                    } else {
                        s_logger.warn("unexpected InsufficientCapacityException : " + e.getScope().getName(), e);
                    }
                }
            } catch (final ExecutionException | NoTransitionException e) {
                s_logger.error("Failed to start instance " + vm, e);
                throw new AgentUnavailableException("Unable to start instance due to " + e.getMessage(), destHostId, e);
            } finally {
                if (startedVm == null && canRetry) {
                    final Step prevStep = work.getStep();
                    _workDao.updateStep(work, Step.Release);
                    // If previous step was started/ing && we got a valid answer
                    if ((prevStep == Step.Started || prevStep == Step.Starting) && startAnswer != null && startAnswer.getResult()) {
                        // TODO check the response of cleanup
                        // and record it in DB for retry
                        cleanup(vmGuru, vmProfile, work, Event.OperationFailed, false);
                    } else {
                        // if step is not starting/started, send cleanup command with force=true
                        cleanup(vmGuru, vmProfile, work, Event.OperationFailed, true);
                    }
                }
            }
        }
    } finally {
        if (startedVm == null) {
            if (canRetry) {
                try {
                    changeState(vm, Event.OperationFailed, null, work, Step.Done);
                } catch (final NoTransitionException e) {
                    throw new ConcurrentOperationException(e.getMessage());
                }
            }
        }
        if (planToDeploy != null) {
            planToDeploy.setAvoids(avoids);
        }
    }
    if (startedVm == null) {
        throw new CloudRuntimeException("Unable to start instance '" + vm.getHostName() + "' (" + vm.getUuid() + "), see management server log for details");
    }
}
Also used : Account(com.cloud.user.Account) OperationTimedoutException(com.cloud.exception.OperationTimedoutException) User(com.cloud.user.User) StoragePool(com.cloud.storage.StoragePool) StartAnswer(com.cloud.agent.api.StartAnswer) StartCommand(com.cloud.agent.api.StartCommand) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) Journal(com.cloud.utils.Journal) Step(com.cloud.vm.ItWorkVO.Step) AffinityConflictException(com.cloud.exception.AffinityConflictException) ServiceOfferingVO(com.cloud.service.ServiceOfferingVO) VirtualMachineTO(com.cloud.agent.api.to.VirtualMachineTO) HypervisorGuru(com.cloud.hypervisor.HypervisorGuru) VolumeVO(com.cloud.storage.VolumeVO) CloudRuntimeException(com.cloud.utils.exception.CloudRuntimeException) AgentUnavailableException(com.cloud.exception.AgentUnavailableException) Commands(com.cloud.agent.manager.Commands) ExecutionException(com.cloud.utils.exception.ExecutionException) InsufficientCapacityException(com.cloud.exception.InsufficientCapacityException) StopAnswer(com.cloud.agent.api.StopAnswer) ExcludeList(com.cloud.deploy.DeploymentPlanner.ExcludeList) DataCenterDeployment(com.cloud.deploy.DataCenterDeployment) VirtualMachineTemplate(com.cloud.template.VirtualMachineTemplate) InsufficientServerCapacityException(com.cloud.exception.InsufficientServerCapacityException) GPUDeviceTO(com.cloud.agent.api.to.GPUDeviceTO) CallContext(com.cloud.context.CallContext) ConcurrentOperationException(com.cloud.exception.ConcurrentOperationException) HostVO(com.cloud.host.HostVO) AgentControlAnswer(com.cloud.agent.api.AgentControlAnswer) RebootAnswer(com.cloud.agent.api.RebootAnswer) StartAnswer(com.cloud.agent.api.StartAnswer) RestoreVMSnapshotAnswer(com.cloud.agent.api.RestoreVMSnapshotAnswer) PlugNicAnswer(com.cloud.agent.api.PlugNicAnswer) StopAnswer(com.cloud.agent.api.StopAnswer) Answer(com.cloud.agent.api.Answer) UnPlugNicAnswer(com.cloud.agent.api.UnPlugNicAnswer) ClusterVMMetaDataSyncAnswer(com.cloud.agent.api.ClusterVMMetaDataSyncAnswer) CheckVirtualMachineAnswer(com.cloud.agent.api.CheckVirtualMachineAnswer) StopCommand(com.cloud.agent.api.StopCommand) DeployDestination(com.cloud.deploy.DeployDestination) NoTransitionException(com.cloud.utils.fsm.NoTransitionException) ResourceUnavailableException(com.cloud.exception.ResourceUnavailableException) ClusterDetailsVO(com.cloud.dc.ClusterDetailsVO)

Example 5 with Step

use of com.cloud.vm.ItWorkVO.Step in project cosmic by MissionCriticalCloud.

the class VirtualMachineManagerImpl method cleanup.

protected boolean cleanup(final VirtualMachineGuru guru, final VirtualMachineProfile profile, final ItWorkVO work, final Event event, final boolean cleanUpEvenIfUnableToStop) {
    final VirtualMachine vm = profile.getVirtualMachine();
    final State state = vm.getState();
    s_logger.debug("Cleaning up resources for the vm " + vm + " in " + state + " state");
    try {
        if (state == State.Starting) {
            if (work != null) {
                final Step step = work.getStep();
                if (step == Step.Starting && !cleanUpEvenIfUnableToStop) {
                    s_logger.warn("Unable to cleanup vm " + vm + "; work state is incorrect: " + step);
                    return false;
                }
                if (step == Step.Started || step == Step.Starting || step == Step.Release) {
                    if (vm.getHostId() != null) {
                        if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
                            s_logger.warn("Failed to stop vm " + vm + " in " + State.Starting + " state as a part of cleanup process");
                            return false;
                        }
                    }
                }
                if (step != Step.Release && step != Step.Prepare && step != Step.Started && step != Step.Starting) {
                    s_logger.debug("Cleanup is not needed for vm " + vm + "; work state is incorrect: " + step);
                    return true;
                }
            } else {
                if (vm.getHostId() != null) {
                    if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
                        s_logger.warn("Failed to stop vm " + vm + " in " + State.Starting + " state as a part of cleanup process");
                        return false;
                    }
                }
            }
        } else if (state == State.Stopping) {
            if (vm.getHostId() != null) {
                if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
                    s_logger.warn("Failed to stop vm " + vm + " in " + State.Stopping + " state as a part of cleanup process");
                    return false;
                }
            }
        } else if (state == State.Migrating) {
            if (vm.getHostId() != null) {
                if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
                    s_logger.warn("Failed to stop vm " + vm + " in " + State.Migrating + " state as a part of cleanup process");
                    return false;
                }
            }
            if (vm.getLastHostId() != null) {
                if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
                    s_logger.warn("Failed to stop vm " + vm + " in " + State.Migrating + " state as a part of cleanup process");
                    return false;
                }
            }
        } else if (state == State.Running) {
            if (!sendStop(guru, profile, cleanUpEvenIfUnableToStop, false)) {
                s_logger.warn("Failed to stop vm " + vm + " in " + State.Running + " state as a part of cleanup process");
                return false;
            }
        }
    } finally {
        try {
            _networkMgr.release(profile, cleanUpEvenIfUnableToStop);
            s_logger.debug("Successfully released network resources for the vm " + vm);
        } catch (final Exception e) {
            s_logger.warn("Unable to release some network resources.", e);
        }
        volumeMgr.release(profile);
        s_logger.debug("Successfully cleanued up resources for the vm " + vm + " in " + state + " state");
    }
    return true;
}
Also used : PowerState(com.cloud.vm.VirtualMachine.PowerState) State(com.cloud.vm.VirtualMachine.State) Step(com.cloud.vm.ItWorkVO.Step) ExecutionException(com.cloud.utils.exception.ExecutionException) ConnectionException(com.cloud.exception.ConnectionException) NoTransitionException(com.cloud.utils.fsm.NoTransitionException) AgentUnavailableException(com.cloud.exception.AgentUnavailableException) TransactionCallbackWithException(com.cloud.utils.db.TransactionCallbackWithException) CloudException(com.cloud.exception.CloudException) OperationTimedoutException(com.cloud.exception.OperationTimedoutException) InsufficientCapacityException(com.cloud.exception.InsufficientCapacityException) InsufficientServerCapacityException(com.cloud.exception.InsufficientServerCapacityException) InsufficientAddressCapacityException(com.cloud.exception.InsufficientAddressCapacityException) ResourceUnavailableException(com.cloud.exception.ResourceUnavailableException) StorageUnavailableException(com.cloud.exception.StorageUnavailableException) CloudRuntimeException(com.cloud.utils.exception.CloudRuntimeException) SQLException(java.sql.SQLException) VirtualMachineMigrationException(com.cloud.exception.VirtualMachineMigrationException) AffinityConflictException(com.cloud.exception.AffinityConflictException) ConcurrentOperationException(com.cloud.exception.ConcurrentOperationException) InsufficientVirtualNetworkCapacityException(com.cloud.exception.InsufficientVirtualNetworkCapacityException) InvalidParameterValueException(com.cloud.utils.exception.InvalidParameterValueException) ConfigurationException(javax.naming.ConfigurationException)

Aggregations

Step (com.cloud.vm.ItWorkVO.Step)6 AffinityConflictException (com.cloud.exception.AffinityConflictException)3 AgentUnavailableException (com.cloud.exception.AgentUnavailableException)3 ConcurrentOperationException (com.cloud.exception.ConcurrentOperationException)3 InsufficientCapacityException (com.cloud.exception.InsufficientCapacityException)3 InsufficientServerCapacityException (com.cloud.exception.InsufficientServerCapacityException)3 OperationTimedoutException (com.cloud.exception.OperationTimedoutException)3 ResourceUnavailableException (com.cloud.exception.ResourceUnavailableException)3 CloudRuntimeException (com.cloud.utils.exception.CloudRuntimeException)3 ExecutionException (com.cloud.utils.exception.ExecutionException)3 NoTransitionException (com.cloud.utils.fsm.NoTransitionException)3 AgentControlAnswer (com.cloud.agent.api.AgentControlAnswer)2 Answer (com.cloud.agent.api.Answer)2 CheckVirtualMachineAnswer (com.cloud.agent.api.CheckVirtualMachineAnswer)2 ClusterVMMetaDataSyncAnswer (com.cloud.agent.api.ClusterVMMetaDataSyncAnswer)2 PlugNicAnswer (com.cloud.agent.api.PlugNicAnswer)2 RebootAnswer (com.cloud.agent.api.RebootAnswer)2 RestoreVMSnapshotAnswer (com.cloud.agent.api.RestoreVMSnapshotAnswer)2 StartAnswer (com.cloud.agent.api.StartAnswer)2 StartCommand (com.cloud.agent.api.StartCommand)2