Search in sources :

Example 91 with OperationTimedoutException

use of com.cloud.exception.OperationTimedoutException in project cloudstack by apache.

the class VirtualMachineManagerImpl method orchestrateMigrateForScale.

private void orchestrateMigrateForScale(final String vmUuid, final long srcHostId, final DeployDestination dest, final Long oldSvcOfferingId) throws ResourceUnavailableException, ConcurrentOperationException {
    VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
    s_logger.info(String.format("Migrating %s to %s", vm, dest));
    vm.getServiceOfferingId();
    final long dstHostId = dest.getHost().getId();
    final Host fromHost = _hostDao.findById(srcHostId);
    Host srcHost = _hostDao.findById(srcHostId);
    if (fromHost == null) {
        String logMessageUnableToFindHost = String.format("Unable to find host to migrate from %s.", srcHost);
        s_logger.info(logMessageUnableToFindHost);
        throw new CloudRuntimeException(logMessageUnableToFindHost);
    }
    Host dstHost = _hostDao.findById(dstHostId);
    long destHostClusterId = dest.getCluster().getId();
    long fromHostClusterId = fromHost.getClusterId();
    if (fromHostClusterId != destHostClusterId) {
        String logMessageHostsOnDifferentCluster = String.format("Source and destination host are not in same cluster, unable to migrate to %s", srcHost);
        s_logger.info(logMessageHostsOnDifferentCluster);
        throw new CloudRuntimeException(logMessageHostsOnDifferentCluster);
    }
    final VirtualMachineGuru vmGuru = getVmGuru(vm);
    vm = _vmDao.findByUuid(vmUuid);
    if (vm == null) {
        String message = String.format("Unable to find VM {\"uuid\": \"%s\"}.", vmUuid);
        s_logger.warn(message);
        throw new CloudRuntimeException(message);
    }
    if (vm.getState() != State.Running) {
        String message = String.format("%s is not in \"Running\" state, unable to migrate it. Current state [%s].", vm.toString(), vm.getState());
        s_logger.warn(message);
        throw new CloudRuntimeException(message);
    }
    AlertManager.AlertType alertType = AlertManager.AlertType.ALERT_TYPE_USERVM_MIGRATE;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY_MIGRATE;
    }
    final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
    _networkMgr.prepareNicForMigration(profile, dest);
    volumeMgr.prepareForMigration(profile, dest);
    final VirtualMachineTO to = toVmTO(profile);
    final PrepareForMigrationCommand pfmc = new PrepareForMigrationCommand(to);
    ItWorkVO work = new ItWorkVO(UUID.randomUUID().toString(), _nodeId, State.Migrating, vm.getType(), vm.getId());
    work.setStep(Step.Prepare);
    work.setResourceType(ItWorkVO.ResourceType.Host);
    work.setResourceId(dstHostId);
    work = _workDao.persist(work);
    Answer pfma = null;
    try {
        pfma = _agentMgr.send(dstHostId, pfmc);
        if (pfma == null || !pfma.getResult()) {
            final String details = pfma != null ? pfma.getDetails() : "null answer returned";
            pfma = null;
            throw new AgentUnavailableException(String.format("Unable to prepare for migration to destination host [%s] due to [%s].", dstHostId, details), dstHostId);
        }
    } catch (final OperationTimedoutException e1) {
        throw new AgentUnavailableException("Operation timed out", dstHostId);
    } finally {
        if (pfma == null) {
            work.setStep(Step.Done);
            _workDao.update(work.getId(), work);
        }
    }
    vm.setLastHostId(srcHostId);
    try {
        if (vm.getHostId() == null || vm.getHostId() != srcHostId || !changeState(vm, Event.MigrationRequested, dstHostId, work, Step.Migrating)) {
            String message = String.format("Migration of %s cancelled because state has changed.", vm.toString());
            s_logger.warn(message);
            throw new ConcurrentOperationException(message);
        }
    } catch (final NoTransitionException e1) {
        String message = String.format("Migration of %s cancelled due to [%s].", vm.toString(), e1.getMessage());
        s_logger.error(message, e1);
        throw new ConcurrentOperationException(message);
    }
    boolean migrated = false;
    try {
        Map<String, Boolean> vlanToPersistenceMap = getVlanToPersistenceMapForVM(vm.getId());
        final boolean isWindows = _guestOsCategoryDao.findById(_guestOsDao.findById(vm.getGuestOSId()).getCategoryId()).getName().equalsIgnoreCase("Windows");
        final MigrateCommand mc = new MigrateCommand(vm.getInstanceName(), dest.getHost().getPrivateIpAddress(), isWindows, to, getExecuteInSequence(vm.getHypervisorType()));
        if (MapUtils.isNotEmpty(vlanToPersistenceMap)) {
            mc.setVlanToPersistenceMap(vlanToPersistenceMap);
        }
        boolean kvmAutoConvergence = StorageManager.KvmAutoConvergence.value();
        mc.setAutoConvergence(kvmAutoConvergence);
        mc.setHostGuid(dest.getHost().getGuid());
        try {
            final Answer ma = _agentMgr.send(vm.getLastHostId(), mc);
            if (ma == null || !ma.getResult()) {
                String msg = String.format("Unable to migrate %s due to [%s].", vm.toString(), ma != null ? ma.getDetails() : "null answer returned");
                s_logger.error(msg);
                throw new CloudRuntimeException(msg);
            }
        } catch (final OperationTimedoutException e) {
            if (e.isActive()) {
                s_logger.warn("Active migration command so scheduling a restart for " + vm, e);
                _haMgr.scheduleRestart(vm, true);
            }
            throw new AgentUnavailableException("Operation timed out on migrating " + vm, dstHostId, e);
        }
        try {
            final long newServiceOfferingId = vm.getServiceOfferingId();
            vm.setServiceOfferingId(oldSvcOfferingId);
            if (!changeState(vm, VirtualMachine.Event.OperationSucceeded, dstHostId, work, Step.Started)) {
                throw new ConcurrentOperationException("Unable to change the state for " + vm);
            }
            vm.setServiceOfferingId(newServiceOfferingId);
        } catch (final NoTransitionException e1) {
            throw new ConcurrentOperationException("Unable to change state due to " + e1.getMessage());
        }
        try {
            if (!checkVmOnHost(vm, dstHostId)) {
                s_logger.error("Unable to complete migration for " + vm);
                try {
                    _agentMgr.send(srcHostId, new Commands(cleanup(vm.getInstanceName())), null);
                } catch (final AgentUnavailableException e) {
                    s_logger.error(String.format("Unable to cleanup source host [%s] due to [%s].", srcHostId, e.getMessage()), e);
                }
                cleanup(vmGuru, new VirtualMachineProfileImpl(vm), work, Event.AgentReportStopped, true);
                throw new CloudRuntimeException("Unable to complete migration for " + vm);
            }
        } catch (final OperationTimedoutException e) {
            s_logger.debug(String.format("Error while checking the %s on %s", vm, dstHost), e);
        }
        migrated = true;
    } finally {
        if (!migrated) {
            s_logger.info("Migration was unsuccessful.  Cleaning up: " + vm);
            String alertSubject = String.format("Unable to migrate %s from %s in Zone [%s] and Pod [%s].", vm.getInstanceName(), fromHost, dest.getDataCenter().getName(), dest.getPod().getName());
            String alertBody = "Migrate Command failed. Please check logs.";
            _alertMgr.sendAlert(alertType, fromHost.getDataCenterId(), fromHost.getPodId(), alertSubject, alertBody);
            try {
                _agentMgr.send(dstHostId, new Commands(cleanup(vm.getInstanceName())), null);
            } catch (final AgentUnavailableException ae) {
                s_logger.info("Looks like the destination Host is unavailable for cleanup");
            }
            _networkMgr.setHypervisorHostname(profile, dest, false);
            try {
                stateTransitTo(vm, Event.OperationFailed, srcHostId);
            } catch (final NoTransitionException e) {
                s_logger.warn(e.getMessage(), e);
            }
        } else {
            _networkMgr.setHypervisorHostname(profile, dest, true);
            updateVmPod(vm, dstHostId);
        }
        work.setStep(Step.Done);
        _workDao.update(work.getId(), work);
    }
}
Also used : AlertManager(com.cloud.alert.AlertManager) OperationTimedoutException(com.cloud.exception.OperationTimedoutException) Host(com.cloud.host.Host) ConcurrentOperationException(com.cloud.exception.ConcurrentOperationException) VirtualMachineTO(com.cloud.agent.api.to.VirtualMachineTO) MigrateCommand(com.cloud.agent.api.MigrateCommand) MigrateVmToPoolAnswer(com.cloud.agent.api.MigrateVmToPoolAnswer) StopAnswer(com.cloud.agent.api.StopAnswer) Answer(com.cloud.agent.api.Answer) UnPlugNicAnswer(com.cloud.agent.api.UnPlugNicAnswer) AgentControlAnswer(com.cloud.agent.api.AgentControlAnswer) RebootAnswer(com.cloud.agent.api.RebootAnswer) StartAnswer(com.cloud.agent.api.StartAnswer) ReplugNicAnswer(com.cloud.agent.api.ReplugNicAnswer) RestoreVMSnapshotAnswer(com.cloud.agent.api.RestoreVMSnapshotAnswer) PlugNicAnswer(com.cloud.agent.api.PlugNicAnswer) ClusterVMMetaDataSyncAnswer(com.cloud.agent.api.ClusterVMMetaDataSyncAnswer) CheckVirtualMachineAnswer(com.cloud.agent.api.CheckVirtualMachineAnswer) PrepareForMigrationAnswer(com.cloud.agent.api.PrepareForMigrationAnswer) PrepareForMigrationCommand(com.cloud.agent.api.PrepareForMigrationCommand) CloudRuntimeException(com.cloud.utils.exception.CloudRuntimeException) AgentUnavailableException(com.cloud.exception.AgentUnavailableException) NoTransitionException(com.cloud.utils.fsm.NoTransitionException) Commands(com.cloud.agent.manager.Commands)

Example 92 with OperationTimedoutException

use of com.cloud.exception.OperationTimedoutException in project cloudstack by apache.

the class VirtualMachineManagerImpl method orchestrateReConfigureVm.

private VMInstanceVO orchestrateReConfigureVm(String vmUuid, ServiceOffering oldServiceOffering, ServiceOffering newServiceOffering, boolean reconfiguringOnExistingHost) throws ResourceUnavailableException, ConcurrentOperationException {
    final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
    HostVO hostVo = _hostDao.findById(vm.getHostId());
    Long clustedId = hostVo.getClusterId();
    Float memoryOvercommitRatio = CapacityManager.MemOverprovisioningFactor.valueIn(clustedId);
    Float cpuOvercommitRatio = CapacityManager.CpuOverprovisioningFactor.valueIn(clustedId);
    boolean divideMemoryByOverprovisioning = HypervisorGuruBase.VmMinMemoryEqualsMemoryDividedByMemOverprovisioningFactor.valueIn(clustedId);
    boolean divideCpuByOverprovisioning = HypervisorGuruBase.VmMinCpuSpeedEqualsCpuSpeedDividedByCpuOverprovisioningFactor.valueIn(clustedId);
    int minMemory = (int) (newServiceOffering.getRamSize() / (divideMemoryByOverprovisioning ? memoryOvercommitRatio : 1));
    int minSpeed = (int) (newServiceOffering.getSpeed() / (divideCpuByOverprovisioning ? cpuOvercommitRatio : 1));
    ScaleVmCommand scaleVmCommand = new ScaleVmCommand(vm.getInstanceName(), newServiceOffering.getCpu(), minSpeed, newServiceOffering.getSpeed(), minMemory * 1024L * 1024L, newServiceOffering.getRamSize() * 1024L * 1024L, newServiceOffering.getLimitCpuUse());
    scaleVmCommand.getVirtualMachine().setId(vm.getId());
    scaleVmCommand.getVirtualMachine().setUuid(vm.getUuid());
    scaleVmCommand.getVirtualMachine().setType(vm.getType());
    Long dstHostId = vm.getHostId();
    if (vm.getHypervisorType().equals(HypervisorType.VMware)) {
        HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
        Map<String, String> details = hvGuru.getClusterSettings(vm.getId());
        scaleVmCommand.getVirtualMachine().setDetails(details);
    }
    ItWorkVO work = new ItWorkVO(UUID.randomUUID().toString(), _nodeId, State.Running, vm.getType(), vm.getId());
    work.setStep(Step.Prepare);
    work.setResourceType(ItWorkVO.ResourceType.Host);
    work.setResourceId(vm.getHostId());
    _workDao.persist(work);
    try {
        Answer reconfigureAnswer = _agentMgr.send(vm.getHostId(), scaleVmCommand);
        if (reconfigureAnswer == null || !reconfigureAnswer.getResult()) {
            s_logger.error("Unable to scale vm due to " + (reconfigureAnswer == null ? "" : reconfigureAnswer.getDetails()));
            throw new CloudRuntimeException("Unable to scale vm due to " + (reconfigureAnswer == null ? "" : reconfigureAnswer.getDetails()));
        }
        if (vm.getType().equals(VirtualMachine.Type.User)) {
            _userVmMgr.generateUsageEvent(vm, vm.isDisplayVm(), EventTypes.EVENT_VM_DYNAMIC_SCALE);
        }
        upgradeVmDb(vm.getId(), newServiceOffering, oldServiceOffering);
        if (reconfiguringOnExistingHost) {
            vm.setServiceOfferingId(oldServiceOffering.getId());
            _capacityMgr.releaseVmCapacity(vm, false, false, vm.getHostId());
            vm.setServiceOfferingId(newServiceOffering.getId());
            _capacityMgr.allocateVmCapacity(vm, false);
        }
    } catch (final OperationTimedoutException e) {
        throw new AgentUnavailableException("Operation timed out on reconfiguring " + vm, dstHostId);
    } catch (final AgentUnavailableException e) {
        throw e;
    }
    return vm;
}
Also used : OperationTimedoutException(com.cloud.exception.OperationTimedoutException) HostVO(com.cloud.host.HostVO) MigrateVmToPoolAnswer(com.cloud.agent.api.MigrateVmToPoolAnswer) StopAnswer(com.cloud.agent.api.StopAnswer) Answer(com.cloud.agent.api.Answer) UnPlugNicAnswer(com.cloud.agent.api.UnPlugNicAnswer) AgentControlAnswer(com.cloud.agent.api.AgentControlAnswer) RebootAnswer(com.cloud.agent.api.RebootAnswer) StartAnswer(com.cloud.agent.api.StartAnswer) ReplugNicAnswer(com.cloud.agent.api.ReplugNicAnswer) RestoreVMSnapshotAnswer(com.cloud.agent.api.RestoreVMSnapshotAnswer) PlugNicAnswer(com.cloud.agent.api.PlugNicAnswer) ClusterVMMetaDataSyncAnswer(com.cloud.agent.api.ClusterVMMetaDataSyncAnswer) CheckVirtualMachineAnswer(com.cloud.agent.api.CheckVirtualMachineAnswer) PrepareForMigrationAnswer(com.cloud.agent.api.PrepareForMigrationAnswer) HypervisorGuru(com.cloud.hypervisor.HypervisorGuru) CloudRuntimeException(com.cloud.utils.exception.CloudRuntimeException) AgentUnavailableException(com.cloud.exception.AgentUnavailableException) ScaleVmCommand(com.cloud.agent.api.ScaleVmCommand)

Example 93 with OperationTimedoutException

use of com.cloud.exception.OperationTimedoutException in project cloudstack by apache.

the class VirtualMachineManagerImpl method migrate.

protected void migrate(final VMInstanceVO vm, final long srcHostId, final DeployDestination dest) throws ResourceUnavailableException, ConcurrentOperationException {
    s_logger.info("Migrating " + vm + " to " + dest);
    final long dstHostId = dest.getHost().getId();
    final Host fromHost = _hostDao.findById(srcHostId);
    if (fromHost == null) {
        s_logger.info("Unable to find the host to migrate from: " + srcHostId);
        throw new CloudRuntimeException("Unable to find the host to migrate from: " + srcHostId);
    }
    if (fromHost.getClusterId() != dest.getCluster().getId() && vm.getHypervisorType() != HypervisorType.VMware) {
        final List<VolumeVO> volumes = _volsDao.findCreatedByInstance(vm.getId());
        for (final VolumeVO volume : volumes) {
            if (!_storagePoolDao.findById(volume.getPoolId()).getScope().equals(ScopeType.ZONE)) {
                s_logger.info("Source and destination host are not in same cluster and all volumes are not on zone wide primary store, unable to migrate to host: " + dest.getHost().getId());
                throw new CloudRuntimeException("Source and destination host are not in same cluster and all volumes are not on zone wide primary store, unable to migrate to host: " + dest.getHost().getId());
            }
        }
    }
    final VirtualMachineGuru vmGuru = getVmGuru(vm);
    if (vm.getState() != State.Running) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("VM is not Running, unable to migrate the vm " + vm);
        }
        throw new CloudRuntimeException("VM is not Running, unable to migrate the vm currently " + vm + " , current state: " + vm.getState().toString());
    }
    AlertManager.AlertType alertType = AlertManager.AlertType.ALERT_TYPE_USERVM_MIGRATE;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY_MIGRATE;
    }
    final VirtualMachineProfile vmSrc = new VirtualMachineProfileImpl(vm);
    vmSrc.setHost(fromHost);
    for (final NicProfile nic : _networkMgr.getNicProfiles(vm)) {
        vmSrc.addNic(nic);
    }
    final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm, null, _offeringDao.findById(vm.getId(), vm.getServiceOfferingId()), null, null);
    profile.setHost(dest.getHost());
    _networkMgr.prepareNicForMigration(profile, dest);
    volumeMgr.prepareForMigration(profile, dest);
    profile.setConfigDriveLabel(VmConfigDriveLabel.value());
    final VirtualMachineTO to = toVmTO(profile);
    final PrepareForMigrationCommand pfmc = new PrepareForMigrationCommand(to);
    ItWorkVO work = new ItWorkVO(UUID.randomUUID().toString(), _nodeId, State.Migrating, vm.getType(), vm.getId());
    work.setStep(Step.Prepare);
    work.setResourceType(ItWorkVO.ResourceType.Host);
    work.setResourceId(dstHostId);
    work = _workDao.persist(work);
    Answer pfma = null;
    try {
        pfma = _agentMgr.send(dstHostId, pfmc);
        if (pfma == null || !pfma.getResult()) {
            final String details = pfma != null ? pfma.getDetails() : "null answer returned";
            final String msg = "Unable to prepare for migration due to " + details;
            pfma = null;
            throw new AgentUnavailableException(msg, dstHostId);
        }
    } catch (final OperationTimedoutException e1) {
        throw new AgentUnavailableException("Operation timed out", dstHostId);
    } finally {
        if (pfma == null) {
            _networkMgr.rollbackNicForMigration(vmSrc, profile);
            volumeMgr.release(vm.getId(), dstHostId);
            work.setStep(Step.Done);
            _workDao.update(work.getId(), work);
        }
    }
    vm.setLastHostId(srcHostId);
    try {
        if (vm.getHostId() == null || vm.getHostId() != srcHostId || !changeState(vm, Event.MigrationRequested, dstHostId, work, Step.Migrating)) {
            _networkMgr.rollbackNicForMigration(vmSrc, profile);
            if (vm != null) {
                volumeMgr.release(vm.getId(), dstHostId);
            }
            s_logger.info("Migration cancelled because state has changed: " + vm);
            throw new ConcurrentOperationException("Migration cancelled because state has changed: " + vm);
        }
    } catch (final NoTransitionException e1) {
        _networkMgr.rollbackNicForMigration(vmSrc, profile);
        volumeMgr.release(vm.getId(), dstHostId);
        s_logger.info("Migration cancelled because " + e1.getMessage());
        throw new ConcurrentOperationException("Migration cancelled because " + e1.getMessage());
    } catch (final CloudRuntimeException e2) {
        _networkMgr.rollbackNicForMigration(vmSrc, profile);
        volumeMgr.release(vm.getId(), dstHostId);
        s_logger.info("Migration cancelled because " + e2.getMessage());
        work.setStep(Step.Done);
        _workDao.update(work.getId(), work);
        try {
            stateTransitTo(vm, Event.OperationFailed, srcHostId);
        } catch (final NoTransitionException e3) {
            s_logger.warn(e3.getMessage());
        }
        throw new CloudRuntimeException("Migration cancelled because " + e2.getMessage());
    }
    boolean migrated = false;
    Map<String, DpdkTO> dpdkInterfaceMapping = null;
    try {
        final boolean isWindows = _guestOsCategoryDao.findById(_guestOsDao.findById(vm.getGuestOSId()).getCategoryId()).getName().equalsIgnoreCase("Windows");
        Map<String, Boolean> vlanToPersistenceMap = getVlanToPersistenceMapForVM(vm.getId());
        final MigrateCommand mc = new MigrateCommand(vm.getInstanceName(), dest.getHost().getPrivateIpAddress(), isWindows, to, getExecuteInSequence(vm.getHypervisorType()));
        if (MapUtils.isNotEmpty(vlanToPersistenceMap)) {
            mc.setVlanToPersistenceMap(vlanToPersistenceMap);
        }
        boolean kvmAutoConvergence = StorageManager.KvmAutoConvergence.value();
        mc.setAutoConvergence(kvmAutoConvergence);
        mc.setHostGuid(dest.getHost().getGuid());
        dpdkInterfaceMapping = ((PrepareForMigrationAnswer) pfma).getDpdkInterfaceMapping();
        if (MapUtils.isNotEmpty(dpdkInterfaceMapping)) {
            mc.setDpdkInterfaceMapping(dpdkInterfaceMapping);
        }
        try {
            final Answer ma = _agentMgr.send(vm.getLastHostId(), mc);
            if (ma == null || !ma.getResult()) {
                final String details = ma != null ? ma.getDetails() : "null answer returned";
                throw new CloudRuntimeException(details);
            }
        } catch (final OperationTimedoutException e) {
            if (e.isActive()) {
                s_logger.warn("Active migration command so scheduling a restart for " + vm, e);
                _haMgr.scheduleRestart(vm, true);
            }
            throw new AgentUnavailableException("Operation timed out on migrating " + vm, dstHostId);
        }
        try {
            if (!changeState(vm, VirtualMachine.Event.OperationSucceeded, dstHostId, work, Step.Started)) {
                throw new ConcurrentOperationException("Unable to change the state for " + vm);
            }
        } catch (final NoTransitionException e1) {
            throw new ConcurrentOperationException("Unable to change state due to " + e1.getMessage());
        }
        try {
            if (!checkVmOnHost(vm, dstHostId)) {
                s_logger.error("Unable to complete migration for " + vm);
                try {
                    _agentMgr.send(srcHostId, new Commands(cleanup(vm, dpdkInterfaceMapping)), null);
                } catch (final AgentUnavailableException e) {
                    s_logger.error("AgentUnavailableException while cleanup on source host: " + srcHostId, e);
                }
                cleanup(vmGuru, new VirtualMachineProfileImpl(vm), work, Event.AgentReportStopped, true);
                throw new CloudRuntimeException("Unable to complete migration for " + vm);
            }
        } catch (final OperationTimedoutException e) {
            s_logger.warn("Error while checking the vm " + vm + " on host " + dstHostId, e);
        }
        migrated = true;
    } finally {
        if (!migrated) {
            s_logger.info("Migration was unsuccessful.  Cleaning up: " + vm);
            _networkMgr.rollbackNicForMigration(vmSrc, profile);
            volumeMgr.release(vm.getId(), dstHostId);
            _alertMgr.sendAlert(alertType, fromHost.getDataCenterId(), fromHost.getPodId(), "Unable to migrate vm " + vm.getInstanceName() + " from host " + fromHost.getName() + " in zone " + dest.getDataCenter().getName() + " and pod " + dest.getPod().getName(), "Migrate Command failed.  Please check logs.");
            try {
                _agentMgr.send(dstHostId, new Commands(cleanup(vm, dpdkInterfaceMapping)), null);
            } catch (final AgentUnavailableException ae) {
                s_logger.warn("Looks like the destination Host is unavailable for cleanup", ae);
            }
            _networkMgr.setHypervisorHostname(profile, dest, false);
            try {
                stateTransitTo(vm, Event.OperationFailed, srcHostId);
            } catch (final NoTransitionException e) {
                s_logger.warn(e.getMessage());
            }
        } else {
            _networkMgr.commitNicForMigration(vmSrc, profile);
            volumeMgr.release(vm.getId(), srcHostId);
            _networkMgr.setHypervisorHostname(profile, dest, true);
            updateVmPod(vm, dstHostId);
        }
        work.setStep(Step.Done);
        _workDao.update(work.getId(), work);
    }
}
Also used : AlertManager(com.cloud.alert.AlertManager) OperationTimedoutException(com.cloud.exception.OperationTimedoutException) DpdkTO(com.cloud.agent.api.to.DpdkTO) VirtualMachineTO(com.cloud.agent.api.to.VirtualMachineTO) MigrateCommand(com.cloud.agent.api.MigrateCommand) VolumeVO(com.cloud.storage.VolumeVO) CloudRuntimeException(com.cloud.utils.exception.CloudRuntimeException) AgentUnavailableException(com.cloud.exception.AgentUnavailableException) Commands(com.cloud.agent.manager.Commands) Host(com.cloud.host.Host) ConcurrentOperationException(com.cloud.exception.ConcurrentOperationException) MigrateVmToPoolAnswer(com.cloud.agent.api.MigrateVmToPoolAnswer) StopAnswer(com.cloud.agent.api.StopAnswer) Answer(com.cloud.agent.api.Answer) UnPlugNicAnswer(com.cloud.agent.api.UnPlugNicAnswer) AgentControlAnswer(com.cloud.agent.api.AgentControlAnswer) RebootAnswer(com.cloud.agent.api.RebootAnswer) StartAnswer(com.cloud.agent.api.StartAnswer) ReplugNicAnswer(com.cloud.agent.api.ReplugNicAnswer) RestoreVMSnapshotAnswer(com.cloud.agent.api.RestoreVMSnapshotAnswer) PlugNicAnswer(com.cloud.agent.api.PlugNicAnswer) ClusterVMMetaDataSyncAnswer(com.cloud.agent.api.ClusterVMMetaDataSyncAnswer) CheckVirtualMachineAnswer(com.cloud.agent.api.CheckVirtualMachineAnswer) PrepareForMigrationAnswer(com.cloud.agent.api.PrepareForMigrationAnswer) PrepareForMigrationCommand(com.cloud.agent.api.PrepareForMigrationCommand) NoTransitionException(com.cloud.utils.fsm.NoTransitionException)

Example 94 with OperationTimedoutException

use of com.cloud.exception.OperationTimedoutException in project cloudstack by apache.

the class VirtualMachineManagerImpl method advanceStop.

private void advanceStop(final VMInstanceVO vm, final boolean cleanUpEvenIfUnableToStop) throws AgentUnavailableException, OperationTimedoutException, ConcurrentOperationException {
    final State state = vm.getState();
    if (state == State.Stopped) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("VM is already stopped: " + vm);
        }
        return;
    }
    if (state == State.Destroyed || state == State.Expunging || state == State.Error) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Stopped called on " + vm + " but the state is " + state);
        }
        return;
    }
    final ItWorkVO work = _workDao.findByOutstandingWork(vm.getId(), vm.getState());
    if (work != null) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Found an outstanding work item for this vm " + vm + " with state:" + vm.getState() + ", work id:" + work.getId());
        }
    }
    final Long hostId = vm.getHostId();
    if (hostId == null) {
        if (!cleanUpEvenIfUnableToStop) {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("HostId is null but this is not a forced stop, cannot stop vm " + vm + " with state:" + vm.getState());
            }
            throw new CloudRuntimeException("Unable to stop " + vm);
        }
        try {
            stateTransitTo(vm, Event.AgentReportStopped, null, null);
        } catch (final NoTransitionException e) {
            s_logger.warn(e.getMessage());
        }
        if (work != null) {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("Updating work item to Done, id:" + work.getId());
            }
            work.setStep(Step.Done);
            _workDao.update(work.getId(), work);
        }
        return;
    } else {
        HostVO host = _hostDao.findById(hostId);
        if (!cleanUpEvenIfUnableToStop && vm.getState() == State.Running && host.getResourceState() == ResourceState.PrepareForMaintenance) {
            s_logger.debug("Host is in PrepareForMaintenance state - Stop VM operation on the VM id: " + vm.getId() + " is not allowed");
            throw new CloudRuntimeException("Stop VM operation on the VM id: " + vm.getId() + " is not allowed as host is preparing for maintenance mode");
        }
    }
    final VirtualMachineGuru vmGuru = getVmGuru(vm);
    final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
    try {
        if (!stateTransitTo(vm, Event.StopRequested, vm.getHostId())) {
            throw new ConcurrentOperationException(String.format("%s is being operated on.", vm.toString()));
        }
    } catch (final NoTransitionException e1) {
        if (!cleanUpEvenIfUnableToStop) {
            throw new CloudRuntimeException("We cannot stop " + vm + " when it is in state " + vm.getState());
        }
        final boolean doCleanup = true;
        if (s_logger.isDebugEnabled()) {
            s_logger.warn("Unable to transition the state but we're moving on because it's forced stop", e1);
        }
        if (doCleanup) {
            if (cleanup(vmGuru, new VirtualMachineProfileImpl(vm), work, Event.StopRequested, cleanUpEvenIfUnableToStop)) {
                try {
                    if (s_logger.isDebugEnabled() && work != null) {
                        s_logger.debug("Updating work item to Done, id:" + work.getId());
                    }
                    if (!changeState(vm, Event.AgentReportStopped, null, work, Step.Done)) {
                        throw new CloudRuntimeException("Unable to stop " + vm);
                    }
                } catch (final NoTransitionException e) {
                    s_logger.warn("Unable to cleanup " + vm);
                    throw new CloudRuntimeException("Unable to stop " + vm, e);
                }
            } else {
                if (s_logger.isDebugEnabled()) {
                    s_logger.debug("Failed to cleanup VM: " + vm);
                }
                throw new CloudRuntimeException("Failed to cleanup " + vm + " , current state " + vm.getState());
            }
        }
    }
    if (vm.getState() != State.Stopping) {
        throw new CloudRuntimeException("We cannot proceed with stop VM " + vm + " since it is not in 'Stopping' state, current state: " + vm.getState());
    }
    vmGuru.prepareStop(profile);
    Map<String, Boolean> vlanToPersistenceMap = getVlanToPersistenceMapForVM(vm.getId());
    final StopCommand stop = new StopCommand(vm, getExecuteInSequence(vm.getHypervisorType()), false, cleanUpEvenIfUnableToStop);
    stop.setControlIp(getControlNicIpForVM(vm));
    if (MapUtils.isNotEmpty(vlanToPersistenceMap)) {
        stop.setVlanToPersistenceMap(vlanToPersistenceMap);
    }
    boolean stopped = false;
    Answer answer = null;
    try {
        answer = _agentMgr.send(vm.getHostId(), stop);
        if (answer != null) {
            if (answer instanceof StopAnswer) {
                final StopAnswer stopAns = (StopAnswer) answer;
                if (vm.getType() == VirtualMachine.Type.User) {
                    final String platform = stopAns.getPlatform();
                    if (platform != null) {
                        final UserVmVO userVm = _userVmDao.findById(vm.getId());
                        _userVmDao.loadDetails(userVm);
                        userVm.setDetail(VmDetailConstants.PLATFORM, platform);
                        _userVmDao.saveDetails(userVm);
                    }
                }
            }
            stopped = answer.getResult();
            if (!stopped) {
                throw new CloudRuntimeException("Unable to stop the virtual machine due to " + answer.getDetails());
            }
            vmGuru.finalizeStop(profile, answer);
            final GPUDeviceTO gpuDevice = stop.getGpuDevice();
            if (gpuDevice != null) {
                _resourceMgr.updateGPUDetails(vm.getHostId(), gpuDevice.getGroupDetails());
            }
        } else {
            throw new CloudRuntimeException("Invalid answer received in response to a StopCommand on " + vm.instanceName);
        }
    } catch (AgentUnavailableException | OperationTimedoutException e) {
        s_logger.warn(String.format("Unable to stop %s due to [%s].", profile.toString(), e.toString()), e);
    } finally {
        if (!stopped) {
            if (!cleanUpEvenIfUnableToStop) {
                s_logger.warn("Unable to stop vm " + vm);
                try {
                    stateTransitTo(vm, Event.OperationFailed, vm.getHostId());
                } catch (final NoTransitionException e) {
                    s_logger.warn("Unable to transition the state " + vm, e);
                }
                throw new CloudRuntimeException("Unable to stop " + vm);
            } else {
                s_logger.warn("Unable to actually stop " + vm + " but continue with release because it's a force stop");
                vmGuru.finalizeStop(profile, answer);
            }
        } else {
            if (VirtualMachine.systemVMs.contains(vm.getType())) {
                HostVO systemVmHost = ApiDBUtils.findHostByTypeNameAndZoneId(vm.getDataCenterId(), vm.getHostName(), VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType()) ? Host.Type.SecondaryStorageVM : Host.Type.ConsoleProxy);
                if (systemVmHost != null) {
                    _agentMgr.agentStatusTransitTo(systemVmHost, Status.Event.ShutdownRequested, _nodeId);
                }
            }
        }
    }
    if (s_logger.isDebugEnabled()) {
        s_logger.debug(vm + " is stopped on the host.  Proceeding to release resource held.");
    }
    releaseVmResources(profile, cleanUpEvenIfUnableToStop);
    try {
        if (work != null) {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("Updating the outstanding work item to Done, id:" + work.getId());
            }
            work.setStep(Step.Done);
            _workDao.update(work.getId(), work);
        }
        boolean result = stateTransitTo(vm, Event.OperationSucceeded, null);
        if (result) {
            if (VirtualMachine.Type.User.equals(vm.type) && ResourceCountRunningVMsonly.value()) {
                ServiceOfferingVO offering = _offeringDao.findById(vm.getId(), vm.getServiceOfferingId());
                resourceCountDecrement(vm.getAccountId(), new Long(offering.getCpu()), new Long(offering.getRamSize()));
            }
        } else {
            throw new CloudRuntimeException("unable to stop " + vm);
        }
    } catch (final NoTransitionException e) {
        String message = String.format("Unable to stop %s due to [%s].", vm.toString(), e.getMessage());
        s_logger.warn(message, e);
        throw new CloudRuntimeException(message, e);
    }
}
Also used : OperationTimedoutException(com.cloud.exception.OperationTimedoutException) GPUDeviceTO(com.cloud.agent.api.to.GPUDeviceTO) ConcurrentOperationException(com.cloud.exception.ConcurrentOperationException) ServiceOfferingVO(com.cloud.service.ServiceOfferingVO) HostVO(com.cloud.host.HostVO) MigrateVmToPoolAnswer(com.cloud.agent.api.MigrateVmToPoolAnswer) StopAnswer(com.cloud.agent.api.StopAnswer) Answer(com.cloud.agent.api.Answer) UnPlugNicAnswer(com.cloud.agent.api.UnPlugNicAnswer) AgentControlAnswer(com.cloud.agent.api.AgentControlAnswer) RebootAnswer(com.cloud.agent.api.RebootAnswer) StartAnswer(com.cloud.agent.api.StartAnswer) ReplugNicAnswer(com.cloud.agent.api.ReplugNicAnswer) RestoreVMSnapshotAnswer(com.cloud.agent.api.RestoreVMSnapshotAnswer) PlugNicAnswer(com.cloud.agent.api.PlugNicAnswer) ClusterVMMetaDataSyncAnswer(com.cloud.agent.api.ClusterVMMetaDataSyncAnswer) CheckVirtualMachineAnswer(com.cloud.agent.api.CheckVirtualMachineAnswer) PrepareForMigrationAnswer(com.cloud.agent.api.PrepareForMigrationAnswer) StopCommand(com.cloud.agent.api.StopCommand) ResourceState(com.cloud.resource.ResourceState) State(com.cloud.vm.VirtualMachine.State) PowerState(com.cloud.vm.VirtualMachine.PowerState) CloudRuntimeException(com.cloud.utils.exception.CloudRuntimeException) AgentUnavailableException(com.cloud.exception.AgentUnavailableException) NoTransitionException(com.cloud.utils.fsm.NoTransitionException) StopAnswer(com.cloud.agent.api.StopAnswer)

Example 95 with OperationTimedoutException

use of com.cloud.exception.OperationTimedoutException in project cloudstack by apache.

the class VirtualMachineManagerImpl method sendStop.

protected boolean sendStop(final VirtualMachineGuru guru, final VirtualMachineProfile profile, final boolean force, final boolean checkBeforeCleanup) {
    final VirtualMachine vm = profile.getVirtualMachine();
    Map<String, Boolean> vlanToPersistenceMap = getVlanToPersistenceMapForVM(vm.getId());
    StopCommand stpCmd = new StopCommand(vm, getExecuteInSequence(vm.getHypervisorType()), checkBeforeCleanup);
    if (MapUtils.isNotEmpty(vlanToPersistenceMap)) {
        stpCmd.setVlanToPersistenceMap(vlanToPersistenceMap);
    }
    stpCmd.setControlIp(getControlNicIpForVM(vm));
    stpCmd.setVolumesToDisconnect(getVolumesToDisconnect(vm));
    final StopCommand stop = stpCmd;
    try {
        Answer answer = null;
        if (vm.getHostId() != null) {
            answer = _agentMgr.send(vm.getHostId(), stop);
        }
        if (answer != null && answer instanceof StopAnswer) {
            final StopAnswer stopAns = (StopAnswer) answer;
            if (vm.getType() == VirtualMachine.Type.User) {
                final String platform = stopAns.getPlatform();
                if (platform != null) {
                    final UserVmVO userVm = _userVmDao.findById(vm.getId());
                    _userVmDao.loadDetails(userVm);
                    userVm.setDetail(VmDetailConstants.PLATFORM, platform);
                    _userVmDao.saveDetails(userVm);
                }
            }
            final GPUDeviceTO gpuDevice = stop.getGpuDevice();
            if (gpuDevice != null) {
                _resourceMgr.updateGPUDetails(vm.getHostId(), gpuDevice.getGroupDetails());
            }
            if (!answer.getResult()) {
                final String details = answer.getDetails();
                s_logger.debug("Unable to stop VM due to " + details);
                return false;
            }
            guru.finalizeStop(profile, answer);
            final UserVmVO userVm = _userVmDao.findById(vm.getId());
            if (vm.getType() == VirtualMachine.Type.User) {
                if (userVm != null) {
                    userVm.setPowerState(PowerState.PowerOff);
                    _userVmDao.update(userVm.getId(), userVm);
                }
            }
        } else {
            s_logger.error("Invalid answer received in response to a StopCommand for " + vm.getInstanceName());
            return false;
        }
    } catch (final AgentUnavailableException | OperationTimedoutException e) {
        s_logger.warn(String.format("Unable to stop %s due to [%s].", vm.toString(), e.getMessage()), e);
        if (!force) {
            return false;
        }
    }
    return true;
}
Also used : MigrateVmToPoolAnswer(com.cloud.agent.api.MigrateVmToPoolAnswer) StopAnswer(com.cloud.agent.api.StopAnswer) Answer(com.cloud.agent.api.Answer) UnPlugNicAnswer(com.cloud.agent.api.UnPlugNicAnswer) AgentControlAnswer(com.cloud.agent.api.AgentControlAnswer) RebootAnswer(com.cloud.agent.api.RebootAnswer) StartAnswer(com.cloud.agent.api.StartAnswer) ReplugNicAnswer(com.cloud.agent.api.ReplugNicAnswer) RestoreVMSnapshotAnswer(com.cloud.agent.api.RestoreVMSnapshotAnswer) PlugNicAnswer(com.cloud.agent.api.PlugNicAnswer) ClusterVMMetaDataSyncAnswer(com.cloud.agent.api.ClusterVMMetaDataSyncAnswer) CheckVirtualMachineAnswer(com.cloud.agent.api.CheckVirtualMachineAnswer) PrepareForMigrationAnswer(com.cloud.agent.api.PrepareForMigrationAnswer) StopCommand(com.cloud.agent.api.StopCommand) OperationTimedoutException(com.cloud.exception.OperationTimedoutException) AgentUnavailableException(com.cloud.exception.AgentUnavailableException) GPUDeviceTO(com.cloud.agent.api.to.GPUDeviceTO) StopAnswer(com.cloud.agent.api.StopAnswer)

Aggregations

OperationTimedoutException (com.cloud.exception.OperationTimedoutException)131 AgentUnavailableException (com.cloud.exception.AgentUnavailableException)108 CloudRuntimeException (com.cloud.utils.exception.CloudRuntimeException)73 Answer (com.cloud.agent.api.Answer)50 HostVO (com.cloud.host.HostVO)45 ResourceUnavailableException (com.cloud.exception.ResourceUnavailableException)31 ArrayList (java.util.ArrayList)27 Commands (com.cloud.agent.manager.Commands)26 ConcurrentOperationException (com.cloud.exception.ConcurrentOperationException)23 AgentControlAnswer (com.cloud.agent.api.AgentControlAnswer)22 NoTransitionException (com.cloud.utils.fsm.NoTransitionException)22 UnPlugNicAnswer (com.cloud.agent.api.UnPlugNicAnswer)21 PlugNicAnswer (com.cloud.agent.api.PlugNicAnswer)19 CheckVirtualMachineAnswer (com.cloud.agent.api.CheckVirtualMachineAnswer)18 RestoreVMSnapshotAnswer (com.cloud.agent.api.RestoreVMSnapshotAnswer)17 StartAnswer (com.cloud.agent.api.StartAnswer)17 GuestOSVO (com.cloud.storage.GuestOSVO)17 VMSnapshotVO (com.cloud.vm.snapshot.VMSnapshotVO)16 Host (com.cloud.host.Host)15 UserVmVO (com.cloud.vm.UserVmVO)15