Search in sources :

Example 26 with OperationTimedoutException

use of com.cloud.legacymodel.exceptions.OperationTimedoutException in project cosmic by MissionCriticalCloud.

the class HighAvailabilityManagerImpl method destroyVM.

/**
 * Handles a "destroy" HA work item for the VM identified by {@code work.getInstanceId()}.
 *
 * <p>The VM is passed to {@code _itMgr.destroy} only when it is still in the
 * {@code State.Destroyed} state and still has a host id. In every other non-failing case
 * there is nothing to do and the work item is considered finished.
 *
 * @param work the HA work item naming the VM instance to destroy
 * @return {@code null} when no retry is needed (VM not found, VM no longer in the
 *         {@code Destroyed} state, destroy succeeded, or no host id is set); otherwise
 *         {@code (System.currentTimeMillis() >> 10) + _stopRetryInterval}, returned after the
 *         destroy attempt failed with an agent-unavailable, timeout or concurrent-operation
 *         exception
 */
protected Long destroyVM(final HaWorkVO work) {
    final VirtualMachine vm = _itMgr.findById(work.getInstanceId());
    if (vm == null) {
        // Without this guard the vm.toString() call below fails with a NullPointerException.
        s_logger.info("Unable to find VM " + work.getInstanceId() + ", nothing to destroy");
        return null;
    }
    s_logger.info("Destroying " + vm.toString());
    try {
        if (vm.getState() != State.Destroyed) {
            s_logger.info("VM is no longer in Destroyed state " + vm.toString());
            return null;
        }
        if (vm.getHostId() != null) {
            _itMgr.destroy(vm.getUuid());
            s_logger.info("Successfully destroy " + vm);
            return null;
        } else {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug(vm + " has already been stopped");
            }
            return null;
        }
    } catch (final AgentUnavailableException e) {
        s_logger.debug("Agent is not available: " + e.getMessage());
    } catch (final OperationTimedoutException e) {
        s_logger.debug("operation timed out: " + e.getMessage());
    } catch (final ConcurrentOperationException e) {
        s_logger.debug("concurrent operation: " + e.getMessage());
    }
    // Only reached when one of the exceptions above was caught: hand back a retry value.
    return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
}
Also used : OperationTimedoutException(com.cloud.legacymodel.exceptions.OperationTimedoutException) AgentUnavailableException(com.cloud.legacymodel.exceptions.AgentUnavailableException) ConcurrentOperationException(com.cloud.legacymodel.exceptions.ConcurrentOperationException) VirtualMachine(com.cloud.legacymodel.vm.VirtualMachine)

Example 27 with OperationTimedoutException

use of com.cloud.legacymodel.exceptions.OperationTimedoutException in project cosmic by MissionCriticalCloud.

the class HighAvailabilityManagerImpl method scheduleRestart.

/**
 * Persists an HA work item for {@code vm} and wakes up the HA workers.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If the VM has no host id, it is force-stopped via {@code _itMgr.advanceStop(uuid, true)}.</li>
 *   <li>If {@code investigate} is {@code false}: an alert is sent when neither {@code _forceHA}
 *       nor {@code vm.isHaEnabled()} is set, then the VM is force-stopped and re-read from
 *       {@code _instanceDao}. Scheduling continues even after the "not HA enabled" message
 *       is logged.</li>
 *   <li>The retry counter is seeded from the first previous HA work item that has a positive
 *       {@code getTimesTried()} and for which {@code canScheduleNew(_timeBetweenFailures)}
 *       is {@code false}.</li>
 *   <li>A new {@code HaWorkVO} is persisted, using the VM's last host id (or {@code 0L}) when
 *       no current host id exists.</li>
 * </ol>
 *
 * @param vm          the VM instance to schedule; reassigned internally after the forced stop
 * @param investigate {@code true} to create the work item in the {@code Investigating} step,
 *                    {@code false} to stop the VM now and create it in the {@code Scheduled} step
 * @throws CloudRuntimeException if the forced stop raises a resource-unavailable, timeout or
 *                               concurrent-operation exception
 */
@Override
public void scheduleRestart(VMInstanceVO vm, final boolean investigate) {
    Long hostId = vm.getHostId();

    if (hostId == null) {
        try {
            s_logger.debug("Found a vm that is scheduled to be restarted but has no host id: " + vm);
            _itMgr.advanceStop(vm.getUuid(), true);
        } catch (final ResourceUnavailableException | OperationTimedoutException | ConcurrentOperationException e) {
            assert false : "How do we hit this when force is true?";
            throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        }
    }

    if (!investigate) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("VM does not require investigation so I'm marking it as Stopped: " + vm.toString());
        }

        // Pick the alert category matching the kind of VM; plain user VMs are the fallback.
        final AlertManager.AlertType alertType;
        if (VirtualMachineType.DomainRouter.equals(vm.getType())) {
            alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER;
        } else if (VirtualMachineType.ConsoleProxy.equals(vm.getType())) {
            alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY;
        } else if (VirtualMachineType.SecondaryStorageVm.equals(vm.getType())) {
            alertType = AlertManager.AlertType.ALERT_TYPE_SSVM;
        } else {
            alertType = AlertManager.AlertType.ALERT_TYPE_USERVM;
        }

        if (!_forceHA && !vm.isHaEnabled()) {
            final String hostDesc = "id:" + vm.getHostId()
                    + ", availability zone id:" + vm.getDataCenterId()
                    + ", pod id:" + vm.getPodIdToDeployIn();
            final String subject = "VM (name: " + vm.getHostName() + ", id: " + vm.getId()
                    + ") stopped unexpectedly on host " + hostDesc;
            final String details = "Virtual Machine " + vm.getHostName() + " (id: " + vm.getId()
                    + ") running on host [" + vm.getHostId() + "] stopped unexpectedly.";
            _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), subject, details);
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("VM is not HA enabled so we're done.");
            }
        }

        try {
            _itMgr.advanceStop(vm.getUuid(), true);
            // Re-read the instance so the work item below sees the state after the stop.
            vm = _instanceDao.findByUuid(vm.getUuid());
        } catch (final ResourceUnavailableException | OperationTimedoutException | ConcurrentOperationException e) {
            assert false : "How do we hit this when force is true?";
            throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        }
    }

    // Carry over the retry count of the first earlier attempt that still blocks a fresh schedule.
    int timesTried = 0;
    for (final HaWorkVO previous : _haDao.findPreviousHA(vm.getId())) {
        if (timesTried < previous.getTimesTried() && !previous.canScheduleNew(_timeBetweenFailures)) {
            timesTried = previous.getTimesTried();
            break;
        }
    }

    if (hostId == null) {
        hostId = vm.getLastHostId();
    }

    final HaWorkStep initialStep = investigate ? HaWorkStep.Investigating : HaWork.HaWorkStep.Scheduled;
    final HaWorkVO work = new HaWorkVO(
            vm.getId(),
            vm.getType(),
            HaWork.HaWorkType.HA,
            initialStep,
            hostId != null ? hostId : 0L,
            vm.getState(),
            timesTried,
            vm.getUpdated());
    _haDao.persist(work);

    if (s_logger.isInfoEnabled()) {
        s_logger.info("Schedule vm for HA:  " + vm);
    }
    wakeupWorkers();
}
Also used : OperationTimedoutException(com.cloud.legacymodel.exceptions.OperationTimedoutException) AlertManager(com.cloud.alert.AlertManager) CloudRuntimeException(com.cloud.legacymodel.exceptions.CloudRuntimeException) ResourceUnavailableException(com.cloud.legacymodel.exceptions.ResourceUnavailableException) ConcurrentOperationException(com.cloud.legacymodel.exceptions.ConcurrentOperationException)

Example 28 with OperationTimedoutException

use of com.cloud.legacymodel.exceptions.OperationTimedoutException in project cosmic by MissionCriticalCloud.

the class AgentHookBase method startAgentHttpHandlerInVM.

/**
 * Builds a {@code StartConsoleProxyAgentHttpHandlerCommand} and sends it to the console proxy
 * host resolved from {@code startupCmd}.
 *
 * <p>A random 16-byte, Base64-encoded keystore password is generated for every call. Keystore
 * bits are loaded from {@code _ksMgr} only when the {@code ConsoleProxyUrlDomain} configuration
 * value is non-empty; otherwise the command is sent with {@code null} keystore bits.
 *
 * <p>Failures are logged rather than propagated, except for {@code OutOfMemoryError}, which
 * terminates the JVM with exit status 1.
 *
 * @param startupCmd the startup command received from the console proxy; used to locate the
 *                   proxy host and to identify the proxy VM in log messages
 */
@Override
public void startAgentHttpHandlerInVM(final StartupProxyCommand startupCmd) {
    try {
        final SecureRandom random = SecureRandom.getInstance("SHA1PRNG");
        final byte[] randomBytes = new byte[16];
        random.nextBytes(randomBytes);
        final String storePassword = Base64.encodeBase64String(randomBytes);
        byte[] ksBits = null;
        final String consoleProxyUrlDomain = _configDao.getValue(Config.ConsoleProxyUrlDomain.key());
        if (consoleProxyUrlDomain == null || consoleProxyUrlDomain.isEmpty()) {
            s_logger.debug("SSL is disabled for console proxy based on global config, skip loading certificates");
        } else {
            // ks manager raises exception if ksBits are null, hence no need to explicitly handle the condition
            ksBits = _ksMgr.getKeystoreBits(ConsoleProxyManager.CERTIFICATE_NAME, ConsoleProxyManager.CERTIFICATE_NAME, storePassword);
        }
        final StartConsoleProxyAgentHttpHandlerCommand cmd =
                new StartConsoleProxyAgentHttpHandlerCommand(ksBits, storePassword, _keysMgr.getAuthenticationKey());
        cmd.setEncryptorPassword(getEncryptorPassword());
        final HostVO consoleProxyHost = findConsoleProxyHost(startupCmd);
        assert (consoleProxyHost != null);
        if (consoleProxyHost == null) {
            // With assertions disabled the command used to be dropped silently; make that visible.
            s_logger.error("Unable to find the console proxy host, http handling startup command was not sent for proxy:" + startupCmd.getProxyVmId());
            return;
        }
        final Answer answer = _agentMgr.send(consoleProxyHost.getId(), cmd);
        if (answer == null || !answer.getResult()) {
            s_logger.error("Console proxy agent reported that it failed to execute http handling startup command");
        } else {
            s_logger.info("Successfully sent out command to start HTTP handling in console proxy agent");
        }
    } catch (final NoSuchAlgorithmException e) {
        s_logger.error("Unexpected exception in SecureRandom Algorithm selection ", e);
    } catch (final AgentUnavailableException e) {
        s_logger.error("Unable to send http handling startup command to the console proxy resource for proxy:" + startupCmd.getProxyVmId(), e);
    } catch (final OperationTimedoutException e) {
        s_logger.error("Unable to send http handling startup command(time out) to the console proxy resource for proxy:" + startupCmd.getProxyVmId(), e);
    } catch (final OutOfMemoryError e) {
        s_logger.error("Unrecoverable OutOfMemory Error, exit and let it be re-launched");
        System.exit(1);
    } catch (final Exception e) {
        // Not a timeout: this handler covers every other unexpected failure.
        s_logger.error("Unexpected exception when sending http handling startup command to the console proxy resource for proxy:" + startupCmd.getProxyVmId(), e);
    }
}
Also used : ConsoleAccessAuthenticationAnswer(com.cloud.legacymodel.communication.answer.ConsoleAccessAuthenticationAnswer) AgentControlAnswer(com.cloud.legacymodel.communication.answer.AgentControlAnswer) GetVncPortAnswer(com.cloud.legacymodel.communication.answer.GetVncPortAnswer) Answer(com.cloud.legacymodel.communication.answer.Answer) OperationTimedoutException(com.cloud.legacymodel.exceptions.OperationTimedoutException) AgentUnavailableException(com.cloud.legacymodel.exceptions.AgentUnavailableException) StartConsoleProxyAgentHttpHandlerCommand(com.cloud.legacymodel.communication.command.StartConsoleProxyAgentHttpHandlerCommand) SecureRandom(java.security.SecureRandom) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) HostVO(com.cloud.host.HostVO) OperationTimedoutException(com.cloud.legacymodel.exceptions.OperationTimedoutException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) AgentUnavailableException(com.cloud.legacymodel.exceptions.AgentUnavailableException)

Example 29 with OperationTimedoutException

use of com.cloud.legacymodel.exceptions.OperationTimedoutException in project cosmic by MissionCriticalCloud.

the class KVMFencer method fenceOff.

/**
 * Tries to fence off {@code vm} on {@code host} by sending a {@code FenceCommand} to the other
 * KVM hosts of the same cluster that are in the {@code Up} status.
 *
 * <p>Hosts that are unavailable, time out, or return something other than a {@code FenceAnswer}
 * are skipped. If no host reports success, an alert is sent and an error is logged.
 *
 * @param vm   the virtual machine to fence
 * @param host the host the VM is to be fenced off from
 * @return {@code null} if {@code host} is not a KVM host; {@code true} as soon as one host
 *         returns a successful {@code FenceAnswer}; {@code false} if none did
 */
@Override
public Boolean fenceOff(final VirtualMachine vm, final Host host) {
    if (host.getHypervisorType() != HypervisorType.KVM) {
        s_logger.warn("Don't know how to fence non kvm hosts " + host.getHypervisorType());
        return null;
    }
    final List<HostVO> hosts = _resourceMgr.listAllHostsInCluster(host.getClusterId());
    final FenceCommand fence = new FenceCommand(vm, host);
    // Counts every Up KVM host seen in the cluster, including the host being fenced itself.
    int i = 0;
    for (final HostVO h : hosts) {
        if (h.getHypervisorType() == HypervisorType.KVM) {
            if (h.getStatus() != HostStatus.Up) {
                continue;
            }
            i++;
            if (h.getId() == host.getId()) {
                // Never ask the host that is being fenced.
                continue;
            }
            final Object response;
            try {
                response = _agentMgr.send(h.getId(), fence);
            } catch (final AgentUnavailableException e) {
                s_logger.info("Moving on to the next host because " + h.toString() + " is unavailable");
                continue;
            } catch (final OperationTimedoutException e) {
                s_logger.info("Moving on to the next host because the fence command sent to " + h.toString() + " timed out");
                continue;
            }
            if (!(response instanceof FenceAnswer)) {
                // A blind cast here would throw ClassCastException and abort fencing on the remaining hosts.
                if (response != null) {
                    s_logger.warn("Moving on to the next host because " + h.toString() + " returned an unexpected answer: " + response);
                }
                continue;
            }
            if (((FenceAnswer) response).getResult()) {
                return true;
            }
        }
    }
    _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Unable to fence off host: " + host.getId(), "Fencing off host " + host.getId() + " did not succeed after asking " + i + " hosts. " + "Check Agent logs for more information.");
    s_logger.error("Unable to fence off " + vm.toString() + " on " + host.toString());
    return false;
}
Also used : FenceCommand(com.cloud.legacymodel.communication.command.FenceCommand) OperationTimedoutException(com.cloud.legacymodel.exceptions.OperationTimedoutException) AgentUnavailableException(com.cloud.legacymodel.exceptions.AgentUnavailableException) FenceAnswer(com.cloud.legacymodel.communication.answer.FenceAnswer) HostVO(com.cloud.host.HostVO)

Example 30 with OperationTimedoutException

use of com.cloud.legacymodel.exceptions.OperationTimedoutException in project cosmic by MissionCriticalCloud.

the class VirtualMachineManagerImpl method orchestrateReboot.

/**
 * Sends a {@code RebootCommand} for the VM identified by {@code vmUuid} to the host the VM is
 * currently assigned to.
 *
 * <p>If the agent returns no reboot answer or an unsuccessful one, the failure is only logged
 * at info level and the method returns normally.
 *
 * @param vmUuid UUID of the VM to reboot
 * @param params not read by this method
 * @throws CloudRuntimeException         if the VM has active VM snapshot tasks, its host cannot
 *                                       be found, or sending the command times out (the timeout
 *                                       exception is attached as the cause)
 * @throws InsufficientCapacityException declared by the signature; not thrown directly here
 * @throws ConcurrentOperationException  declared by the signature; not thrown directly here
 * @throws ResourceUnavailableException  declared by the signature; not thrown directly here,
 *                                       presumably propagated from the agent manager
 */
private void orchestrateReboot(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params) throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException {
    final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
    // if there are active vm snapshots task, state change is not allowed
    if (_vmSnapshotMgr.hasActiveVMSnapshotTasks(vm.getId())) {
        s_logger.error("Unable to reboot VM " + vm + " due to: " + vm.getInstanceName() + " has active VM snapshots tasks");
        throw new CloudRuntimeException("Unable to reboot VM " + vm + " due to: " + vm.getInstanceName() + " has active VM snapshots tasks");
    }
    final Zone zone = _zoneRepository.findById(vm.getDataCenterId()).orElse(null);
    final Host host = _hostDao.findById(vm.getHostId());
    if (host == null) {
        // Should findById throw an Exception if the host is not found?
        throw new CloudRuntimeException("Unable to retrieve host with id " + vm.getHostId());
    }
    final Cluster cluster = _entityMgr.findById(Cluster.class, host.getClusterId());
    final Pod pod = _entityMgr.findById(Pod.class, host.getPodId());
    final DeployDestination dest = new DeployDestination(zone, pod, cluster, host);
    try {
        final Commands cmds = new Commands(Command.OnError.Stop);
        cmds.addCommand(new RebootCommand(vm.getInstanceName(), getExecuteInSequence(vm.getHypervisorType())));
        _agentMgr.send(host.getId(), cmds);
        final Answer rebootAnswer = cmds.getAnswer(RebootAnswer.class);
        if (rebootAnswer != null && rebootAnswer.getResult()) {
            return;
        }
        s_logger.info("Unable to reboot VM " + vm + " on " + dest.getHost() + " due to " + (rebootAnswer == null ? " no reboot answer" : rebootAnswer.getDetails()));
    } catch (final OperationTimedoutException e) {
        s_logger.warn("Unable to send the reboot command to host " + dest.getHost() + " for the vm " + vm + " due to operation timeout", e);
        // Keep the timeout as the cause so callers can see why the reboot failed.
        throw new CloudRuntimeException("Failed to reboot the vm on host " + dest.getHost(), e);
    }
}
Also used : UnPlugNicAnswer(com.cloud.legacymodel.communication.answer.UnPlugNicAnswer) AgentControlAnswer(com.cloud.legacymodel.communication.answer.AgentControlAnswer) ClusterVMMetaDataSyncAnswer(com.cloud.legacymodel.communication.answer.ClusterVMMetaDataSyncAnswer) RestoreVMSnapshotAnswer(com.cloud.legacymodel.communication.answer.RestoreVMSnapshotAnswer) RebootAnswer(com.cloud.legacymodel.communication.answer.RebootAnswer) StartAnswer(com.cloud.legacymodel.communication.answer.StartAnswer) PlugNicAnswer(com.cloud.legacymodel.communication.answer.PlugNicAnswer) CheckVirtualMachineAnswer(com.cloud.legacymodel.communication.answer.CheckVirtualMachineAnswer) StopAnswer(com.cloud.legacymodel.communication.answer.StopAnswer) Answer(com.cloud.legacymodel.communication.answer.Answer) OperationTimedoutException(com.cloud.legacymodel.exceptions.OperationTimedoutException) RebootCommand(com.cloud.legacymodel.communication.command.RebootCommand) Pod(com.cloud.legacymodel.dc.Pod) CloudRuntimeException(com.cloud.legacymodel.exceptions.CloudRuntimeException) TimeZone(java.util.TimeZone) Zone(com.cloud.db.model.Zone) DeployDestination(com.cloud.deploy.DeployDestination) Commands(com.cloud.agent.manager.Commands) Cluster(com.cloud.legacymodel.dc.Cluster) Host(com.cloud.legacymodel.dc.Host)

Aggregations

OperationTimedoutException (com.cloud.legacymodel.exceptions.OperationTimedoutException)42 AgentUnavailableException (com.cloud.legacymodel.exceptions.AgentUnavailableException)32 CloudRuntimeException (com.cloud.legacymodel.exceptions.CloudRuntimeException)24 Answer (com.cloud.legacymodel.communication.answer.Answer)16 HostVO (com.cloud.host.HostVO)14 ResourceUnavailableException (com.cloud.legacymodel.exceptions.ResourceUnavailableException)12 AgentControlAnswer (com.cloud.legacymodel.communication.answer.AgentControlAnswer)10 UnPlugNicAnswer (com.cloud.legacymodel.communication.answer.UnPlugNicAnswer)9 ConcurrentOperationException (com.cloud.legacymodel.exceptions.ConcurrentOperationException)9 NoTransitionException (com.cloud.legacymodel.exceptions.NoTransitionException)9 CheckVirtualMachineAnswer (com.cloud.legacymodel.communication.answer.CheckVirtualMachineAnswer)8 PlugNicAnswer (com.cloud.legacymodel.communication.answer.PlugNicAnswer)8 RestoreVMSnapshotAnswer (com.cloud.legacymodel.communication.answer.RestoreVMSnapshotAnswer)8 StartAnswer (com.cloud.legacymodel.communication.answer.StartAnswer)8 Commands (com.cloud.agent.manager.Commands)7 ClusterVMMetaDataSyncAnswer (com.cloud.legacymodel.communication.answer.ClusterVMMetaDataSyncAnswer)7 RebootAnswer (com.cloud.legacymodel.communication.answer.RebootAnswer)7 StopAnswer (com.cloud.legacymodel.communication.answer.StopAnswer)7 Host (com.cloud.legacymodel.dc.Host)6 VolumeObjectTO (com.cloud.legacymodel.to.VolumeObjectTO)6