Use of com.cloud.legacymodel.exceptions.OperationTimedoutException in project cosmic by MissionCriticalCloud.
Class HighAvailabilityManagerImpl, method destroyVM.
/**
 * Destroys the VM referenced by an HA work item, provided it is still in the {@code Destroyed} state.
 *
 * @param work the HA work item identifying the instance to destroy
 * @return {@code null} when no further attempt is needed (the VM cannot be found, is not in the
 *         {@code Destroyed} state, was destroyed, or has no host id); otherwise
 *         {@code (System.currentTimeMillis() >> 10) + _stopRetryInterval}, returned after an
 *         agent-unavailable, timeout or concurrent-operation failure
 */
protected Long destroyVM(final HaWorkVO work) {
    final VirtualMachine vm = _itMgr.findById(work.getInstanceId());
    if (vm == null) {
        // Without this guard the log statement below would throw a NullPointerException
        // whenever the lookup yields nothing for this work item.
        s_logger.info("Unable to find VM " + work.getInstanceId() + ", throwing away " + work);
        return null;
    }

    s_logger.info("Destroying " + vm.toString());
    try {
        if (vm.getState() != State.Destroyed) {
            s_logger.info("VM is no longer in Destroyed state " + vm.toString());
            return null;
        }

        if (vm.getHostId() != null) {
            _itMgr.destroy(vm.getUuid());
            s_logger.info("Successfully destroy " + vm);
        } else if (s_logger.isDebugEnabled()) {
            s_logger.debug(vm + " has already been stopped");
        }
        // Either way there is nothing left to do for this work item.
        return null;
    } catch (final AgentUnavailableException e) {
        s_logger.debug("Agent is not available: " + e.getMessage());
    } catch (final OperationTimedoutException e) {
        s_logger.debug("operation timed out: " + e.getMessage());
    } catch (final ConcurrentOperationException e) {
        s_logger.debug("concurrent operation: " + e.getMessage());
    }

    // The destroy attempt failed; hand back the time value used to schedule the retry.
    return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
}
Use of com.cloud.legacymodel.exceptions.OperationTimedoutException in project cosmic by MissionCriticalCloud.
Class HighAvailabilityManagerImpl, method scheduleRestart.
/**
 * Persists a new HA work item for the given VM and wakes up the HA workers.
 * <p>
 * A VM without a host id is force-stopped first. When no investigation is requested, an alert is
 * sent if neither {@code _forceHA} nor the VM's own HA flag is set, and the VM is then force-stopped
 * and reloaded through {@code _instanceDao}. The work item is persisted in every case, including
 * after the alert has been sent.
 *
 * @param vm          the instance to schedule HA work for
 * @param investigate {@code true} to create the work item in the Investigating step,
 *                    {@code false} to create it in the Scheduled step
 */
@Override
public void scheduleRestart(VMInstanceVO vm, final boolean investigate) {
    Long hostId = vm.getHostId();
    if (hostId == null) {
        s_logger.debug("Found a vm that is scheduled to be restarted but has no host id: " + vm);
        forceStopForHa(vm, false);
    }

    if (!investigate) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("VM does not require investigation so I'm marking it as Stopped: " + vm.toString());
        }

        final AlertManager.AlertType alertType = alertTypeFor(vm);
        if (!_forceHA && !vm.isHaEnabled()) {
            final String hostDesc = "id:" + vm.getHostId() + ", availability zone id:" + vm.getDataCenterId() + ", pod id:" + vm.getPodIdToDeployIn();
            final String subject = "VM (name: " + vm.getHostName() + ", id: " + vm.getId() + ") stopped unexpectedly on host " + hostDesc;
            final String body = "Virtual Machine " + vm.getHostName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped unexpectedly.";
            _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), subject, body);
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("VM is not HA enabled so we're done.");
            }
        }

        vm = forceStopForHa(vm, true);
    }

    // Carry over the retry count of the first earlier work item that has retries recorded
    // and does not yet allow a new schedule.
    int timesTried = 0;
    for (final HaWorkVO previous : _haDao.findPreviousHA(vm.getId())) {
        if (timesTried < previous.getTimesTried() && !previous.canScheduleNew(_timeBetweenFailures)) {
            timesTried = previous.getTimesTried();
            break;
        }
    }

    if (hostId == null) {
        hostId = vm.getLastHostId();
    }

    final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), HaWork.HaWorkType.HA, investigate ? HaWorkStep.Investigating : HaWork.HaWorkStep.Scheduled, hostId != null ? hostId : 0L, vm.getState(), timesTried, vm.getUpdated());
    _haDao.persist(work);

    if (s_logger.isInfoEnabled()) {
        s_logger.info("Schedule vm for HA: " + vm);
    }
    wakeupWorkers();
}

/**
 * Force-stops the VM and optionally reloads it by uuid from {@code _instanceDao}.
 *
 * @param vm     the instance to stop
 * @param reload whether to return a freshly loaded instance instead of {@code vm}
 * @return the reloaded instance when {@code reload} is set, otherwise {@code vm}
 */
private VMInstanceVO forceStopForHa(final VMInstanceVO vm, final boolean reload) {
    try {
        _itMgr.advanceStop(vm.getUuid(), true);
        return reload ? _instanceDao.findByUuid(vm.getUuid()) : vm;
    } catch (final ResourceUnavailableException e) {
        throw unexpectedForcedStopFailure(e);
    } catch (final OperationTimedoutException e) {
        throw unexpectedForcedStopFailure(e);
    } catch (final ConcurrentOperationException e) {
        throw unexpectedForcedStopFailure(e);
    }
}

/** Trips the "should never happen" assertion and builds the wrapping runtime exception. */
private static CloudRuntimeException unexpectedForcedStopFailure(final Exception cause) {
    assert false : "How do we hit this when force is true?";
    return new CloudRuntimeException("Caught exception even though it should be handled.", cause);
}

/** Picks the alert type matching the VM's type; anything unrecognised is treated as a user VM. */
private static AlertManager.AlertType alertTypeFor(final VMInstanceVO vm) {
    if (VirtualMachineType.DomainRouter.equals(vm.getType())) {
        return AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER;
    }
    if (VirtualMachineType.ConsoleProxy.equals(vm.getType())) {
        return AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY;
    }
    if (VirtualMachineType.SecondaryStorageVm.equals(vm.getType())) {
        return AlertManager.AlertType.ALERT_TYPE_SSVM;
    }
    return AlertManager.AlertType.ALERT_TYPE_USERVM;
}
Use of com.cloud.legacymodel.exceptions.OperationTimedoutException in project cosmic by MissionCriticalCloud.
Class AgentHookBase, method startAgentHttpHandlerInVM.
/**
 * Builds a {@code StartConsoleProxyAgentHttpHandlerCommand} and sends it to the host returned by
 * {@code findConsoleProxyHost(startupCmd)}.
 * <p>
 * A random 16-byte keystore password is generated for each call. Keystore bits are loaded from
 * {@code _ksMgr} only when the {@code ConsoleProxyUrlDomain} configuration value is non-empty;
 * otherwise the command is sent with {@code null} keystore bits. Failures are logged rather than
 * propagated, except {@link OutOfMemoryError}, which terminates the JVM.
 *
 * @param startupCmd the startup command received from the console proxy
 */
@Override
public void startAgentHttpHandlerInVM(final StartupProxyCommand startupCmd) {
    try {
        final SecureRandom random = SecureRandom.getInstance("SHA1PRNG");
        final byte[] randomBytes = new byte[16];
        random.nextBytes(randomBytes);
        final String storePassword = Base64.encodeBase64String(randomBytes);

        byte[] ksBits = null;
        final String consoleProxyUrlDomain = _configDao.getValue(Config.ConsoleProxyUrlDomain.key());
        if (consoleProxyUrlDomain == null || consoleProxyUrlDomain.isEmpty()) {
            s_logger.debug("SSL is disabled for console proxy based on global config, skip loading certificates");
        } else {
            ksBits = _ksMgr.getKeystoreBits(ConsoleProxyManager.CERTIFICATE_NAME, ConsoleProxyManager.CERTIFICATE_NAME, storePassword);
            // ks manager raises exception if ksBits are null, hence no need to explicitly handle the condition
        }

        final StartConsoleProxyAgentHttpHandlerCommand cmd =
                new StartConsoleProxyAgentHttpHandlerCommand(ksBits, storePassword, _keysMgr.getAuthenticationKey());
        cmd.setEncryptorPassword(getEncryptorPassword());

        final HostVO consoleProxyHost = findConsoleProxyHost(startupCmd);
        assert (consoleProxyHost != null);
        if (consoleProxyHost == null) {
            // With assertions disabled the command used to be dropped without any trace.
            s_logger.error("Unable to find the console proxy host to send the http handling startup command to for proxy:" + startupCmd.getProxyVmId());
            return;
        }

        final Answer answer = _agentMgr.send(consoleProxyHost.getId(), cmd);
        if (answer == null || !answer.getResult()) {
            s_logger.error("Console proxy agent reported that it failed to execute http handling startup command");
        } else {
            s_logger.info("Successfully sent out command to start HTTP handling in console proxy agent");
        }
    } catch (final NoSuchAlgorithmException e) {
        s_logger.error("Unexpected exception in SecureRandom Algorithm selection ", e);
    } catch (final AgentUnavailableException e) {
        s_logger.error("Unable to send http handling startup command to the console proxy resource for proxy:" + startupCmd.getProxyVmId(), e);
    } catch (final OperationTimedoutException e) {
        s_logger.error("Unable to send http handling startup command(time out) to the console proxy resource for proxy:" + startupCmd.getProxyVmId(), e);
    } catch (final OutOfMemoryError e) {
        s_logger.error("Unrecoverable OutOfMemory Error, exit and let it be re-launched");
        System.exit(1);
    } catch (final Exception e) {
        // Not a timeout: the previous message wrongly repeated the "(time out)" wording.
        s_logger.error("Unexpected exception when sending http handling startup command to the console proxy resource for proxy:" + startupCmd.getProxyVmId(), e);
    }
}
Use of com.cloud.legacymodel.exceptions.OperationTimedoutException in project cosmic by MissionCriticalCloud.
Class KVMFencer, method fenceOff.
/**
 * Asks the KVM hosts of the given host's cluster that are Up to fence the VM off that host.
 *
 * @param vm   the VM to fence off
 * @param host the host the VM is to be fenced off from
 * @return {@code null} if {@code host} is not a KVM host; {@code true} as soon as one host returns
 *         a successful answer; {@code false} (after sending an alert) if no host did
 */
@Override
public Boolean fenceOff(final VirtualMachine vm, final Host host) {
    if (host.getHypervisorType() != HypervisorType.KVM) {
        s_logger.warn("Don't know how to fence non kvm hosts " + host.getHypervisorType());
        return null;
    }

    final List<HostVO> clusterHosts = _resourceMgr.listAllHostsInCluster(host.getClusterId());
    final FenceCommand fenceCmd = new FenceCommand(vm, host);

    // Number of Up KVM hosts seen; incremented before the self-check below, so the
    // host being fenced is included when it is itself Up.
    int upKvmHosts = 0;
    for (final HostVO candidate : clusterHosts) {
        final boolean eligible = candidate.getHypervisorType() == HypervisorType.KVM
                && candidate.getStatus() == HostStatus.Up;
        if (!eligible) {
            continue;
        }
        upKvmHosts++;

        // Never ask the host that is being fenced.
        if (candidate.getId() == host.getId()) {
            continue;
        }

        final FenceAnswer answer;
        try {
            answer = (FenceAnswer) _agentMgr.send(candidate.getId(), fenceCmd);
        } catch (final AgentUnavailableException e) {
            s_logger.info("Moving on to the next host because " + candidate.toString() + " is unavailable");
            continue;
        } catch (final OperationTimedoutException e) {
            s_logger.info("Moving on to the next host because " + candidate.toString() + " is unavailable");
            continue;
        }

        if (answer != null && answer.getResult()) {
            return true;
        }
    }

    final String alertSubject = "Unable to fence off host: " + host.getId();
    final String alertBody = "Fencing off host " + host.getId() + " did not succeed after asking " + upKvmHosts + " hosts. Check Agent logs for more information.";
    _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), alertSubject, alertBody);
    s_logger.error("Unable to fence off " + vm.toString() + " on " + host.toString());
    return false;
}
Use of com.cloud.legacymodel.exceptions.OperationTimedoutException in project cosmic by MissionCriticalCloud.
Class VirtualMachineManagerImpl, method orchestrateReboot.
/**
 * Sends a {@code RebootCommand} for the VM to the host it is currently assigned to.
 *
 * @param vmUuid uuid of the VM to reboot
 * @param params not read by this method
 * @throws CloudRuntimeException if the VM cannot be found, has active VM snapshot tasks, its host
 *                               cannot be retrieved, the command times out, or the host does not
 *                               return a successful reboot answer
 */
private void orchestrateReboot(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params) throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException {
    final VMInstanceVO vm = _vmDao.findByUuid(vmUuid);
    if (vm == null) {
        // Fail with a clear message instead of a NullPointerException on the first dereference.
        throw new CloudRuntimeException("Unable to find VM with uuid " + vmUuid);
    }

    // if there are active vm snapshots task, state change is not allowed
    if (_vmSnapshotMgr.hasActiveVMSnapshotTasks(vm.getId())) {
        final String snapshotMsg = "Unable to reboot VM " + vm + " due to: " + vm.getInstanceName() + " has active VM snapshots tasks";
        s_logger.error(snapshotMsg);
        throw new CloudRuntimeException(snapshotMsg);
    }

    final Zone zone = _zoneRepository.findById(vm.getDataCenterId()).orElse(null);
    final Host host = _hostDao.findById(vm.getHostId());
    if (host == null) {
        // Should findById throw an Exception if the host is not found?
        throw new CloudRuntimeException("Unable to retrieve host with id " + vm.getHostId());
    }
    final Cluster cluster = _entityMgr.findById(Cluster.class, host.getClusterId());
    final Pod pod = _entityMgr.findById(Pod.class, host.getPodId());
    final DeployDestination dest = new DeployDestination(zone, pod, cluster, host);

    try {
        final Commands cmds = new Commands(Command.OnError.Stop);
        cmds.addCommand(new RebootCommand(vm.getInstanceName(), getExecuteInSequence(vm.getHypervisorType())));
        _agentMgr.send(host.getId(), cmds);

        final Answer rebootAnswer = cmds.getAnswer(RebootAnswer.class);
        if (rebootAnswer != null && rebootAnswer.getResult()) {
            return;
        }

        // A missing or negative answer means the reboot did not happen; surface it to the caller
        // the same way the timeout path does instead of returning as if it had succeeded.
        final String errorMsg = "Unable to reboot VM " + vm + " on " + dest.getHost() + " due to " + (rebootAnswer == null ? "no reboot answer" : rebootAnswer.getDetails());
        s_logger.info(errorMsg);
        throw new CloudRuntimeException(errorMsg);
    } catch (final OperationTimedoutException e) {
        s_logger.warn("Unable to send the reboot command to host " + dest.getHost() + " for the vm " + vm + " due to operation timeout", e);
        // Keep the timeout as the cause so the original stack trace is not lost.
        throw new CloudRuntimeException("Failed to reboot the vm on host " + dest.getHost(), e);
    }
}
Aggregations