use of com.cloud.host.Status in project cosmic by MissionCriticalCloud.
the class NetworkOrchestrator method destroyNetwork.
@Override
@DB
public boolean destroyNetwork(final long networkId, final ReservationContext context, final boolean forced) {
final Account callerAccount = context.getAccount();
NetworkVO network = _networksDao.findById(networkId);
if (network == null) {
s_logger.debug("Unable to find network with id: " + networkId);
return false;
}
// Make sure that there are no user vms in the network that are not Expunged/Error
final List<UserVmVO> userVms = _userVmDao.listByNetworkIdAndStates(networkId);
for (final UserVmVO vm : userVms) {
if (!(vm.getState() == VirtualMachine.State.Expunging && vm.getRemoved() != null)) {
s_logger.warn("Can't delete the network, not all user vms are expunged. Vm " + vm + " is in " + vm.getState() + " state");
return false;
}
}
// Don't allow to delete network via api call when it has vms assigned to it
final int nicCount = getActiveNicsInNetwork(networkId);
if (nicCount > 0) {
s_logger.debug("The network id=" + networkId + " has active Nics, but shouldn't.");
// at this point we have already determined that there are no active user vms in network
// if the op_networks table shows active nics, it's a bug in releasing nics updating op_networks
_networksDao.changeActiveNicsBy(networkId, -1 * nicCount);
}
// In Basic zone, make sure that there are no non-removed console proxies and SSVMs using the network
final Zone zone = _zoneRepository.findOne(network.getDataCenterId());
if (zone.getNetworkType() == com.cloud.model.enumeration.NetworkType.Basic) {
final List<VMInstanceVO> systemVms = _vmDao.listNonRemovedVmsByTypeAndNetwork(network.getId(), Type.ConsoleProxy, Type.SecondaryStorageVm);
if (systemVms != null && !systemVms.isEmpty()) {
s_logger.warn("Can't delete the network, not all consoleProxy/secondaryStorage vms are expunged");
return false;
}
}
// Shutdown network first
shutdownNetwork(networkId, context, false);
// get updated state for the network
network = _networksDao.findById(networkId);
if (network.getState() != Network.State.Allocated && network.getState() != Network.State.Setup && !forced) {
s_logger.debug("Network is not not in the correct state to be destroyed: " + network.getState());
return false;
}
boolean success = true;
if (!cleanupNetworkResources(networkId, callerAccount, context.getCaller().getId())) {
s_logger.warn("Unable to delete network id=" + networkId + ": failed to cleanup network resources");
return false;
}
// get providers to destroy
final List<Provider> providersToDestroy = getNetworkProviders(network.getId());
for (final NetworkElement element : networkElements) {
if (providersToDestroy.contains(element.getProvider())) {
try {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Sending destroy to " + element);
}
if (!element.destroy(network, context)) {
success = false;
s_logger.warn("Unable to complete destroy of the network: failed to destroy network element " + element.getName());
}
} catch (final ResourceUnavailableException e) {
s_logger.warn("Unable to complete destroy of the network due to element: " + element.getName(), e);
success = false;
} catch (final ConcurrentOperationException e) {
s_logger.warn("Unable to complete destroy of the network due to element: " + element.getName(), e);
success = false;
} catch (final Exception e) {
s_logger.warn("Unable to complete destroy of the network due to element: " + element.getName(), e);
success = false;
}
}
}
if (success) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Network id=" + networkId + " is destroyed successfully, cleaning up corresponding resources now.");
}
final NetworkVO networkFinal = network;
try {
Transaction.execute(new TransactionCallbackNoReturn() {
@Override
public void doInTransactionWithoutResult(final TransactionStatus status) {
final NetworkGuru guru = AdapterBase.getAdapterByName(networkGurus, networkFinal.getGuruName());
// Deleting sync networks
final List<NetworkVO> syncNetworks = _networksDao.listSyncNetworksByRelatedNetwork(networkId);
syncNetworks.forEach(syncNetwork -> removeAndShutdownSyncNetwork(syncNetwork.getId()));
guru.trash(networkFinal, _networkOfferingDao.findById(networkFinal.getNetworkOfferingId()));
if (!deleteVlansInNetwork(networkFinal.getId(), context.getCaller().getId(), callerAccount)) {
s_logger.warn("Failed to delete network " + networkFinal + "; was unable to cleanup corresponding ip ranges");
throw new CloudRuntimeException("Failed to delete network " + networkFinal + "; was unable to cleanup corresponding ip ranges");
} else {
// commit transaction only when ips and vlans for the network are released successfully
try {
stateTransitTo(networkFinal, Event.DestroyNetwork);
} catch (final NoTransitionException e) {
s_logger.debug(e.getMessage());
}
if (_networksDao.remove(networkFinal.getId())) {
final NetworkDomainVO networkDomain = _networkDomainDao.getDomainNetworkMapByNetworkId(networkFinal.getId());
if (networkDomain != null) {
_networkDomainDao.remove(networkDomain.getId());
}
final NetworkAccountVO networkAccount = _networkAccountDao.getAccountNetworkMapByNetworkId(networkFinal.getId());
if (networkAccount != null) {
_networkAccountDao.remove(networkAccount.getId());
}
}
final NetworkOffering ntwkOff = _entityMgr.findById(NetworkOffering.class, networkFinal.getNetworkOfferingId());
final boolean updateResourceCount = resourceCountNeedsUpdate(ntwkOff, networkFinal.getAclType());
if (updateResourceCount) {
_resourceLimitMgr.decrementResourceCount(networkFinal.getAccountId(), ResourceType.network, networkFinal.getDisplayNetwork());
}
}
}
});
if (_networksDao.findById(network.getId()) == null) {
// remove its related ACL permission
final Pair<Class<?>, Long> networkMsg = new Pair<>(Network.class, networkFinal.getId());
_messageBus.publish(_name, EntityManager.MESSAGE_REMOVE_ENTITY_EVENT, PublishScope.LOCAL, networkMsg);
}
return true;
} catch (final CloudRuntimeException e) {
s_logger.error("Failed to delete network", e);
return false;
}
}
return success;
}
use of com.cloud.host.Status in project cosmic by MissionCriticalCloud.
the class CheckOnHostCommandTest method testGetState.
@Test
public void testGetState() {
final Status s = host.getState();
assertTrue(s == Status.Up);
}
use of com.cloud.host.Status in project cosmic by MissionCriticalCloud.
the class HighAvailabilityManagerImpl method investigate.
@Override
public Status investigate(final long hostId) {
final HostVO host = _hostDao.findById(hostId);
if (host == null) {
return Status.Alert;
}
Status hostState = null;
for (final Investigator investigator : investigators) {
hostState = investigator.isAgentAlive(host);
if (hostState != null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug(investigator.getName() + " was able to determine host " + hostId + " is in " + hostState.toString());
}
return hostState;
}
if (s_logger.isDebugEnabled()) {
s_logger.debug(investigator.getName() + " unable to determine the state of the host. Moving on.");
}
}
return hostState;
}
use of com.cloud.host.Status in project cosmic by MissionCriticalCloud.
the class ManagementIPSystemVMInvestigator method isVmAlive.
@Override
public boolean isVmAlive(final VirtualMachine vm, final Host host) throws UnknownVM {
if (!vm.getType().isUsedBySystem()) {
s_logger.debug("Not a System Vm, unable to determine state of " + vm + " returning null");
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Testing if " + vm + " is alive");
}
if (vm.getHostId() == null) {
s_logger.debug("There's no host id for " + vm);
throw new UnknownVM();
}
final HostVO vmHost = _hostDao.findById(vm.getHostId());
if (vmHost == null) {
s_logger.debug("Unable to retrieve the host by using id " + vm.getHostId());
throw new UnknownVM();
}
final List<? extends Nic> nics = _networkMgr.getNicsForTraffic(vm.getId(), TrafficType.Management);
if (nics.size() == 0) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Unable to find a management nic, cannot ping this system VM, unable to determine state of " + vm + " returning null");
}
throw new UnknownVM();
}
for (final Nic nic : nics) {
if (nic.getIPv4Address() == null) {
continue;
}
// get the data center IP address, find a host on the pod, use that host to ping the data center IP address
final List<Long> otherHosts = findHostByPod(vmHost.getPodId(), vm.getHostId());
for (final Long otherHost : otherHosts) {
final Status vmState = testIpAddress(otherHost, nic.getIPv4Address());
assert vmState != null;
// In case of Status.Unknown, next host will be tried
if (vmState == Status.Up) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("successfully pinged vm's private IP (" + vm.getPrivateIpAddress() + "), returning that the VM is up");
}
return Boolean.TRUE;
} else if (vmState == Status.Down) {
// We can't ping the VM directly...if we can ping the host, then report the VM down.
// If we can't ping the host, then we don't have enough information.
final Status vmHostState = testIpAddress(otherHost, vmHost.getPrivateIpAddress());
assert vmHostState != null;
if (vmHostState == Status.Up) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("successfully pinged vm's host IP (" + vmHost.getPrivateIpAddress() + "), but could not ping VM, returning that the VM is down");
}
return Boolean.FALSE;
}
}
}
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("unable to determine state of " + vm + " returning null");
}
throw new UnknownVM();
}
use of com.cloud.host.Status in project cosmic by MissionCriticalCloud.
the class AgentManagerImpl method handleDisconnectWithInvestigation.
protected boolean handleDisconnectWithInvestigation(final AgentAttache attache, Status.Event event) {
final long hostId = attache.getId();
HostVO host = _hostDao.findById(hostId);
if (host != null) {
Status nextStatus = null;
try {
nextStatus = host.getStatus().getNextStatus(event);
} catch (final NoTransitionException ne) {
/*
* Agent may be currently in status of Down, Alert, Removed, namely there is no next status for some events. Why this can happen? Ask God not me. I hate there was
* no piece of comment for code handling race condition. God knew what race condition the code dealt with!
*/
s_logger.debug("Caught exception while getting agent's next status", ne);
}
if (nextStatus == Status.Alert) {
/* OK, we are going to the bad status, let's see what happened */
s_logger.info("Investigating why host " + hostId + " has disconnected with event " + event);
Status determinedState = investigate(attache);
// if state cannot be determined do nothing and bail out
if (determinedState == null) {
if ((System.currentTimeMillis() >> 10) - host.getLastPinged() > AlertWait.value()) {
s_logger.warn("Agent " + hostId + " state cannot be determined for more than " + AlertWait + "(" + AlertWait.value() + ") seconds, will go to Alert state");
determinedState = Status.Alert;
} else {
s_logger.warn("Agent " + hostId + " state cannot be determined, do nothing");
return false;
}
}
final Status currentStatus = host.getStatus();
s_logger.info("The agent from host " + hostId + " state determined is " + determinedState);
if (determinedState == Status.Down) {
final String message = "Host is down: " + host.getId() + "-" + host.getName() + ". Starting HA on the VMs";
s_logger.error(message);
if (host.getType() != Host.Type.SecondaryStorage && host.getType() != Host.Type.ConsoleProxy) {
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host down, " + host.getId(), message);
}
event = Status.Event.HostDown;
} else if (determinedState == Status.Up) {
/* Got ping response from host, bring it back */
s_logger.info("Agent is determined to be up and running");
agentStatusTransitTo(host, Status.Event.Ping, _nodeId);
return false;
} else if (determinedState == Status.Disconnected) {
s_logger.warn("Agent is disconnected but the host is still up: " + host.getId() + "-" + host.getName());
if (currentStatus == Status.Disconnected) {
if ((System.currentTimeMillis() >> 10) - host.getLastPinged() > AlertWait.value()) {
s_logger.warn("Host " + host.getId() + " has been disconnected past the wait time it should be disconnected.");
event = Status.Event.WaitedTooLong;
} else {
s_logger.debug("Host " + host.getId() + " has been determined to be disconnected but it hasn't passed the wait time yet.");
return false;
}
} else if (currentStatus == Status.Up) {
final Zone zone = _zoneRepository.findOne(host.getDataCenterId());
final HostPodVO podVO = _podDao.findById(host.getPodId());
final String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + zone.getName() + ", pod: " + podVO.getName();
if (host.getType() != Host.Type.SecondaryStorage && host.getType() != Host.Type.ConsoleProxy) {
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host disconnected, " + hostDesc, "If the agent for host [" + hostDesc + "] is not restarted within " + AlertWait + " seconds, host will go to Alert state");
}
event = Status.Event.AgentDisconnected;
}
} else {
// if we end up here we are in alert state, send an alert
final Zone zone = _zoneRepository.findOne(host.getDataCenterId());
final HostPodVO podVO = _podDao.findById(host.getPodId());
final String podName = podVO != null ? podVO.getName() : "NO POD";
final String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + zone.getName() + ", pod: " + podName;
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host in ALERT state, " + hostDesc, "In availability zone " + host.getDataCenterId() + ", host is in alert state: " + host.getId() + "-" + host.getName());
}
} else {
s_logger.debug("The next status of agent " + host.getId() + " is not Alert, no need to investigate what happened");
}
}
handleDisconnectWithoutInvestigation(attache, event, true, true);
// Maybe the host magically reappeared?
host = _hostDao.findById(hostId);
if (host != null && host.getStatus() == Status.Down) {
_haMgr.scheduleRestartForVmsOnHost(host, true);
}
return true;
}
Aggregations