use of com.cloud.host.Status in project cloudstack by apache.
the class AgentManagerImpl method handleDisconnectWithoutInvestigation.
/**
 * Handles an agent disconnect without first investigating what happened to the host.
 *
 * <p>Looks up the host behind the attache, works out the status that goes with the given
 * event, deregisters the agent with that status and, when requested, calls
 * {@code disconnectAgent} for the host.
 *
 * @param attache      the attache of the disconnecting agent; its id is used as the host id
 * @param event        the status event that triggered the disconnect
 * @param transitState when {@code true} and the host record exists, {@code disconnectAgent} is invoked
 * @param removeAgent  not read by this method: the agent is deregistered regardless of its value
 *                     (NOTE(review): confirm whether callers expect this flag to be honored)
 * @return always {@code true}
 * @throws CloudRuntimeException if the host's current status has no transition for {@code event}
 */
protected boolean handleDisconnectWithoutInvestigation(final AgentAttache attache, final Status.Event event, final boolean transitState, final boolean removeAgent) {
    final long hostId = attache.getId();
    s_logger.info("Host " + hostId + " is disconnecting with event " + event);

    Status nextStatus = null;
    final HostVO host = _hostDao.findById(hostId);
    if (host == null) {
        // No host record: deregister the agent as Removed.
        s_logger.warn("Can't find host with " + hostId);
        nextStatus = Status.Removed;
    } else {
        final Status currentStatus = host.getStatus();
        if (currentStatus == Status.Down || currentStatus == Status.Alert || currentStatus == Status.Removed) {
            // For these statuses the transition lookup is skipped and the status is kept as is.
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("Host " + hostId + " is already " + currentStatus);
            }
            nextStatus = currentStatus;
        } else {
            try {
                nextStatus = currentStatus.getNextStatus(event);
            } catch (final NoTransitionException e) {
                final String err = "Cannot find next status for " + event + " as current status is " + currentStatus + " for agent " + hostId;
                s_logger.debug(err);
                // Keep the original exception as the cause so the stack trace is not lost.
                throw new CloudRuntimeException(err, e);
            }
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("The next status of agent " + hostId + " is " + nextStatus + ", current status is " + currentStatus);
            }
        }
    }

    if (s_logger.isDebugEnabled()) {
        s_logger.debug("Deregistering link for " + hostId + " with state " + nextStatus);
    }
    removeAgent(attache, nextStatus);

    // update the DB
    if (host != null && transitState) {
        disconnectAgent(host, event, _nodeId);
    }
    return true;
}
use of com.cloud.host.Status in project cloudstack by apache.
the class AgentManagerImpl method handleDisconnectWithInvestigation.
/**
 * Handles an agent disconnect, investigating the host first when the event would move it to
 * {@code Status.Alert}.
 *
 * <p>Depending on what {@code investigate} reports, the event may be replaced
 * ({@code HostDown}, {@code WaitedTooLong}, {@code AgentDisconnected}) and alerts may be sent
 * before the disconnect is delegated to {@code handleDisconnectWithoutInvestigation}. After the
 * delegation the host is re-read and, if it is {@code Status.Down}, a restart of its VMs is
 * scheduled through the HA manager.
 *
 * @param attache the attache of the disconnecting agent; its id is used as the host id
 * @param event   the status event that triggered the disconnect; may be replaced as described above
 * @return {@code false} when the method bails out without disconnecting (state undetermined and
 *         still within the alert wait, host found to be up, or host disconnected but still
 *         within the alert wait); {@code true} otherwise
 */
protected boolean handleDisconnectWithInvestigation(final AgentAttache attache, Status.Event event) {
    final long hostId = attache.getId();
    HostVO host = _hostDao.findById(hostId);
    if (host != null) {
        Status nextStatus = null;
        try {
            nextStatus = host.getStatus().getNextStatus(event);
        } catch (final NoTransitionException ne) {
            // The current status has no transition for this event (the original note mentions
            // agents already in Down, Alert or Removed). nextStatus stays null, so no
            // investigation is started below.
            s_logger.debug("Caught exception while getting agent's next status", ne);
        }
        if (nextStatus == Status.Alert) {
            /* OK, we are going to the bad status, let's see what happened */
            s_logger.info("Investigating why host " + hostId + " has disconnected with event " + event);
            Status determinedState = investigate(attache);
            // if state cannot be determined do nothing and bail out
            if (determinedState == null) {
                // The shift by 10 divides the millisecond clock by 1024 before comparing with
                // the last ping and the AlertWait value.
                if ((System.currentTimeMillis() >> 10) - host.getLastPinged() > AlertWait.value()) {
                    s_logger.warn("Agent " + hostId + " state cannot be determined for more than " + AlertWait + "(" + AlertWait.value() + ") seconds, will go to Alert state");
                    determinedState = Status.Alert;
                } else {
                    s_logger.warn("Agent " + hostId + " state cannot be determined, do nothing");
                    return false;
                }
            }
            final Status currentStatus = host.getStatus();
            s_logger.info("The agent from host " + hostId + " state determined is " + determinedState);
            if (determinedState == Status.Down) {
                final String message = "Host is down: " + host.getId() + "-" + host.getName() + ". Starting HA on the VMs";
                s_logger.error(message);
                if (host.getType() != Host.Type.SecondaryStorage && host.getType() != Host.Type.ConsoleProxy) {
                    _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host down, " + host.getId(), message);
                }
                event = Status.Event.HostDown;
            } else if (determinedState == Status.Up) {
                /* Got ping response from host, bring it back */
                s_logger.info("Agent is determined to be up and running");
                agentStatusTransitTo(host, Status.Event.Ping, _nodeId);
                return false;
            } else if (determinedState == Status.Disconnected) {
                s_logger.warn("Agent is disconnected but the host is still up: " + host.getId() + "-" + host.getName());
                if (currentStatus == Status.Disconnected) {
                    if ((System.currentTimeMillis() >> 10) - host.getLastPinged() > AlertWait.value()) {
                        s_logger.warn("Host " + host.getId() + " has been disconnected past the wait time it should be disconnected.");
                        event = Status.Event.WaitedTooLong;
                    } else {
                        s_logger.debug("Host " + host.getId() + " has been determined to be disconnected but it hasn't passed the wait time yet.");
                        return false;
                    }
                } else if (currentStatus == Status.Up) {
                    final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
                    final HostPodVO podVO = _podDao.findById(host.getPodId());
                    // Same fallback as the alert branch below: a missing pod record must not
                    // abort disconnect handling with a NullPointerException.
                    final String podName = podVO != null ? podVO.getName() : "NO POD";
                    final String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podName;
                    if (host.getType() != Host.Type.SecondaryStorage && host.getType() != Host.Type.ConsoleProxy) {
                        _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host disconnected, " + hostDesc, "If the agent for host [" + hostDesc + "] is not restarted within " + AlertWait + " seconds, host will go to Alert state");
                    }
                    event = Status.Event.AgentDisconnected;
                }
            } else {
                // if we end up here we are in alert state, send an alert
                final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
                final HostPodVO podVO = _podDao.findById(host.getPodId());
                final String podName = podVO != null ? podVO.getName() : "NO POD";
                final String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podName;
                _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host in ALERT state, " + hostDesc, "In availability zone " + host.getDataCenterId() + ", host is in alert state: " + host.getId() + "-" + host.getName());
            }
        } else {
            s_logger.debug("The next status of agent " + host.getId() + " is not Alert, no need to investigate what happened");
        }
    }
    handleDisconnectWithoutInvestigation(attache, event, true, true);
    // Re-read the host: its status may have changed while the disconnect was handled.
    host = _hostDao.findById(hostId);
    if (host != null && host.getStatus() == Status.Down) {
        _haMgr.scheduleRestartForVmsOnHost(host, true);
    }
    return true;
}
use of com.cloud.host.Status in project cloudstack by apache.
the class CheckOnHostCommandTest method testGetStatus.
/** Checks that the {@code host} fixture reports {@code Status.Up} from {@code getStatus()}. */
@Test
public void testGetStatus() {
    final Status reported = host.getStatus();
    assertTrue(Status.Up == reported);
}
use of com.cloud.host.Status in project cloudstack by apache.
the class CheckOnHostCommandTest method testGetState.
/** Checks that the {@code host} fixture reports {@code Status.Up} from {@code getState()}. */
@Test
public void testGetState() {
    final Status reported = host.getState();
    assertTrue(Status.Up == reported);
}
use of com.cloud.host.Status in project cloudstack by apache.
the class HighAvailabilityManagerImpl method investigate.
/**
 * Asks each configured investigator in turn for the state of the given host and returns the
 * first answer that is not {@code null}.
 *
 * @param hostId id of the host to investigate
 * @return {@code Status.Alert} when no host record exists for {@code hostId}; otherwise the
 *         first non-null status reported by an investigator, or {@code null} when none of
 *         them could determine the state
 */
@Override
public Status investigate(final long hostId) {
    final HostVO target = _hostDao.findById(hostId);
    if (target == null) {
        return Status.Alert;
    }

    for (final Investigator candidate : investigators) {
        final Status verdict = candidate.isAgentAlive(target);
        if (verdict == null) {
            // This investigator has no answer; try the next one.
            if (s_logger.isDebugEnabled()) {
                s_logger.debug(candidate.getName() + " unable to determine the state of the host. Moving on.");
            }
            continue;
        }
        if (s_logger.isDebugEnabled()) {
            s_logger.debug(candidate.getName() + " was able to determine host " + hostId + " is in " + verdict);
        }
        return verdict;
    }

    // No investigator produced an answer (or none are configured).
    return null;
}
Aggregations