Use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.
Class MeshArbiter, method discoverGlobalFaultData_rcv.
/**
 * Receive loop of the fault-agreement round: collects site failure update
 * messages, site failure forward messages and fault reports from the mailbox
 * until {@code haveNecessaryFaultInfo} reports that enough information has
 * been gathered and {@code m_seeker.needForward} no longer asks for more
 * forwarding.
 *
 * Concurrent failures can be detected by additional reports from the
 * FaultDistributor (a {@code FAILURE} message that may not be ignored) or by
 * a mismatch in the set of failed hosts reported in a message from another
 * site.
 *
 * NOTE(review): the original comment also stated that this site sends the
 * broadcast to its own mailbox, which guarantees at least one response; the
 * sending side is not visible in this method - confirm against the caller.
 *
 * @param hsIds site ids handed to {@code updateFailedSitesLedger} and
 *        {@code mayIgnore}, and used to validate the sender of a forward
 *        message; presumably the sites participating in this round - confirm
 * @return {@code false} if a new, non-ignorable fault was reported while
 *         waiting (the fault message is put back at the front of the mailbox
 *         first); {@code true} once the loop condition is satisfied
 */
private boolean discoverGlobalFaultData_rcv(Set<Long> hsIds) {
long blockedOnReceiveStart = System.currentTimeMillis();
long lastReportTime = 0;
// Latches to true: once enough fault info has been seen it is never re-evaluated.
boolean haveEnough = false;
// Single-element array handed to m_seeker.needForward; presumably so the callee
// can update the counter in place - confirm against needForward.
int[] forwardStallCount = new int[] { FORWARD_STALL_COUNT };
do {
// The second argument is a timeout; presumably milliseconds - confirm.
// A null result means nothing arrived within that time.
VoltMessage m = m_mailbox.recvBlocking(receiveSubjects, 5);
/*
 * If fault resolution takes longer than 10 seconds start logging,
 * at most once every 60 seconds.
 */
final long now = System.currentTimeMillis();
if (now - blockedOnReceiveStart > 10000) {
if (now - lastReportTime > 60000) {
lastReportTime = System.currentTimeMillis();
// Called with 'true' here and 'false' below; presumably the flag enables
// logging of the missing information - confirm.
haveNecessaryFaultInfo(m_seeker.getSurvivors(), true);
}
}
if (m == null) {
// Send a heartbeat to keep the dead host timeout active. Needed because IV2 doesn't
// generate its own heartbeats to keep this running.
m_meshAide.sendHeartbeats(m_seeker.getSurvivors());
} else if (m.getSubject() == Subject.SITE_FAILURE_UPDATE.getId()) {
SiteFailureMessage sfm = (SiteFailureMessage) m;
// Drop updates from non-survivors, from sites already marked failed, or that
// report only failures already recorded. Note that 'continue' in a do/while
// jumps straight to the loop condition, skipping the haveEnough update and
// the forwarding step below.
if (!m_seeker.getSurvivors().contains(m.m_sourceHSId) || m_failedSites.contains(m.m_sourceHSId) || m_failedSites.containsAll(sfm.getFailedSites()))
continue;
// Remember updates that already carry a decision, keyed by the sending site.
if (!sfm.m_decision.isEmpty()) {
m_decidedSurvivors.put(sfm.m_sourceHSId, sfm);
}
updateFailedSitesLedger(hsIds, sfm);
m_seeker.add(sfm);
addForwardCandidate(new SiteFailureForwardMessage(sfm));
m_recoveryLog.info("Agreement, Received " + sfm);
} else if (m.getSubject() == Subject.SITE_FAILURE_FORWARD.getId()) {
SiteFailureForwardMessage fsfm = (SiteFailureForwardMessage) m;
// The forward is registered as a forward candidate before it is validated.
addForwardCandidate(fsfm);
// Drop the forward if its sender is not in hsIds, if the reporting site is
// itself a survivor or already marked failed, or if it reports nothing new.
// As above, 'continue' skips the rest of this iteration's body.
if (!hsIds.contains(fsfm.m_sourceHSId) || m_seeker.getSurvivors().contains(fsfm.m_reportingHSId) || m_failedSites.contains(fsfm.m_reportingHSId) || m_failedSites.containsAll(fsfm.getFailedSites()))
continue;
m_seeker.add(fsfm);
m_recoveryLog.info("Agreement, Received forward " + fsfm);
// An accepted forward resets the stall counter to its initial value.
forwardStallCount[0] = FORWARD_STALL_COUNT;
} else if (m.getSubject() == Subject.FAILURE.getId()) {
/*
 * If the fault distributor reports a new fault, ignore it if it is known, otherwise
 * re-deliver the message to ourself and then abort so that the process can restart.
 */
FaultMessage fm = (FaultMessage) m;
Discard ignoreIt = mayIgnore(hsIds, fm);
if (Discard.DoNot == ignoreIt) {
m_mailbox.deliverFront(m);
m_recoveryLog.info("Agreement, Detected a concurrent failure from FaultDistributor, new failed site " + CoreUtils.hsIdToString(fm.failedSite));
return false;
} else {
if (m_recoveryLog.isDebugEnabled()) {
ignoreIt.log(fm);
}
}
}
// Short-circuits once haveEnough is true, so the check is not repeated afterwards.
haveEnough = haveEnough || haveNecessaryFaultInfo(m_seeker.getSurvivors(), false);
if (haveEnough) {
// Drain the forward candidates: each one is sent to the sites returned by
// m_seeker.forWhomSiteIsDead for its key (if any) and then removed, whether
// or not it was sent.
Iterator<Map.Entry<Long, SiteFailureForwardMessage>> itr = m_forwardCandidates.entrySet().iterator();
while (itr.hasNext()) {
Map.Entry<Long, SiteFailureForwardMessage> e = itr.next();
Set<Long> unseenBy = m_seeker.forWhomSiteIsDead(e.getKey());
if (unseenBy.size() > 0) {
m_mailbox.send(Longs.toArray(unseenBy), e.getValue());
m_recoveryLog.info("Agreement, fowarding to " + CoreUtils.hsIdCollectionToString(unseenBy) + " " + e.getValue());
}
itr.remove();
}
}
} while (!haveEnough || m_seeker.needForward(forwardStallCount));
return true;
}
Use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.
Class MiniSite, method run.
/**
 * Main loop of the site: while {@code m_shouldContinue} is set, polls the
 * mailbox, dispatches whatever arrived, and sends heartbeats to
 * {@code m_currentHSIds} whenever more than 5 ms (wall clock) have passed
 * since the previous heartbeat.
 */
@Override
public void run() {
    long heartbeatSentAt = System.currentTimeMillis();
    while (m_shouldContinue.get()) {
        final VoltMessage received = m_mailbox.recvBlocking(5);
        // instanceof is false for null, so an empty poll falls through both branches.
        if (received instanceof LocalObjectMessage) {
            // Local object messages carry a Runnable payload that is executed in place.
            final Runnable task = (Runnable) ((LocalObjectMessage) received).payload;
            task.run();
        } else if (received != null) {
            processMessage(received);
        }

        final long now = System.currentTimeMillis();
        if (now - heartbeatSentAt > 5) {
            sendHeartbeats(m_currentHSIds);
            heartbeatSentAt = now;
        }
    }
}
Use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.
Class MiniMailbox, method recvBlocking.
/**
 * Blocks until a message is available for one of the given subjects and
 * returns it. Subjects are checked in array order, so earlier subjects take
 * priority over later ones.
 *
 * While no message is available the calling thread waits on this mailbox's
 * monitor; presumably the delivering side notifies it - confirm against the
 * deliver methods.
 *
 * @param subjects subjects whose queues are polled, in priority order
 * @return the first available message, or {@code null} if the thread was
 *         interrupted while waiting (the interrupt status is restored so the
 *         caller can observe it)
 */
@Override
public synchronized VoltMessage recvBlocking(Subject[] subjects) {
    while (true) {
        for (Subject s : subjects) {
            final Deque<VoltMessage> dq = m_messages.get(s.getId());
            // Same guard as recv(): every requested subject must have a queue.
            assert (dq != null);
            final VoltMessage message = dq.poll();
            if (message != null) {
                return message;
            }
        }
        try {
            this.wait();
        } catch (InterruptedException e) {
            // Do not swallow the interruption: restore the flag before bailing out.
            Thread.currentThread().interrupt();
            return null;
        }
    }
}
Use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.
Class MiniMailbox, method recv.
@Override
public synchronized VoltMessage recv(Subject[] subjects) {
for (Subject s : subjects) {
final Deque<VoltMessage> dq = m_messages.get(s.getId());
assert (dq != null);
VoltMessage m = dq.poll();
if (m != null) {
return m;
}
}
return null;
}
Use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.
Class AgreementSite, method recoveryRunLoop.
/**
 * Run loop used while this site is recovering. Each pass:
 * <ol>
 * <li>if still waiting for safety and the transaction queue reports a safe
 *     transaction id, records it, moves to {@code SENT_PROPOSAL} and sends a
 *     {@code RecoveryMessage} to the first site in {@code m_hsIds} that is
 *     not this site (id 0 if there is none);</li>
 * <li>polls the mailbox and processes any message received;</li>
 * <li>sends heartbeats when more than 5 ms have passed since the last ones;</li>
 * <li>once a recovery point is set, discards one queued transaction older
 *     than that point per pass, or - when none is pending and the snapshot
 *     has been received - processes the snapshot and returns.</li>
 * </ol>
 * The loop also ends when {@code m_recovering} or {@code m_shouldContinue}
 * becomes false.
 *
 * @throws Exception declared by the signature; which callee raises it is not
 *         visible in this block
 */
public void recoveryRunLoop() throws Exception {
    long heartbeatSentAt = System.currentTimeMillis();
    while (m_recovering && m_shouldContinue) {
        if (m_recoveryStage == RecoveryStage.WAITING_FOR_SAFETY) {
            final Long proposedTxnId = m_txnQueue.safeToRecover();
            if (proposedTxnId != null) {
                m_recoveryStage = RecoveryStage.SENT_PROPOSAL;
                m_recoverBeforeTxn = proposedTxnId;

                // Pick the first known site other than ourselves as the recovery source.
                long recoverySource = 0;
                for (Long candidate : m_hsIds) {
                    if (candidate != m_hsId) {
                        recoverySource = candidate;
                        break;
                    }
                }

                final RecoveryMessage proposal = new RecoveryMessage(m_hsId, proposedTxnId, -1);
                m_mailbox.send(recoverySource, proposal);
            }
        }

        final VoltMessage incoming = m_mailbox.recvBlocking(5);
        if (incoming != null) {
            processMessage(incoming);
        }

        final long now = System.currentTimeMillis();
        if (now - heartbeatSentAt > 5) {
            heartbeatSentAt = now;
            sendHeartbeats();
        }

        // Nothing further to do until a recovery point has been chosen.
        if (m_recoverBeforeTxn != null) {
            final boolean headPredatesRecoveryPoint =
                    m_txnQueue.peek() != null
                    && m_txnQueue.peek().txnId < m_recoverBeforeTxn.longValue();
            if (headPredatesRecoveryPoint) {
                m_transactionsById.remove(m_txnQueue.poll().txnId);
            } else if (m_recoveryStage == RecoveryStage.RECEIVED_SNAPSHOT) {
                processZKSnapshot();
                return;
            }
        }
    }
}
Aggregations