Search in sources :

Example 11 with VoltMessage

use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.

the class MeshArbiter method discoverGlobalFaultData_rcv.

/**
 * Receive loop of the fault-agreement round: drains site failure updates,
 * forwarded site failure reports and fault distributor notifications from
 * this site's mailbox until {@code haveNecessaryFaultInfo} reports that
 * enough information has been gathered and {@code m_seeker} no longer
 * asks for forwards.
 *
 * Concurrent failures can be detected by additional reports from the
 * FaultDistributor; when such a report may not be ignored the message is
 * put back at the front of the mailbox and this method gives up.
 *
 * @param hsIds set of site ids handed to {@code updateFailedSitesLedger}
 *        and {@code mayIgnore}, and used to filter forwarded reports
 *        (presumably the sites taking part in this round -- confirm
 *        against the caller)
 * @return {@code true} once the loop terminates normally; {@code false}
 *         when a fault that may not be ignored arrived mid-round
 */
private boolean discoverGlobalFaultData_rcv(Set<Long> hsIds) {
    long blockedOnReceiveStart = System.currentTimeMillis();
    long lastReportTime = 0;
    // Sticky flag: once it becomes true haveNecessaryFaultInfo is not consulted again below.
    boolean haveEnough = false;
    // Wrapped in an array so it can be passed by reference to m_seeker.needForward
    // (presumably decremented there -- confirm); reset below when a forward is accepted.
    int[] forwardStallCount = new int[] { FORWARD_STALL_COUNT };
    do {
        // Second argument is presumably a timeout in milliseconds; a null result
        // is treated below as "nothing arrived".
        VoltMessage m = m_mailbox.recvBlocking(receiveSubjects, 5);
        /*
         * If fault resolution takes longer than 10 seconds start reporting,
         * but no more often than once every 60 seconds.
         */
        final long now = System.currentTimeMillis();
        if (now - blockedOnReceiveStart > 10000) {
            if (now - lastReportTime > 60000) {
                lastReportTime = System.currentTimeMillis();
                // Return value intentionally unused here; the 'true' flag presumably
                // turns on logging of the missing information -- confirm.
                haveNecessaryFaultInfo(m_seeker.getSurvivors(), true);
            }
        }
        if (m == null) {
            // Send a heartbeat to keep the dead host timeout active.  Needed because IV2 doesn't
            // generate its own heartbeats to keep this running.
            m_meshAide.sendHeartbeats(m_seeker.getSurvivors());
        } else if (m.getSubject() == Subject.SITE_FAILURE_UPDATE.getId()) {
            SiteFailureMessage sfm = (SiteFailureMessage) m;
            // Drop updates from non-survivors, from sites already marked failed, or that
            // report nothing beyond the already known failed sites. Note that 'continue'
            // in a do-while jumps straight to the loop condition, so the haveEnough
            // evaluation and the forwarding step below are skipped for this iteration.
            if (!m_seeker.getSurvivors().contains(m.m_sourceHSId) || m_failedSites.contains(m.m_sourceHSId) || m_failedSites.containsAll(sfm.getFailedSites()))
                continue;
            // Remember messages that already carry a decision, keyed by their sender.
            if (!sfm.m_decision.isEmpty()) {
                m_decidedSurvivors.put(sfm.m_sourceHSId, sfm);
            }
            updateFailedSitesLedger(hsIds, sfm);
            m_seeker.add(sfm);
            addForwardCandidate(new SiteFailureForwardMessage(sfm));
            m_recoveryLog.info("Agreement, Received " + sfm);
        } else if (m.getSubject() == Subject.SITE_FAILURE_FORWARD.getId()) {
            SiteFailureForwardMessage fsfm = (SiteFailureForwardMessage) m;
            // Registered as a forward candidate before filtering, i.e. even when the
            // message is rejected by the check below.
            addForwardCandidate(fsfm);
            // Drop forwards whose sender is not in hsIds, whose reporting site is a
            // survivor or already failed, or that add no new failed sites. As above,
            // 'continue' skips the remainder of this iteration.
            if (!hsIds.contains(fsfm.m_sourceHSId) || m_seeker.getSurvivors().contains(fsfm.m_reportingHSId) || m_failedSites.contains(fsfm.m_reportingHSId) || m_failedSites.containsAll(fsfm.getFailedSites()))
                continue;
            m_seeker.add(fsfm);
            m_recoveryLog.info("Agreement, Received forward " + fsfm);
            // An accepted forward resets the stall counter.
            forwardStallCount[0] = FORWARD_STALL_COUNT;
        } else if (m.getSubject() == Subject.FAILURE.getId()) {
            /*
             * If the fault distributor reports a new fault, ignore it if it is known, otherwise
             * re-deliver the message to ourself and then abort so that the process can restart.
             */
            FaultMessage fm = (FaultMessage) m;
            Discard ignoreIt = mayIgnore(hsIds, fm);
            if (Discard.DoNot == ignoreIt) {
                m_mailbox.deliverFront(m);
                m_recoveryLog.info("Agreement, Detected a concurrent failure from FaultDistributor, new failed site " + CoreUtils.hsIdToString(fm.failedSite));
                return false;
            } else {
                if (m_recoveryLog.isDebugEnabled()) {
                    ignoreIt.log(fm);
                }
            }
        }
        haveEnough = haveEnough || haveNecessaryFaultInfo(m_seeker.getSurvivors(), false);
        if (haveEnough) {
            // Drain every queued forward candidate: send it to the sites returned by
            // forWhomSiteIsDead for the candidate's key (if any), then remove it from
            // the map regardless of whether anything was sent.
            Iterator<Map.Entry<Long, SiteFailureForwardMessage>> itr = m_forwardCandidates.entrySet().iterator();
            while (itr.hasNext()) {
                Map.Entry<Long, SiteFailureForwardMessage> e = itr.next();
                Set<Long> unseenBy = m_seeker.forWhomSiteIsDead(e.getKey());
                if (unseenBy.size() > 0) {
                    m_mailbox.send(Longs.toArray(unseenBy), e.getValue());
                    m_recoveryLog.info("Agreement, fowarding to " + CoreUtils.hsIdCollectionToString(unseenBy) + " " + e.getValue());
                }
                itr.remove();
            }
        }
        // Keep going until enough information is in AND the seeker needs no more forwards.
    } while (!haveEnough || m_seeker.needForward(forwardStallCount));
    return true;
}
Also used : SiteFailureForwardMessage(org.voltcore.messaging.SiteFailureForwardMessage) SiteFailureMessage(org.voltcore.messaging.SiteFailureMessage) VoltMessage(org.voltcore.messaging.VoltMessage) FaultMessage(org.voltcore.messaging.FaultMessage) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google_voltpatches.common.collect.ImmutableMap)

Example 12 with VoltMessage

use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.

the class MiniSite method run.

/**
 * Main loop of the site: while {@code m_shouldContinue} is set, polls the
 * mailbox, runs {@link LocalObjectMessage} payloads as {@link Runnable}s,
 * hands every other message to {@code processMessage}, and sends
 * heartbeats to {@code m_currentHSIds} whenever more than 5 ms (by
 * {@link System#currentTimeMillis()}) have passed since the last batch.
 */
@Override
public void run() {
    long heartbeatSentAt = System.currentTimeMillis();
    while (m_shouldContinue.get()) {
        final VoltMessage incoming = m_mailbox.recvBlocking(5);
        // instanceof is false for null, so a missing message falls through both branches.
        if (incoming instanceof LocalObjectMessage) {
            final Runnable task = (Runnable) ((LocalObjectMessage) incoming).payload;
            task.run();
        } else if (incoming != null) {
            processMessage(incoming);
        }

        final long now = System.currentTimeMillis();
        final boolean heartbeatDue = now - heartbeatSentAt > 5;
        if (heartbeatDue) {
            sendHeartbeats(m_currentHSIds);
            heartbeatSentAt = now;
        }
    }
}
Also used : VoltMessage(org.voltcore.messaging.VoltMessage) LocalObjectMessage(org.voltcore.messaging.LocalObjectMessage)

Example 13 with VoltMessage

use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.

the class MiniMailbox method recvBlocking.

/**
 * Blocks until a message is available for one of the given subjects and
 * returns it. Subjects are polled in array order, so earlier entries take
 * priority over later ones.
 *
 * While no message is available the calling thread waits on this
 * mailbox's monitor; it re-polls each time it is woken up (presumably by
 * the delivery path calling {@code notify}/{@code notifyAll} -- confirm).
 *
 * @param subjects subjects to poll, in priority order
 * @return the first message found, or {@code null} if the thread was
 *         interrupted while waiting; in that case the thread's interrupt
 *         status is restored so callers can observe the interruption
 */
@Override
public synchronized VoltMessage recvBlocking(Subject[] subjects) {
    while (true) {
        for (Subject s : subjects) {
            final Deque<VoltMessage> dq = m_messages.get(s.getId());
            // Same guard as recv(): every requested subject is expected to have a queue.
            assert (dq != null);
            final VoltMessage message = dq.poll();
            if (message != null) {
                return message;
            }
        }
        try {
            this.wait();
        } catch (InterruptedException e) {
            // Catching InterruptedException clears the interrupt flag; put it back
            // so the interruption is not silently lost, then report "no message".
            Thread.currentThread().interrupt();
            return null;
        }
    }
}
Also used : VoltMessage(org.voltcore.messaging.VoltMessage) Subject(org.voltcore.messaging.Subject)

Example 14 with VoltMessage

use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.

the class MiniMailbox method recv.

/**
 * Non-blocking receive: polls the queue of each given subject in array
 * order and returns the first message found.
 *
 * @param subjects subjects to poll, in priority order
 * @return the first available message, or {@code null} when every polled
 *         queue is empty
 */
@Override
public synchronized VoltMessage recv(Subject[] subjects) {
    for (int i = 0; i < subjects.length; i++) {
        final Deque<VoltMessage> queue = m_messages.get(subjects[i].getId());
        assert (queue != null);
        final VoltMessage head = queue.poll();
        if (head == null) {
            continue;
        }
        return head;
    }
    return null;
}
Also used : VoltMessage(org.voltcore.messaging.VoltMessage) Subject(org.voltcore.messaging.Subject)

Example 15 with VoltMessage

use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.

the class AgreementSite method recoveryRunLoop.

/**
 * Run loop used while this site is recovering. Each iteration:
 * <ol>
 *   <li>in stage {@code WAITING_FOR_SAFETY}, asks the transaction queue
 *       for a safe recovery point and, once one exists, records it, moves
 *       to {@code SENT_PROPOSAL} and sends a {@link RecoveryMessage} to
 *       the first site in {@code m_hsIds} that is not this site (id 0 if
 *       there is none);</li>
 *   <li>polls the mailbox and processes a received message, if any;</li>
 *   <li>sends heartbeats when more than 5 ms have elapsed since the last
 *       ones;</li>
 *   <li>once a recovery point is recorded, either discards one queued
 *       transaction older than that point, or -- in stage
 *       {@code RECEIVED_SNAPSHOT} -- processes the ZK snapshot and
 *       returns.</li>
 * </ol>
 * The loop also ends when {@code m_recovering} or
 * {@code m_shouldContinue} becomes false.
 *
 * @throws Exception propagated from the calls made inside the loop
 */
public void recoveryRunLoop() throws Exception {
    long heartbeatSentAt = System.currentTimeMillis();
    while (m_recovering && m_shouldContinue) {
        if (m_recoveryStage == RecoveryStage.WAITING_FOR_SAFETY) {
            final Long recoverPoint = m_txnQueue.safeToRecover();
            if (recoverPoint != null) {
                m_recoveryStage = RecoveryStage.SENT_PROPOSAL;
                m_recoverBeforeTxn = recoverPoint;
                // Pick the first known site other than ourselves as the recovery source.
                long peerHSId = 0;
                for (Long candidate : m_hsIds) {
                    if (candidate != m_hsId) {
                        peerHSId = candidate;
                        break;
                    }
                }
                m_mailbox.send(peerHSId, new RecoveryMessage(m_hsId, recoverPoint, -1));
            }
        }

        final VoltMessage incoming = m_mailbox.recvBlocking(5);
        if (incoming != null) {
            processMessage(incoming);
        }

        final long now = System.currentTimeMillis();
        if (now - heartbeatSentAt > 5) {
            heartbeatSentAt = now;
            sendHeartbeats();
        }

        // Nothing below applies until a recovery point has been recorded.
        if (m_recoverBeforeTxn != null) {
            if (m_txnQueue.peek() != null && m_txnQueue.peek().txnId < m_recoverBeforeTxn.longValue()) {
                // Drop one transaction that precedes the recovery point.
                m_transactionsById.remove(m_txnQueue.poll().txnId);
            } else if (m_recoveryStage == RecoveryStage.RECEIVED_SNAPSHOT) {
                processZKSnapshot();
                return;
            }
        }
    }
}
Also used : VoltMessage(org.voltcore.messaging.VoltMessage) RecoveryMessage(org.voltcore.messaging.RecoveryMessage)

Aggregations

VoltMessage (org.voltcore.messaging.VoltMessage)31 Test (org.junit.Test)8 AtomicLong (java.util.concurrent.atomic.AtomicLong)5 FaultMessage (org.voltcore.messaging.FaultMessage)4 SiteFailureMessage (org.voltcore.messaging.SiteFailureMessage)4 Subject (org.voltcore.messaging.Subject)4 ByteBuffer (java.nio.ByteBuffer)3 ArrayList (java.util.ArrayList)3 Iv2RepairLogResponseMessage (org.voltdb.messaging.Iv2RepairLogResponseMessage)3 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 LinkedList (java.util.LinkedList)2 Map (java.util.Map)2 Message (org.voltcore.agreement.FakeMesh.Message)2 SiteFailureForwardMessage (org.voltcore.messaging.SiteFailureForwardMessage)2 InitiateResponseMessage (org.voltdb.messaging.InitiateResponseMessage)2 LocalMailbox (org.voltdb.messaging.LocalMailbox)2 ImmutableList (com.google_voltpatches.common.collect.ImmutableList)1 ImmutableMap (com.google_voltpatches.common.collect.ImmutableMap)1 List (java.util.List)1