Search in sources :

Example 1 with SiteFailureForwardMessage

use of org.voltcore.messaging.SiteFailureForwardMessage in project voltdb by VoltDB.

the class MeshArbiter method discoverGlobalFaultData_rcv.

/**
 * Receive half of the global fault-data discovery step: repeatedly pulls
 * messages from this site's mailbox and folds them into the agreement state
 * until {@code haveNecessaryFaultInfo} has reported success for the current
 * survivor set and {@code m_seeker.needForward} no longer asks for another
 * pass.
 *
 * <p>Three message subjects are handled: site failure updates, forwarded site
 * failure messages, and fault reports. When nothing is received, heartbeats
 * are sent to the current survivors instead.
 *
 * <p>Concurrent failures can be detected by additional reports from the
 * FaultDistributor, or by a mismatch in the set of failed hosts reported in a
 * message from another site.
 *
 * <p>NOTE(review): the original comment states that this site also sent the
 * preceding broadcast to its own mailbox, which guarantees at least one
 * response. That broadcast is not visible in this method - confirm against the
 * sending side.
 *
 * @param hsIds site ids handed to {@code updateFailedSitesLedger} and
 *              {@code mayIgnore}, and used to check the source of forwarded
 *              messages; presumably the mesh membership known to the caller -
 *              TODO confirm
 * @return {@code false} if a fault report that may not be ignored arrived (the
 *         message is put back at the front of the mailbox first so that the
 *         process can restart); {@code true} once the loop ends normally
 */
private boolean discoverGlobalFaultData_rcv(Set<Long> hsIds) {
    long blockedOnReceiveStart = System.currentTimeMillis();
    long lastReportTime = 0;
    // Sticky flag: once haveNecessaryFaultInfo has returned true it is not asked again.
    boolean haveEnough = false;
    // Single-element array handed to m_seeker.needForward; presumably wrapped in an
    // array so the callee can update the counter in place - TODO confirm.
    int[] forwardStallCount = new int[] { FORWARD_STALL_COUNT };
    do {
        // A null result is handled below as "nothing received".
        // NOTE(review): the 5 is presumably a receive timeout in milliseconds - confirm.
        VoltMessage m = m_mailbox.recvBlocking(receiveSubjects, 5);
        /*
         * If fault resolution takes longer than 10 seconds start logging.
         * After that threshold the reporting call below is made at most once
         * every 60 seconds; because lastReportTime starts at 0 the first one
         * fires as soon as the 10 second mark is passed.
         */
        final long now = System.currentTimeMillis();
        if (now - blockedOnReceiveStart > 10000) {
            if (now - lastReportTime > 60000) {
                lastReportTime = System.currentTimeMillis();
                // Return value deliberately unused here; the 'true' argument presumably
                // switches on the logging mentioned above - TODO confirm.
                haveNecessaryFaultInfo(m_seeker.getSurvivors(), true);
            }
        }
        if (m == null) {
            // Send a heartbeat to keep the dead host timeout active.  Needed because IV2 doesn't
            // generate its own heartbeats to keep this running.
            m_meshAide.sendHeartbeats(m_seeker.getSurvivors());
        } else if (m.getSubject() == Subject.SITE_FAILURE_UPDATE.getId()) {
            SiteFailureMessage sfm = (SiteFailureMessage) m;
            // Skip the update when its sender is not among the seeker's survivors, is
            // already recorded as failed, or reports only sites already recorded as failed.
            // Inside a do/while, 'continue' jumps straight to the loop condition, so the
            // haveEnough re-evaluation and the forwarding step below are skipped as well.
            if (!m_seeker.getSurvivors().contains(m.m_sourceHSId) || m_failedSites.contains(m.m_sourceHSId) || m_failedSites.containsAll(sfm.getFailedSites()))
                continue;
            // Remember senders whose message already carries a non-empty decision.
            if (!sfm.m_decision.isEmpty()) {
                m_decidedSurvivors.put(sfm.m_sourceHSId, sfm);
            }
            updateFailedSitesLedger(hsIds, sfm);
            m_seeker.add(sfm);
            // Keep a forwardable copy of the accepted update.
            addForwardCandidate(new SiteFailureForwardMessage(sfm));
            m_recoveryLog.info("Agreement, Received " + sfm);
        } else if (m.getSubject() == Subject.SITE_FAILURE_FORWARD.getId()) {
            SiteFailureForwardMessage fsfm = (SiteFailureForwardMessage) m;
            // Recorded as a forward candidate before the filter below, so a forward that
            // is subsequently skipped is still offered as a candidate.
            addForwardCandidate(fsfm);
            // Skip the forward when its sender is not in hsIds, when the site that
            // originally reported is among the seeker's survivors or already recorded as
            // failed, or when it reports only sites already recorded as failed.
            if (!hsIds.contains(fsfm.m_sourceHSId) || m_seeker.getSurvivors().contains(fsfm.m_reportingHSId) || m_failedSites.contains(fsfm.m_reportingHSId) || m_failedSites.containsAll(fsfm.getFailedSites()))
                continue;
            m_seeker.add(fsfm);
            m_recoveryLog.info("Agreement, Received forward " + fsfm);
            // An accepted forward resets the stall counter to its initial value.
            forwardStallCount[0] = FORWARD_STALL_COUNT;
        } else if (m.getSubject() == Subject.FAILURE.getId()) {
            /*
             * If the fault distributor reports a new fault, ignore it if it is known, otherwise
             * re-deliver the message to ourself and then abort so that the process can restart.
             */
            FaultMessage fm = (FaultMessage) m;
            Discard ignoreIt = mayIgnore(hsIds, fm);
            if (Discard.DoNot == ignoreIt) {
                m_mailbox.deliverFront(m);
                m_recoveryLog.info("Agreement, Detected a concurrent failure from FaultDistributor, new failed site " + CoreUtils.hsIdToString(fm.failedSite));
                return false;
            } else {
                // The reason for discarding is only logged when debug logging is on.
                if (m_recoveryLog.isDebugEnabled()) {
                    ignoreIt.log(fm);
                }
            }
        }
        // Short-circuits: haveNecessaryFaultInfo is not called again once haveEnough is true.
        haveEnough = haveEnough || haveNecessaryFaultInfo(m_seeker.getSurvivors(), false);
        if (haveEnough) {
            // Drain the forward candidates: each one is sent to the sites returned by
            // forWhomSiteIsDead for its key, then removed whether or not it was sent.
            Iterator<Map.Entry<Long, SiteFailureForwardMessage>> itr = m_forwardCandidates.entrySet().iterator();
            while (itr.hasNext()) {
                Map.Entry<Long, SiteFailureForwardMessage> e = itr.next();
                Set<Long> unseenBy = m_seeker.forWhomSiteIsDead(e.getKey());
                if (unseenBy.size() > 0) {
                    m_mailbox.send(Longs.toArray(unseenBy), e.getValue());
                    m_recoveryLog.info("Agreement, fowarding to " + CoreUtils.hsIdCollectionToString(unseenBy) + " " + e.getValue());
                }
                itr.remove();
            }
        }
        // Keep looping until enough fault info is held AND the seeker stops asking for forwards.
    } while (!haveEnough || m_seeker.needForward(forwardStallCount));
    return true;
}
Also used : SiteFailureForwardMessage(org.voltcore.messaging.SiteFailureForwardMessage) SiteFailureMessage(org.voltcore.messaging.SiteFailureMessage) VoltMessage(org.voltcore.messaging.VoltMessage) FaultMessage(org.voltcore.messaging.FaultMessage) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google_voltpatches.common.collect.ImmutableMap)

Example 2 with SiteFailureForwardMessage

use of org.voltcore.messaging.SiteFailureForwardMessage in project voltdb by VoltDB.

the class TestMeshArbiter method testOneLinkDownFromThePerspictiveOfWitness.

/**
 * Drives {@code arbiter.reconfigureOnFault} with a scripted sequence of
 * mailbox replies and checks the outcome: the decision maps site 1 to
 * transaction 11, nothing is pushed back to the front of the mailbox, and
 * exactly two sends carry a site failure with failed {1} and survivors
 * {0, 2, 3}.
 *
 * <p>NOTE(review): per the method name this models a single broken link as
 * observed by a witness site; the exact topology is encoded in the maker
 * arguments below and should be confirmed against the maker definitions.
 *
 * @throws Exception if the arbiter or the mocked collaborators throw
 */
@Test
public void testOneLinkDownFromThePerspictiveOfWitness() throws Exception {
    // Message templates; individual replies below override the source / failures via but(...).
    Maker<SiteFailureMessage> s1f = a(SiteFailureMessage,
            with(sfmSurvivors, Longs.asList(0, 2, 3)),
            with(sfmFailures, sfmFailed(1)),
            with(sfmSafeTxns, sfmSafe(0, 10, 1, 11, 2, 22, 3, 33)));
    Maker<SiteFailureMessage> s0f = a(SiteFailureMessage,
            with(sfmSurvivors, Longs.asList(1, 2, 3)),
            with(sfmFailures, sfmFailed(0)),
            with(sfmSafeTxns, sfmSafe(0, 10, 1, 11, 2, 22, 3, 33)));
    Maker<SiteFailureMessage> s23f = a(SiteFailureMessage,
            with(sfmSurvivors, Longs.asList(0, 1, 2, 3)),
            with(sfmFailures, sfmFailed(0, 1)),
            with(sfmSafeTxns, sfmSafe(0, 10, 1, 11, 2, 22, 3, 33)));
    Maker<SiteFailureForwardMessage> uf = a(FailureSiteForwardMessage);

    when(aide.getNewestSafeTransactionForInitiator(0L)).thenReturn(10L);
    when(aide.getNewestSafeTransactionForInitiator(1L)).thenReturn(11L);

    // Consecutive stubbing: each recvBlocking call returns the next reply in this order.
    when(mbox.recvBlocking(any(Subject[].class), eq(5L)))
            .thenReturn(make(s23f.but(with(sfmSource, 2L), with(sfmFailures, sfmFailed(0)))))
            .thenReturn(new FaultMessage(2L, 0L, ImmutableSet.of(1L, 2L, 3L)))
            .thenReturn(make(s1f.but(with(sfmSource, 0L))))
            .thenReturn(make(s23f.but(with(sfmSource, 2L))))
            .thenReturn(make(s23f.but(with(sfmSource, 3L))))
            .thenReturn(make(uf.but(with(fsfmSource, 2L), with(fsfmMsg, s0f))))
            .thenReturn(make(uf.but(with(fsfmSource, 3L), with(fsfmMsg, s0f))));

    Map<Long, Long> decision = arbiter.reconfigureOnFault(hsids, new FaultMessage(0, 1));

    // No message may have been re-delivered to the front of the mailbox.
    verify(mbox, times(0)).deliverFront(any(VoltMessage.class));
    verify(mbox, times(2)).send(any(long[].class), argThat(siteFailureIs(sfmFailed(1), sfmSurvived(0, 2, 3))));
    // Expected value first, actual second, so a failure message labels them correctly.
    assertEquals(ImmutableMap.<Long, Long>of(1L, 11L), decision);
}
Also used : VoltMessage(org.voltcore.messaging.VoltMessage) FaultMessage(org.voltcore.messaging.FaultMessage) Matchers.anyLong(org.mockito.Matchers.anyLong) SiteFailureForwardMessage(org.voltcore.messaging.SiteFailureForwardMessage) SiteFailureMessage(org.voltcore.messaging.SiteFailureMessage) SiteFailureMessage(org.voltcore.agreement.maker.SiteFailureMessageMaker.SiteFailureMessage) Test(org.junit.Test)

Example 3 with SiteFailureForwardMessage

use of org.voltcore.messaging.SiteFailureForwardMessage in project voltdb by VoltDB.

the class MiniNode method run.

/**
 * Thread body of the node: starts the mini site, begins liveness tracking of
 * every other site, marks the node as running, and then keeps polling the
 * receive queue until {@code m_shouldContinue} is cleared.
 *
 * <p>Each pass handles at most one queued message and then checks the dead
 * tracker for timed-out sites; both steps run while holding this object's
 * monitor.
 */
@Override
public void run() {
    m_miniSite.start();
    for (long peer : m_HSIds) {
        // Don't track your own death
        if (peer == m_HSId) {
            continue;
        }
        m_deadTracker.startTracking(peer);
    }
    m_nodeState.set(NodeState.RUN);

    while (m_shouldContinue.get()) {
        Message incoming = m_recvQ.poll();
        synchronized (this) {
            if (incoming != null && incoming.m_close) {
                // Close notification: report the sender's agreement site as faulted
                // and stop watching the sender.
                int closedHostId = CoreUtils.getHostIdFromHSId(incoming.m_src);
                long closedAgreementHSId = CoreUtils.getHSIdFromHostAndSite(closedHostId, HostMessenger.AGREEMENT_SITE_ID);
                m_miniSite.reportFault(closedAgreementHSId);
                m_deadTracker.stopTracking(incoming.m_src);
            } else if (incoming != null) {
                // Ordinary traffic counts as a sign of life from the sender.
                m_deadTracker.updateHSId(incoming.m_src);
                VoltMessage payload = incoming.m_msg;
                // Snoop for plain (non-forwarded) site failure messages and report each
                // fault they describe to the mini site.
                boolean plainSiteFailure = payload instanceof SiteFailureMessage
                        && !(payload instanceof SiteFailureForwardMessage);
                if (plainSiteFailure) {
                    for (FaultMessage reported : ((SiteFailureMessage) payload).asFaultMessages()) {
                        m_miniSite.reportFault(reported);
                    }
                }
                // The message itself is always injected into the mailbox.
                m_mailbox.deliver(payload);
            }

            // Dead host detection: every site the tracker reports as timed out has its
            // agreement site reported as faulted and is dropped from tracking.
            Set<Long> timedOut = m_deadTracker.checkTimeouts();
            for (long expired : timedOut) {
                int expiredHostId = CoreUtils.getHostIdFromHSId(expired);
                long expiredAgreementHSId = CoreUtils.getHSIdFromHostAndSite(expiredHostId, HostMessenger.AGREEMENT_SITE_ID);
                m_miniSite.reportFault(expiredAgreementHSId);
                m_deadTracker.stopTracking(expired);
            }
        }
    }
}
Also used : VoltMessage(org.voltcore.messaging.VoltMessage) FaultMessage(org.voltcore.messaging.FaultMessage) SiteFailureForwardMessage(org.voltcore.messaging.SiteFailureForwardMessage) FaultMessage(org.voltcore.messaging.FaultMessage) VoltMessage(org.voltcore.messaging.VoltMessage) SiteFailureMessage(org.voltcore.messaging.SiteFailureMessage) Message(org.voltcore.agreement.FakeMesh.Message) SiteFailureForwardMessage(org.voltcore.messaging.SiteFailureForwardMessage) SiteFailureMessage(org.voltcore.messaging.SiteFailureMessage)

Example 4 with SiteFailureForwardMessage

use of org.voltcore.messaging.SiteFailureForwardMessage in project voltdb by VoltDB.

the class MeshArbiter method addForwardCandidate.

/**
 * Records {@code sffm} as the forward candidate for its reporting site.
 *
 * <p>An already stored candidate for the same reporting site is kept only
 * when its survivor set is strictly smaller than the new message's; in every
 * other case (no previous entry, equal size, or larger size) the new message
 * replaces it.
 *
 * @param sffm the forward message to consider; keyed by its
 *             {@code m_reportingHSId}
 */
protected void addForwardCandidate(SiteFailureForwardMessage sffm) {
    final SiteFailureForwardMessage existing = m_forwardCandidates.get(sffm.m_reportingHSId);
    final boolean keepExisting = existing != null
            && existing.m_survivors.size() < sffm.m_survivors.size();
    if (!keepExisting) {
        m_forwardCandidates.put(sffm.m_reportingHSId, sffm);
    }
}
Also used : SiteFailureForwardMessage(org.voltcore.messaging.SiteFailureForwardMessage)

Aggregations

SiteFailureForwardMessage (org.voltcore.messaging.SiteFailureForwardMessage)4 FaultMessage (org.voltcore.messaging.FaultMessage)3 SiteFailureMessage (org.voltcore.messaging.SiteFailureMessage)3 VoltMessage (org.voltcore.messaging.VoltMessage)3 ImmutableMap (com.google_voltpatches.common.collect.ImmutableMap)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Test (org.junit.Test)1 Matchers.anyLong (org.mockito.Matchers.anyLong)1 Message (org.voltcore.agreement.FakeMesh.Message)1 SiteFailureMessage (org.voltcore.agreement.maker.SiteFailureMessageMaker.SiteFailureMessage)1