Search in sources :

Example 1 with SiteFailureMessage

use of org.voltcore.messaging.SiteFailureMessage in project voltdb by VoltDB.

the class TestMeshArbiter method testBasicScenario.

/**
 * Arbitrates a single fault report (FaultMessage(0, 1)) while the mocked
 * mailbox hands back three site failure messages built from the same
 * template (failures {1}, survivors {0, 2, 3}) with sources 0, 2 and 3.
 *
 * Expects two sends whose payload matches that failure/survivor view and a
 * final decision mapping site 1 to the stubbed safe transaction id 11.
 */
@Test
public void testBasicScenario() throws Exception {
    Maker<SiteFailureMessage> siteOneSfm = a(SiteFailureMessage,
            with(sfmSurvivors, sfmSurvived(0, 2, 3)),
            with(sfmFailures, sfmFailed(1)),
            with(sfmSafeTxns, sfmSafe(0, 10, 1, 11, 2, 22, 3, 33)));

    // Any initiator reports 11 as its newest safe transaction.
    when(aide.getNewestSafeTransactionForInitiator(anyLong())).thenReturn(11L);

    // Successive receives yield the same failure view from sources 0, 2 and 3.
    when(mbox.recvBlocking(any(Subject[].class), eq(5L)))
            .thenReturn(make(siteOneSfm.but(with(sfmSource, 0L))))
            .thenReturn(make(siteOneSfm.but(with(sfmSource, 2L))))
            .thenReturn(make(siteOneSfm.but(with(sfmSource, 3L))));

    Map<Long, Long> decision = arbiter.reconfigureOnFault(hsids, new FaultMessage(0, 1));

    verify(mbox, times(2)).send(any(long[].class), argThat(siteFailureIs(sfmFailed(1), sfmSurvived(0, 2, 3))));
    // JUnit's contract is assertEquals(expected, actual); keeping that order
    // makes a failure report label the two values correctly.
    assertEquals(ImmutableMap.<Long, Long>of(1L, 11L), decision);
}
Also used : FaultMessage(org.voltcore.messaging.FaultMessage) Matchers.anyLong(org.mockito.Matchers.anyLong) SiteFailureMessage(org.voltcore.messaging.SiteFailureMessage) SiteFailureMessage(org.voltcore.agreement.maker.SiteFailureMessageMaker.SiteFailureMessage) Test(org.junit.Test)

Example 2 with SiteFailureMessage

use of org.voltcore.messaging.SiteFailureMessage in project voltdb by VoltDB.

the class TestMeshArbiter method testInterleavedFailures.

/**
 * Exercises a second fault arriving while the first is still being
 * arbitrated.
 *
 * Round one: the mailbox returns one site failure message (source 0) and
 * then a FaultMessage(0, 2). The test expects exactly one deliverFront, one
 * send carrying failures {1} / survivors {0, 2, 3}, and an empty decision.
 *
 * Round two (after resetting the mailbox mock): arbitration is re-run for
 * FaultMessage(0, 2) with messages from sources 3, 0 and 3. The test expects
 * no deliverFront, two sends carrying failures {1, 2} / survivors {0, 3},
 * and a decision of {1 -> 11, 2 -> 22}.
 */
@Test
public void testInterleavedFailures() throws Exception {
    Maker<SiteFailureMessage> siteOneSfm = a(SiteFailureMessage,
            with(sfmSurvivors, Longs.asList(0, 2, 3)),
            with(sfmFailures, sfmFailed(1)),
            with(sfmSafeTxns, sfmSafe(0, 10, 1, 11, 2, 22, 3, 33)));
    Maker<SiteFailureMessage> siteTwoSfm = a(SiteFailureMessage,
            with(sfmSurvivors, Longs.asList(0, 3)),
            with(sfmFailures, sfmFailed(1, 2)),
            with(sfmSafeTxns, sfmSafe(0, 10, 1, 11, 2, 22, 3, 33)));

    when(aide.getNewestSafeTransactionForInitiator(1L)).thenReturn(11L);
    when(aide.getNewestSafeTransactionForInitiator(2L)).thenReturn(22L);

    // Round one: a single failure message followed by a new fault for site 2.
    when(mbox.recvBlocking(any(Subject[].class), eq(5L)))
            .thenReturn(make(siteOneSfm.but(with(sfmSource, 0L))))
            .thenReturn(new FaultMessage(0, 2L));

    Map<Long, Long> decision = arbiter.reconfigureOnFault(hsids, new FaultMessage(0, 1));

    verify(mbox, times(1)).deliverFront(any(VoltMessage.class));
    verify(mbox, times(1)).send(any(long[].class), any(VoltMessage.class));
    verify(mbox).send(any(long[].class), argThat(siteFailureIs(sfmFailed(1), sfmSurvived(0, 2, 3))));
    // JUnit's contract is assertEquals(expected, actual).
    assertEquals(ImmutableMap.<Long, Long>of(), decision);

    // Round two: clear recorded interactions and stubbing on the mailbox mock
    // before arbitrating the second fault.
    reset(mbox);
    when(mbox.recvBlocking(any(Subject[].class), eq(5L)))
            .thenReturn(make(siteOneSfm.but(with(sfmSource, 3L))))
            .thenReturn(make(siteTwoSfm.but(with(sfmSource, 0L))))
            .thenReturn(make(siteTwoSfm.but(with(sfmSource, 3L))));

    decision = arbiter.reconfigureOnFault(hsids, new FaultMessage(0, 2));

    verify(mbox, never()).deliverFront(any(VoltMessage.class));
    verify(mbox, times(2)).send(any(long[].class), any(VoltMessage.class));
    verify(mbox, times(2)).send(any(long[].class), argThat(siteFailureIs(sfmFailed(1, 2), sfmSurvived(0, 3))));
    assertEquals(ImmutableMap.<Long, Long>of(1L, 11L, 2L, 22L), decision);
}
Also used : VoltMessage(org.voltcore.messaging.VoltMessage) FaultMessage(org.voltcore.messaging.FaultMessage) Matchers.anyLong(org.mockito.Matchers.anyLong) SiteFailureMessage(org.voltcore.messaging.SiteFailureMessage) SiteFailureMessage(org.voltcore.agreement.maker.SiteFailureMessageMaker.SiteFailureMessage) Subject(org.voltcore.messaging.Subject) Test(org.junit.Test)

Example 3 with SiteFailureMessage

use of org.voltcore.messaging.SiteFailureMessage in project voltdb by VoltDB.

the class TestMeshArbiter method testOneLinkDownFromThePerspictiveOfWitness.

/**
 * Feeds the arbiter a mix of direct site failure messages, a FaultMessage
 * that carries a survivor set, and forwarded failure messages, then checks
 * the outcome of arbitrating FaultMessage(0, 1).
 *
 * Expects no deliverFront calls, two sends carrying failures {1} /
 * survivors {0, 2, 3}, and a decision of {1 -> 11}.
 */
@Test
public void testOneLinkDownFromThePerspictiveOfWitness() throws Exception {
    // Template reporting site 1 as failed.
    Maker<SiteFailureMessage> s1f = a(SiteFailureMessage,
            with(sfmSurvivors, Longs.asList(0, 2, 3)),
            with(sfmFailures, sfmFailed(1)),
            with(sfmSafeTxns, sfmSafe(0, 10, 1, 11, 2, 22, 3, 33)));
    // Template reporting site 0 as failed.
    Maker<SiteFailureMessage> s0f = a(SiteFailureMessage,
            with(sfmSurvivors, Longs.asList(1, 2, 3)),
            with(sfmFailures, sfmFailed(0)),
            with(sfmSafeTxns, sfmSafe(0, 10, 1, 11, 2, 22, 3, 33)));
    // Template listing all four sites as survivors and sites 0 and 1 as failures.
    Maker<SiteFailureMessage> s23f = a(SiteFailureMessage,
            with(sfmSurvivors, Longs.asList(0, 1, 2, 3)),
            with(sfmFailures, sfmFailed(0, 1)),
            with(sfmSafeTxns, sfmSafe(0, 10, 1, 11, 2, 22, 3, 33)));
    Maker<SiteFailureForwardMessage> uf = a(FailureSiteForwardMessage);

    when(aide.getNewestSafeTransactionForInitiator(0L)).thenReturn(10L);
    when(aide.getNewestSafeTransactionForInitiator(1L)).thenReturn(11L);

    // The order of the stubbed receives is the scenario under test.
    when(mbox.recvBlocking(any(Subject[].class), eq(5L)))
            .thenReturn(make(s23f.but(with(sfmSource, 2L), with(sfmFailures, sfmFailed(0)))))
            .thenReturn(new FaultMessage(2L, 0L, ImmutableSet.of(1L, 2L, 3L)))
            .thenReturn(make(s1f.but(with(sfmSource, 0L))))
            .thenReturn(make(s23f.but(with(sfmSource, 2L))))
            .thenReturn(make(s23f.but(with(sfmSource, 3L))))
            .thenReturn(make(uf.but(with(fsfmSource, 2L), with(fsfmMsg, s0f))))
            .thenReturn(make(uf.but(with(fsfmSource, 3L), with(fsfmMsg, s0f))));

    Map<Long, Long> decision = arbiter.reconfigureOnFault(hsids, new FaultMessage(0, 1));

    verify(mbox, times(0)).deliverFront(any(VoltMessage.class));
    verify(mbox, times(2)).send(any(long[].class), argThat(siteFailureIs(sfmFailed(1), sfmSurvived(0, 2, 3))));
    // JUnit's contract is assertEquals(expected, actual); keeping that order
    // makes a failure report label the two values correctly.
    assertEquals(ImmutableMap.<Long, Long>of(1L, 11L), decision);
}
Also used : VoltMessage(org.voltcore.messaging.VoltMessage) FaultMessage(org.voltcore.messaging.FaultMessage) Matchers.anyLong(org.mockito.Matchers.anyLong) SiteFailureForwardMessage(org.voltcore.messaging.SiteFailureForwardMessage) SiteFailureMessage(org.voltcore.messaging.SiteFailureMessage) SiteFailureMessage(org.voltcore.agreement.maker.SiteFailureMessageMaker.SiteFailureMessage) Test(org.junit.Test)

Example 4 with SiteFailureMessage

use of org.voltcore.messaging.SiteFailureMessage in project voltdb by VoltDB.

the class MeshArbiter method discoverGlobalFaultData_send.

/**
     * Broadcast this site's view of the fault to every other surviving
     * execution site: its multi-partition commit point and its safe txnid
     * (the receiver will filter the latter for its own partition). This is
     * done once for each failed initiator that we know about. All data is
     * sent every time so that no request/response exchange is needed.
     *
     * @param hsIds passed to getSafeTxnIdsForSites and updateFailedSitesLedger
     */
private void discoverGlobalFaultData_send(Set<Long> hsIds) {
    // Every current survivor except this site.
    final Set<Long> recipients = Sets.filter(m_seeker.getSurvivors(), not(equalTo(m_hsId)));

    final SiteFailureMessage sfm = SiteFailureMessage.builder()
            .survivors(m_seeker.getSurvivors())
            .failures(m_inTrouble.keySet())
            .safeTxnIds(getSafeTxnIdsForSites(hsIds))
            .build();
    sfm.m_sourceHSId = m_hsId;

    // Record our own message locally before it goes out on the wire.
    updateFailedSitesLedger(hsIds, sfm);
    m_seeker.add(sfm);

    m_mailbox.send(Longs.toArray(recipients), sfm);
    m_recoveryLog.info("Agreement, Sending survivors " + sfm);
}
Also used : SiteFailureMessage(org.voltcore.messaging.SiteFailureMessage)

Example 5 with SiteFailureMessage

use of org.voltcore.messaging.SiteFailureMessage in project voltdb by VoltDB.

the class MeshArbiter method notifyOnKill.

/**
     * Notify all survivors when you are closing links to nodes, then wait
     * for the other expected survivors to report their own decisions.
     *
     * @param hsIds site ids handed to getSafeTxnIdsForSites and mayIgnore
     *   (NOTE(review): presumably the full set of known sites; confirm
     *   against the caller)
     * @param decision map where the keys contain the kill sites
     *   and its values are their last known safe transaction ids
     * @return true if there is nobody to notify, if a decision loop is
     *   detected, or once the wait loop ends; false if a conflicting
     *   decision or another message was pushed back to the front of the
     *   mailbox so that the decision process is repeated.
     */
protected boolean notifyOnKill(Set<Long> hsIds, Map<Long, Long> decision) {
    // The kill set is recorded both as the decision and as the failures.
    SiteFailureMessage.Builder sfmb = SiteFailureMessage.builder().decisions(decision.keySet()).failures(decision.keySet());
    // Recipients: every current survivor except this site.
    Set<Long> dests = Sets.filter(m_seeker.getSurvivors(), not(equalTo(m_hsId)));
    // Nobody else to notify, so there is nothing to agree on.
    if (dests.isEmpty())
        return true;
    // Survivors advertised in the message exclude the sites being killed.
    sfmb.survivors(Sets.difference(m_seeker.getSurvivors(), decision.keySet()));
    sfmb.safeTxnIds(getSafeTxnIdsForSites(hsIds));
    SiteFailureMessage sfm = sfmb.build();
    m_mailbox.send(Longs.toArray(dests), sfm);
    m_recoveryLog.info("Agreement, Sending [" + CoreUtils.hsIdCollectionToString(dests) + "]  " + sfm);
    // Loop detection: if an earlier local decision had the same survivor set,
    // we take it that we've entered a loop and exit here.
    if (m_localHistoricDecisions.size() >= 100) {
        // Too many decisions have been made without converging
        // (this only logs a rate-limited warning; it does not abort).
        RateLimitedLogger.tryLogForMessage(System.currentTimeMillis(), 10, TimeUnit.SECONDS, m_recoveryLog, Level.WARN, "Agreement, %d local decisions have been made without converging", m_localHistoricDecisions.size());
    }
    for (SiteFailureMessage lhd : m_localHistoricDecisions) {
        if (lhd.m_survivors.equals(sfm.m_survivors)) {
            m_recoveryLog.info("Agreement, detected decision loop. Exiting");
            return true;
        }
    }
    m_localHistoricDecisions.add(sfm);
    // Wait for all survivors in the local decision to send their decisions over.
    // If one of the host's decision conflicts with ours, remove that host's link
    // and repeat the decision process.
    final Set<Long> expectedSurvivors = Sets.filter(sfm.m_survivors, not(equalTo(m_hsId)));
    m_recoveryLog.info("Agreement, Waiting for agreement on decision from survivors " + CoreUtils.hsIdCollectionToString(expectedSurvivors));
    // First check decisions already stored in m_decidedSurvivors: if an
    // expected survivor has decided to kill this site, drop its decision,
    // queue a local fault against it and ask the caller to retry.
    final Iterator<SiteFailureMessage> iter = m_decidedSurvivors.values().iterator();
    while (iter.hasNext()) {
        final SiteFailureMessage remoteDecision = iter.next();
        if (expectedSurvivors.contains(remoteDecision.m_sourceHSId)) {
            if (remoteDecision.m_decision.contains(m_hsId)) {
                iter.remove();
                m_recoveryLog.info("Agreement, Received inconsistent decision from " + CoreUtils.hsIdToString(remoteDecision.m_sourceHSId) + ", " + remoteDecision);
                final FaultMessage localFault = new FaultMessage(m_hsId, remoteDecision.m_sourceHSId);
                localFault.m_sourceHSId = m_hsId;
                m_mailbox.deliverFront(localFault);
                return false;
            }
        }
    }
    // Start of the current wait interval; reset each time the
    // "still waiting" error is logged below.
    long start = System.currentTimeMillis();
    boolean allDecisionsMatch = true;
    do {
        // NOTE(review): the second argument looks like a receive timeout;
        // its unit is not visible here - confirm.
        final VoltMessage msg = m_mailbox.recvBlocking(receiveSubjects, 5);
        if (msg == null) {
            // Send a heartbeat to keep the dead host timeout active.
            m_meshAide.sendHeartbeats(m_seeker.getSurvivors());
            final long duration = System.currentTimeMillis() - start;
            // Report missing decisions each time more than 20000 ms have
            // passed since the last report.
            if (duration > 20000) {
                m_recoveryLog.error("Agreement, Still waiting for decisions from " + CoreUtils.hsIdCollectionToString(Sets.difference(expectedSurvivors, m_decidedSurvivors.keySet())) + " after " + TimeUnit.MILLISECONDS.toSeconds(duration) + " seconds");
                start = System.currentTimeMillis();
            }
            continue;
        }
        if (m_hsId != msg.m_sourceHSId && !expectedSurvivors.contains(msg.m_sourceHSId)) {
            // Ignore messages from failed sites
            continue;
        }
        if (msg.getSubject() == Subject.SITE_FAILURE_UPDATE.getId()) {
            final SiteFailureMessage fm = (SiteFailureMessage) msg;
            if (!fm.m_decision.isEmpty()) {
                // The message carries a decision. Decisions whose source is not
                // an expected survivor fall through without being recorded.
                if (expectedSurvivors.contains(fm.m_sourceHSId)) {
                    if (fm.m_decision.contains(m_hsId)) {
                        m_decidedSurvivors.remove(fm.m_sourceHSId);
                        // The remote host has decided that we are gone, remove the remote host
                        final FaultMessage localFault = new FaultMessage(m_hsId, fm.m_sourceHSId);
                        localFault.m_sourceHSId = m_hsId;
                        m_mailbox.deliverFront(localFault);
                        return false;
                    } else {
                        m_decidedSurvivors.put(fm.m_sourceHSId, fm);
                    }
                }
            } else {
                // No decision in this message: put it back at the front of the
                // mailbox and return false.
                m_mailbox.deliverFront(fm);
                return false;
            }
        } else if (msg.getSubject() == Subject.FAILURE.getId()) {
            final FaultMessage fm = (FaultMessage) msg;
            if (!fm.decided) {
                // In case of concurrent fault, handle it
                m_mailbox.deliverFront(msg);
                return false;
            } else if (mayIgnore(hsIds, fm) == Discard.DoNot) {
                // A decided fault that may not be discarded is redelivered too.
                m_mailbox.deliverFront(msg);
                return false;
            }
        }
        // Compare every recorded remote decision's survivor set with ours;
        // the flag is never set back to true once a mismatch is seen.
        for (SiteFailureMessage remoteDecision : m_decidedSurvivors.values()) {
            if (!sfm.m_survivors.equals(remoteDecision.m_survivors)) {
                allDecisionsMatch = false;
            }
        }
    } while (!m_decidedSurvivors.keySet().containsAll(expectedSurvivors) && allDecisionsMatch);
    // NOTE(review): true is returned both when every expected survivor has
    // reported and when the loop ended because allDecisionsMatch became
    // false - confirm the latter is intended.
    return true;
}
Also used : VoltMessage(org.voltcore.messaging.VoltMessage) FaultMessage(org.voltcore.messaging.FaultMessage) SiteFailureMessage(org.voltcore.messaging.SiteFailureMessage)

Aggregations

SiteFailureMessage (org.voltcore.messaging.SiteFailureMessage)12 FaultMessage (org.voltcore.messaging.FaultMessage)11 VoltMessage (org.voltcore.messaging.VoltMessage)9 Test (org.junit.Test)8 Matchers.anyLong (org.mockito.Matchers.anyLong)8 SiteFailureMessage (org.voltcore.agreement.maker.SiteFailureMessageMaker.SiteFailureMessage)8 Subject (org.voltcore.messaging.Subject)5 SiteFailureForwardMessage (org.voltcore.messaging.SiteFailureForwardMessage)3 ImmutableMap (com.google_voltpatches.common.collect.ImmutableMap)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Message (org.voltcore.agreement.FakeMesh.Message)1