Search in sources :

Example 1 with RepairResult

use of org.voltdb.iv2.RepairAlgo.RepairResult in project voltdb by VoltDB.

the class SpInitiator method acceptPromotion.

/**
 * Runs leader promotion for this initiator.
 *
 * <p>Creates and starts a new term, then loops until a repair run completes:
 * each iteration builds a {@code RepairAlgo} over the term's interesting HSIds,
 * starts it and blocks on its result. A {@link CancellationException} from the
 * repair future is treated as a retryable interruption and the loop runs again.
 * On success the mailbox is given the resulting transaction id as leader state
 * and the partition's leader entry is written through a {@code LeaderCache}.
 * After the loop the export manager is told to accept mastership for the
 * partition.
 *
 * <p>If the mailbox refuses the promotion, or any other exception escapes, the
 * local VoltDB process is crashed via {@code VoltDB.crashLocalVoltDB}.
 */
@Override
public void acceptPromotion() {
    try {
        long startTime = System.currentTimeMillis();
        // Primitive flag: a boxed Boolean here would be auto-unboxed on every loop test.
        boolean success = false;
        m_term = createTerm(m_messenger.getZK(), m_partitionId, getInitiatorHSId(), m_initiatorMailbox, m_whoami);
        m_term.start();
        while (!success) {
            RepairAlgo repair = m_initiatorMailbox.constructRepairAlgo(m_term.getInterestingHSIds(), m_whoami);
            // The mailbox can refuse the promotion (per the messages below, this is the
            // rejoining-site case); that is fatal for this node.
            if (!m_initiatorMailbox.acceptPromotion()) {
                tmLog.error(m_whoami + "rejoining site can not be promoted to leader. Terminating.");
                VoltDB.crashLocalVoltDB("A rejoining site can not be promoted to leader.", false, null);
                return;
            }
            // Sentinel value; only replaced when the repair future completes normally.
            long txnid = Long.MIN_VALUE;
            try {
                // Blocks until the repair algorithm finishes or is cancelled.
                RepairResult res = repair.start().get();
                txnid = res.m_txnId;
                success = true;
            } catch (CancellationException e) {
                // Cancelled repair: leave the flag false so the loop retries.
                success = false;
            }
            if (success) {
                m_initiatorMailbox.setLeaderState(txnid);
                tmLog.info(m_whoami + "finished leader promotion. Took " + (System.currentTimeMillis() - startTime) + " ms.");
                // THIS IS where map cache should be updated, not
                // in the promotion algorithm.
                LeaderCacheWriter iv2masters = new LeaderCache(m_messenger.getZK(), m_zkMailboxNode);
                iv2masters.put(m_partitionId, m_initiatorMailbox.getHSId());
            } else {
                // The only known reason to fail is a failed replica during
                // recovery; that's a bounded event (by k-safety).
                // CrashVoltDB here means one node failure causing another.
                // Don't create a cascading failure - just try again.
                tmLog.info(m_whoami + "interrupted during leader promotion after " + (System.currentTimeMillis() - startTime) + " ms. of " + "trying. Retrying.");
            }
        }
        // Tag along and become the export master too
        ExportManager.instance().acceptMastership(m_partitionId);
    } catch (Exception e) {
        // Anything other than a cancelled repair is unrecoverable here.
        VoltDB.crashLocalVoltDB("Terminally failed leader promotion.", true, e);
    }
}
Also used : CancellationException(java.util.concurrent.CancellationException) ExecutionException(java.util.concurrent.ExecutionException) KeeperException(org.apache.zookeeper_voltpatches.KeeperException) RepairResult(org.voltdb.iv2.RepairAlgo.RepairResult)

Example 2 with RepairResult

use of org.voltdb.iv2.RepairAlgo.RepairResult in project voltdb by VoltDB.

the class TestMpPromoteAlgo method testFuzz.

/**
 * Fuzz test for {@code MpPromoteAlgo}.
 *
 * <p>Feeds a random message stream into three repair logs, randomly cutting off
 * the first two part-way through, then delivers every log's contents to the
 * algorithm and checks the repair stream it would produce: transaction ids must
 * be strictly increasing and at most one {@code FragmentTaskMessage} may appear.
 *
 * <p>The random seed is printed so that a failing run can be reproduced.
 */
@Test
public void testFuzz() throws Exception {
    System.out.println("Running testFuzz");
    InitiatorMailbox mbox = mock(InitiatorMailbox.class);
    // Record the seed: without it a failing fuzz run cannot be replayed.
    final long seed = System.currentTimeMillis();
    System.out.println("testFuzz random seed: " + seed);
    Random rand = new Random(seed);
    // Generate a random message stream to several "replicas", interrupted
    // at random points to all but one.  Validate that promotion repair
    // results in identical, correct, repair streams to all replicas.
    TxnEgo sphandle = TxnEgo.makeZero(0);
    // NOTE(review): uig is never read in this test - confirm it can be removed.
    UniqueIdGenerator uig = new UniqueIdGenerator(0, 0);
    sphandle = sphandle.makeNext();
    RandomMsgGenerator msgGen = new RandomMsgGenerator();
    boolean[] stops = new boolean[3];
    RepairLog[] logs = new RepairLog[3];
    for (int i = 0; i < 3; i++) {
        logs[i] = new RepairLog();
        stops[i] = false;
    }
    for (int i = 0; i < 4000; i++) {
        // get next message, update the sphandle according to SpScheduler rules,
        // but only submit messages that would have been forwarded by the master
        // to the repair log.
        TransactionInfoBaseMessage msg = msgGen.generateRandomMessageInStream();
        msg.setSpHandle(sphandle.getTxnId());
        sphandle = sphandle.makeNext();
        if (!msg.isReadOnly() || msg instanceof CompleteTransactionMessage) {
            if (!stops[0]) {
                logs[0].deliver(msg);
            }
            if (!stops[1]) {
                logs[1].deliver(msg);
            }
            // Log 2 is never stopped, so it always sees the full stream.
            logs[2].deliver(msg);
            // Randomly stop logs 0 and 1; once stopped, a log is never
            // fed any further messages.
            for (int j = 0; j < 2; j++) {
                // Hacky way to get spaced failures
                if (rand.nextDouble() < (.01 / ((j + 1) * 5))) {
                    stops[j] = true;
                }
            }
        }
    }
    List<Long> survivors = new ArrayList<Long>();
    survivors.add(0L);
    survivors.add(1L);
    survivors.add(2L);
    MpPromoteAlgo dut = new MpPromoteAlgo(survivors, mbox, "bleh ");
    Future<RepairResult> result = dut.start();
    // Hand each log's contents to the algorithm, tagged with the log's index as source.
    for (int i = 0; i < 3; i++) {
        List<Iv2RepairLogResponseMessage> stuff = logs[i].contents(dut.getRequestId(), true);
        System.out.println("Repair log size from: " + i + ": " + stuff.size());
        for (Iv2RepairLogResponseMessage msg : stuff) {
            msg.m_sourceHSId = (long) i;
            dut.deliver(msg);
        }
    }
    result.get();
    assertFalse(result.isCancelled());
    assertTrue(result.isDone());
    // Unfortunately, it's painful to try to stub things to make repairSurvivors() work, so we'll
    // go and inspect the guts of MpPromoteAlgo instead: rebuild the repair stream from the
    // algorithm's log union, one repair message per entry.
    List<TransactionInfoBaseMessage> finalStream = new ArrayList<TransactionInfoBaseMessage>();
    for (Iv2RepairLogResponseMessage li : dut.m_repairLogUnion) {
        VoltMessage msg = dut.createRepairMessage(li);
        finalStream.add((TransactionInfoBaseMessage) msg);
    }
    // Check the sanity of the repair stream generated by the MPI.
    long lastTxnId = Long.MIN_VALUE;
    boolean seenFrag = false;
    for (TransactionInfoBaseMessage msg : finalStream) {
        // Transaction ids must be strictly increasing after the first message.
        if (lastTxnId != Long.MIN_VALUE) {
            assertTrue(msg.getTxnId() > lastTxnId);
        }
        lastTxnId = msg.getTxnId();
        // At most one fragment may appear in the repair stream.
        if (msg instanceof FragmentTaskMessage) {
            assertFalse(seenFrag);
            seenFrag = true;
        }
    }
}
Also used : Iv2RepairLogResponseMessage(org.voltdb.messaging.Iv2RepairLogResponseMessage) FragmentTaskMessage(org.voltdb.messaging.FragmentTaskMessage) ArrayList(java.util.ArrayList) RepairResult(org.voltdb.iv2.RepairAlgo.RepairResult) VoltMessage(org.voltcore.messaging.VoltMessage) Random(java.util.Random) CompleteTransactionMessage(org.voltdb.messaging.CompleteTransactionMessage) TransactionInfoBaseMessage(org.voltcore.messaging.TransactionInfoBaseMessage) Test(org.junit.Test)

Example 3 with RepairResult

use of org.voltdb.iv2.RepairAlgo.RepairResult in project voltdb by VoltDB.

the class TestMpPromoteAlgo method testSlowDieOff.

/**
 * Drives {@code MpPromoteAlgo} with repair-log responses from three partition
 * masters (HSIds 1-3) that each saw progressively less of the MP transaction
 * stream, plus a response from HSId 4 (the id the mocked mailbox reports as its
 * own), then verifies that four repair messages are sent to all three masters
 * and that the promotion result carries the handle for txn 1003.
 *
 * <p>NOTE(review): the helper arguments are presumably (request id, source HSId,
 * sequence number, total message count, txn handle) - confirm against the helpers.
 */
@Test
public void testSlowDieOff() throws InterruptedException, ExecutionException {
    System.out.println("Running testSlowDieOff");
    InitiatorMailbox mpiMailbox = mock(MpInitiatorMailbox.class);
    doReturn(4L).when(mpiMailbox).getHSId();
    InOrder callOrder = inOrder(mpiMailbox);
    ArrayList<Long> partitionMasters = new ArrayList<Long>();
    for (long hsId = 1L; hsId <= 3L; hsId++) {
        partitionMasters.add(hsId);
    }
    MpPromoteAlgo promoter = new MpPromoteAlgo(partitionMasters, mpiMailbox, "Test");
    long reqId = promoter.getRequestId();
    Future<RepairResult> promotion = promoter.start();

    // ---- Source 1: eight responses ----
    // ack, then frag + complete for txn 1000
    promoter.deliver(makeRealAckResponse(reqId, 1L, 0, 8, txnEgo(1000L), m_hashinatorConfig));
    promoter.deliver(makeRealFragResponse(reqId, 1L, 1, 8, txnEgo(1000L)));
    promoter.deliver(makeRealCompleteResponse(reqId, 1L, 2, 8, txnEgo(1000L)));
    // frag + complete for txn 1001 (source 3 below has no complete for it)
    promoter.deliver(makeRealFragResponse(reqId, 1L, 3, 8, txnEgo(1001L)));
    promoter.deliver(makeRealCompleteResponse(reqId, 1L, 4, 8, txnEgo(1001L)));
    // frag + complete for txn 1002 (source 2 has no complete, source 3 has nothing)
    promoter.deliver(makeRealFragResponse(reqId, 1L, 5, 8, txnEgo(1002L)));
    promoter.deliver(makeRealCompleteResponse(reqId, 1L, 6, 8, txnEgo(1002L)));
    // only a frag for txn 1003; no other source reports this txn
    promoter.deliver(makeRealFragResponse(reqId, 1L, 7, 8, txnEgo(1003L)));

    // ---- Source 2: six responses ----
    // ack, then frag + complete for txn 1000
    promoter.deliver(makeRealAckResponse(reqId, 2L, 0, 6, txnEgo(1000L), m_hashinatorConfig));
    promoter.deliver(makeRealFragResponse(reqId, 2L, 1, 6, txnEgo(1000L)));
    promoter.deliver(makeRealCompleteResponse(reqId, 2L, 2, 6, txnEgo(1000L)));
    // frag + complete for txn 1001
    promoter.deliver(makeRealFragResponse(reqId, 2L, 3, 6, txnEgo(1001L)));
    promoter.deliver(makeRealCompleteResponse(reqId, 2L, 4, 6, txnEgo(1001L)));
    // frag only for txn 1002
    promoter.deliver(makeRealFragResponse(reqId, 2L, 5, 6, txnEgo(1002L)));

    // ---- Source 3: four responses ----
    // ack, then frag + complete for txn 1000
    promoter.deliver(makeRealAckResponse(reqId, 3L, 0, 4, txnEgo(1000L), m_hashinatorConfig));
    promoter.deliver(makeRealFragResponse(reqId, 3L, 1, 4, txnEgo(1000L)));
    promoter.deliver(makeRealCompleteResponse(reqId, 3L, 2, 4, txnEgo(1000L)));
    // frag only for txn 1001
    promoter.deliver(makeRealFragResponse(reqId, 3L, 3, 4, txnEgo(1001L)));

    // ---- Source 4 (the mailbox's own HSId): two responses ----
    // ack, then the complete for txn 1002
    promoter.deliver(makeRealAckResponse(reqId, 4L, 0, 2, txnEgo(1002L), m_hashinatorConfig));
    promoter.deliver(makeRealCompleteResponse(reqId, 4L, 1, 2, txnEgo(1002L)));

    // We should send to all hosts in all cases for all non-truncated MP txns now
    List<Long> expectedTargets = new ArrayList<Long>();
    for (long hsId = 1L; hsId <= 3L; hsId++) {
        expectedTargets.add(hsId);
    }
    callOrder.verify(mpiMailbox, times(4)).repairReplicasWith(eq(expectedTargets), any(Iv2RepairLogResponseMessage.class));
    assertEquals(txnEgo(1003L), promotion.get().m_txnId);
}
Also used : Iv2RepairLogResponseMessage(org.voltdb.messaging.Iv2RepairLogResponseMessage) InOrder(org.mockito.InOrder) ArrayList(java.util.ArrayList) RepairResult(org.voltdb.iv2.RepairAlgo.RepairResult) Test(org.junit.Test)

Example 4 with RepairResult

use of org.voltdb.iv2.RepairAlgo.RepairResult in project voltdb by VoltDB.

the class MpInitiator method acceptPromotion.

/**
 * Runs leader promotion for the multi-partition initiator.
 *
 * <p>Creates and starts a new term, then loops until a repair run completes:
 * each iteration builds a {@code RepairAlgo} over the term's interesting HSIds,
 * starts it and blocks on its result. A {@link CancellationException} from the
 * repair future is treated as a retryable interruption and the loop runs again.
 *
 * <p>On success the mailbox is given the resulting transaction id as leader
 * state, and the transactions the repair algorithm reports as interrupted are
 * examined: a single one is re-sent through the mailbox; more than one is logged
 * as fatal, a {@code DumpMessage} is sent to the term's interesting HSIds, and a
 * {@link RuntimeException} is thrown. Finally the leader entry for the partition
 * is written through a {@code LeaderCache}.
 *
 * <p>Any exception other than a cancelled repair crashes the local VoltDB process
 * via {@code VoltDB.crashLocalVoltDB}.
 */
@Override
public void acceptPromotion() {
    try {
        long startTime = System.currentTimeMillis();
        // Primitive flag: a boxed Boolean here would be auto-unboxed on every loop test.
        boolean success = false;
        m_term = createTerm(m_messenger.getZK(), m_partitionId, getInitiatorHSId(), m_initiatorMailbox, m_whoami);
        m_term.start();
        while (!success) {
            final RepairAlgo repair = m_initiatorMailbox.constructRepairAlgo(m_term.getInterestingHSIds(), m_whoami);
            // Sentinel value; only replaced when the repair future completes normally.
            long txnid = Long.MIN_VALUE;
            try {
                // Blocks until the repair algorithm finishes or is cancelled.
                RepairResult res = repair.start().get();
                txnid = res.m_txnId;
                success = true;
            } catch (CancellationException e) {
                // Cancelled repair: leave the flag false so the loop retries.
                success = false;
            }
            if (success) {
                m_initiatorMailbox.setLeaderState(txnid);
                // The cast relies on the mailbox building an MpPromoteAlgo for this initiator.
                List<Iv2InitiateTaskMessage> restartTxns = ((MpPromoteAlgo) repair).getInterruptedTxns();
                if (!restartTxns.isEmpty()) {
                    // Should only be one restarting MP txn
                    if (restartTxns.size() > 1) {
                        tmLog.fatal("Detected a fatal condition while repairing multipartition transactions " + "following a cluster topology change.");
                        tmLog.fatal("The MPI found multiple transactions requiring restart: ");
                        for (Iv2InitiateTaskMessage txn : restartTxns) {
                            tmLog.fatal("Restart candidate: " + txn);
                        }
                        tmLog.fatal("This node will fail.  Please contact VoltDB support with your cluster's " + "log files.");
                        // Send a DumpMessage to every interesting HSId before failing.
                        m_initiatorMailbox.send(com.google_voltpatches.common.primitives.Longs.toArray(m_term.getInterestingHSIds().get()), new DumpMessage());
                        // Caught by the outer handler, which crashes the local node.
                        throw new RuntimeException("Failing promoted MPI node with unresolvable repair condition.");
                    }
                    tmLog.debug(m_whoami + " restarting MP transaction: " + restartTxns.get(0));
                    m_initiatorMailbox.repairReplicasWith(null, restartTxns.get(0));
                }
                tmLog.info(m_whoami + "finished leader promotion. Took " + (System.currentTimeMillis() - startTime) + " ms.");
                // THIS IS where map cache should be updated, not
                // in the promotion algorithm.
                LeaderCacheWriter iv2masters = new LeaderCache(m_messenger.getZK(), m_zkMailboxNode);
                iv2masters.put(m_partitionId, m_initiatorMailbox.getHSId());
            } else {
                // The only known reason to fail is a failed replica during
                // recovery; that's a bounded event (by k-safety).
                // CrashVoltDB here means one node failure causing another.
                // Don't create a cascading failure - just try again.
                tmLog.info(m_whoami + "interrupted during leader promotion after " + (System.currentTimeMillis() - startTime) + " ms. of " + "trying. Retrying.");
            }
        }
    } catch (Exception e) {
        // Anything other than a cancelled repair is unrecoverable here.
        VoltDB.crashLocalVoltDB("Terminally failed leader promotion.", true, e);
    }
}
Also used : Iv2InitiateTaskMessage(org.voltdb.messaging.Iv2InitiateTaskMessage) DumpMessage(org.voltdb.messaging.DumpMessage) CancellationException(java.util.concurrent.CancellationException) ExecutionException(java.util.concurrent.ExecutionException) KeeperException(org.apache.zookeeper_voltpatches.KeeperException) RepairResult(org.voltdb.iv2.RepairAlgo.RepairResult)

Example 5 with RepairResult

use of org.voltdb.iv2.RepairAlgo.RepairResult in project voltdb by VoltDB.

the class TestSpPromoteAlgo method testFuzz.

/**
 * Fuzz test for {@code SpPromoteAlgo}.
 *
 * <p>Feeds a random message stream into three repair logs, randomly cutting off
 * the first two part-way through, then delivers every log's contents to the
 * algorithm. For each replica it rebuilds the stream that replica would hold
 * after repair (what it already saw plus what the algorithm says it needs) and
 * checks that all replicas end up with streams of equal length whose entries
 * agree pairwise on SP handle and message class.
 *
 * <p>The random seed is printed so that a failing run can be reproduced.
 */
@Test
public void testFuzz() throws Exception {
    InitiatorMailbox mbox = mock(InitiatorMailbox.class);
    Map<Long, List<TransactionInfoBaseMessage>> finalStreams = new HashMap<Long, List<TransactionInfoBaseMessage>>();
    // Record the seed: without it a failing fuzz run cannot be replayed.
    final long seed = System.currentTimeMillis();
    System.out.println("testFuzz random seed: " + seed);
    Random rand = new Random(seed);
    // Generate a random message stream to several "replicas", interrupted
    // at random points to all but one.  Validate that promotion repair
    // results in identical, correct, repair streams to all replicas.
    TxnEgo sphandle = TxnEgo.makeZero(0);
    UniqueIdGenerator spbuig = new UniqueIdGenerator(0, 0);
    UniqueIdGenerator mpbuig = new UniqueIdGenerator(0, 0);
    sphandle = sphandle.makeNext();
    RandomMsgGenerator msgGen = new RandomMsgGenerator();
    boolean[] stops = new boolean[3];
    RepairLog[] logs = new RepairLog[3];
    for (int i = 0; i < 3; i++) {
        logs[i] = new RepairLog();
        stops[i] = false;
        finalStreams.put((long) i, new ArrayList<TransactionInfoBaseMessage>());
    }
    // NOTE(review): this maximum is tracked but never asserted on in this test - confirm intent.
    long maxBinaryLogSpUniqueId = Long.MIN_VALUE;
    for (int i = 0; i < 4000; i++) {
        // get next message, update the sphandle according to SpScheduler rules,
        // but only submit messages that would have been forwarded by the master
        // to the repair log.
        TransactionInfoBaseMessage msg = msgGen.generateRandomMessageInStream();
        msg.setSpHandle(sphandle.getTxnId());
        if (msg instanceof Iv2InitiateTaskMessage) {
            Pair<Long, Long> uids = TestRepairLog.setBinaryLogUniqueId(msg, spbuig, mpbuig);
            maxBinaryLogSpUniqueId = Math.max(maxBinaryLogSpUniqueId, uids.getFirst());
        }
        sphandle = sphandle.makeNext();
        if (!msg.isReadOnly() || msg instanceof CompleteTransactionMessage) {
            if (!stops[0]) {
                logs[0].deliver(msg);
            }
            if (!stops[1]) {
                logs[1].deliver(msg);
            }
            // Log 2 is never stopped, so it always sees the full stream.
            logs[2].deliver(msg);
            // Randomly stop logs 0 and 1; once stopped, a log is never
            // fed any further messages.
            for (int j = 0; j < 2; j++) {
                // Hacky way to get spaced failures
                if (rand.nextDouble() < (.01 / ((j + 1) * 5))) {
                    stops[j] = true;
                }
            }
        }
    }
    List<Long> survivors = new ArrayList<Long>();
    survivors.add(0L);
    survivors.add(1L);
    survivors.add(2L);
    SpPromoteAlgo dut = new SpPromoteAlgo(survivors, mbox, "bleh ", 0);
    Future<RepairResult> result = dut.start();
    // Hand each log's contents to the algorithm, tagged with the log's index as source,
    // and remember what each replica had already seen.
    for (int i = 0; i < 3; i++) {
        List<Iv2RepairLogResponseMessage> stuff = logs[i].contents(dut.getRequestId(), false);
        System.out.println("Repair log size from: " + i + ": " + stuff.size());
        for (Iv2RepairLogResponseMessage msg : stuff) {
            msg.m_sourceHSId = i;
            dut.deliver(msg);
            // First message is metadata only, skip it in validation stream
            if (msg.getSequence() > 0) {
                finalStreams.get((long) i).add((TransactionInfoBaseMessage) msg.getPayload());
            }
        }
    }
    assertFalse(result.isCancelled());
    assertTrue(result.isDone());
    // Rather than stubbing enough to run repairSurvivors(), inspect the algorithm's
    // internals: for every entry of the log union, append it to each replica whose
    // repair struct reports it as needed.
    for (Iv2RepairLogResponseMessage li : dut.m_repairLogUnion) {
        for (Entry<Long, SpPromoteAlgo.ReplicaRepairStruct> entry : dut.m_replicaRepairStructs.entrySet()) {
            if (entry.getValue().needs(li.getHandle())) {
                // append the missing message for this 'node' to the list of messages that node has seen
                finalStreams.get(entry.getKey()).add((TransactionInfoBaseMessage) li.getPayload());
            }
        }
    }
    // check that all the lists for all the nodes are identical after repair
    int longest = Integer.MIN_VALUE;
    for (Entry<Long, List<TransactionInfoBaseMessage>> entry : finalStreams.entrySet()) {
        int size = entry.getValue().size();
        System.out.println("SIZE: " + size);
        if (longest == Integer.MIN_VALUE) {
            // First stream seen sets the reference length.
            longest = size;
        } else if (size != longest) {
            // Fail on shorter streams too; a '>' check would let them through and
            // they would only surface below as an IndexOutOfBoundsException.
            fail("Mismatch in repair stream size!");
        }
    }
    // Compare the streams position by position against the first stream iterated.
    for (int i = 0; i < longest; i++) {
        TransactionInfoBaseMessage current = null;
        for (Entry<Long, List<TransactionInfoBaseMessage>> entry : finalStreams.entrySet()) {
            TransactionInfoBaseMessage msg = entry.getValue().get(i);
            if (current == null) {
                current = msg;
            } else {
                assertEquals(current.getSpHandle(), msg.getSpHandle());
                assertEquals(current.getClass(), msg.getClass());
            }
        }
    }
}
Also used : Iv2RepairLogResponseMessage(org.voltdb.messaging.Iv2RepairLogResponseMessage) HashMap(java.util.HashMap) Iv2InitiateTaskMessage(org.voltdb.messaging.Iv2InitiateTaskMessage) ArrayList(java.util.ArrayList) Random(java.util.Random) CompleteTransactionMessage(org.voltdb.messaging.CompleteTransactionMessage) TransactionInfoBaseMessage(org.voltcore.messaging.TransactionInfoBaseMessage) List(java.util.List) RepairResult(org.voltdb.iv2.RepairAlgo.RepairResult) Test(org.junit.Test)

Aggregations

RepairResult (org.voltdb.iv2.RepairAlgo.RepairResult)5 ArrayList (java.util.ArrayList)3 Test (org.junit.Test)3 Iv2RepairLogResponseMessage (org.voltdb.messaging.Iv2RepairLogResponseMessage)3 Random (java.util.Random)2 CancellationException (java.util.concurrent.CancellationException)2 ExecutionException (java.util.concurrent.ExecutionException)2 KeeperException (org.apache.zookeeper_voltpatches.KeeperException)2 TransactionInfoBaseMessage (org.voltcore.messaging.TransactionInfoBaseMessage)2 CompleteTransactionMessage (org.voltdb.messaging.CompleteTransactionMessage)2 Iv2InitiateTaskMessage (org.voltdb.messaging.Iv2InitiateTaskMessage)2 HashMap (java.util.HashMap)1 List (java.util.List)1 InOrder (org.mockito.InOrder)1 VoltMessage (org.voltcore.messaging.VoltMessage)1 DumpMessage (org.voltdb.messaging.DumpMessage)1 FragmentTaskMessage (org.voltdb.messaging.FragmentTaskMessage)1