use of org.voltdb.iv2.RepairAlgo.RepairResult in project voltdb by VoltDB.
the class SpInitiator method acceptPromotion.
/**
 * Runs leader promotion for this initiator.
 *
 * <p>Creates and starts a new term, then loops running the repair algorithm until one run
 * completes without being cancelled. On success it records the leader state with the repaired
 * transaction id, publishes this initiator's HSId in the leader cache and finally accepts
 * export mastership for the partition.
 *
 * <p>A cancelled repair is logged and retried. If the mailbox refuses the promotion the local
 * node is crashed. Any other exception also crashes the local node.
 */
@Override
public void acceptPromotion() {
    try {
        long startTime = System.currentTimeMillis();
        // Plain primitive flag; there is no need for a boxed Boolean here.
        boolean success = false;
        m_term = createTerm(m_messenger.getZK(), m_partitionId, getInitiatorHSId(), m_initiatorMailbox, m_whoami);
        m_term.start();
        while (!success) {
            RepairAlgo repair = m_initiatorMailbox.constructRepairAlgo(m_term.getInterestingHSIds(), m_whoami);
            // A site that is still rejoining can not be promoted to leader: when the
            // mailbox refuses the promotion, log it and crash this node.
            if (!m_initiatorMailbox.acceptPromotion()) {
                tmLog.error(m_whoami + "rejoining site can not be promoted to leader. Terminating.");
                VoltDB.crashLocalVoltDB("A rejoining site can not be promoted to leader.", false, null);
                return;
            }
            // term syslogs the start of leader promotion.
            long txnid = Long.MIN_VALUE;
            try {
                // Blocks until the repair future completes or is cancelled.
                RepairResult res = repair.start().get();
                txnid = res.m_txnId;
                success = true;
            } catch (CancellationException e) {
                // Cancellation is the retryable case; fall through to the retry branch.
                success = false;
            }
            if (success) {
                m_initiatorMailbox.setLeaderState(txnid);
                tmLog.info(m_whoami + "finished leader promotion. Took " + (System.currentTimeMillis() - startTime) + " ms.");
                // THIS IS where map cache should be updated, not
                // in the promotion algorithm.
                LeaderCacheWriter iv2masters = new LeaderCache(m_messenger.getZK(), m_zkMailboxNode);
                iv2masters.put(m_partitionId, m_initiatorMailbox.getHSId());
            } else {
                // The only known reason to fail is a failed replica during
                // recovery; that's a bounded event (by k-safety).
                // CrashVoltDB here means one node failure causing another.
                // Don't create a cascading failure - just try again.
                tmLog.info(m_whoami + "interrupted during leader promotion after " + (System.currentTimeMillis() - startTime) + " ms. of " + "trying. Retrying.");
            }
        }
        // Tag along and become the export master too
        ExportManager.instance().acceptMastership(m_partitionId);
    } catch (Exception e) {
        VoltDB.crashLocalVoltDB("Terminally failed leader promotion.", true, e);
    }
}
use of org.voltdb.iv2.RepairAlgo.RepairResult in project voltdb by VoltDB.
the class TestMpPromoteAlgo method testFuzz.
/**
 * Fuzz test for {@code MpPromoteAlgo}.
 *
 * <p>Feeds a random message stream into three repair logs, where logs 0 and 1 may stop
 * receiving messages at a random point and log 2 receives everything. The repair-log contents
 * are then delivered to the algorithm and the resulting repair stream is checked: transaction
 * ids must be strictly increasing and at most one {@code FragmentTaskMessage} may appear.
 *
 * <p>The random seed is printed so that a failing run can be reproduced.
 */
@Test
public void testFuzz() throws Exception {
    System.out.println("Running testFuzz");
    InitiatorMailbox mbox = mock(InitiatorMailbox.class);
    // Report the seed; without it a fuzz failure can not be replayed.
    long seed = System.currentTimeMillis();
    System.out.println("testFuzz random seed: " + seed);
    Random rand = new Random(seed);
    // Generate a random message stream to several "replicas", interrupted
    // at random points to all but one. Validate that promotion repair
    // results in identical, correct, repair streams to all replicas.
    TxnEgo sphandle = TxnEgo.makeZero(0);
    sphandle = sphandle.makeNext();
    RandomMsgGenerator msgGen = new RandomMsgGenerator();
    boolean[] stops = new boolean[3];
    RepairLog[] logs = new RepairLog[3];
    for (int i = 0; i < 3; i++) {
        logs[i] = new RepairLog();
        stops[i] = false;
    }
    for (int i = 0; i < 4000; i++) {
        // get next message, update the sphandle according to SpScheduler rules,
        // but only submit messages that would have been forwarded by the master
        // to the repair log.
        TransactionInfoBaseMessage msg = msgGen.generateRandomMessageInStream();
        msg.setSpHandle(sphandle.getTxnId());
        sphandle = sphandle.makeNext();
        if (!msg.isReadOnly() || msg instanceof CompleteTransactionMessage) {
            if (!stops[0]) {
                logs[0].deliver(msg);
            }
            if (!stops[1]) {
                logs[1].deliver(msg);
            }
            logs[2].deliver(msg);
            // Randomly mark logs 0 and 1 as stopped; a stopped log is never
            // fed any further messages.
            for (int j = 0; j < 2; j++) {
                // Hacky way to get spaced failures
                if (rand.nextDouble() < (.01 / ((j + 1) * 5))) {
                    stops[j] = true;
                }
            }
        }
    }
    List<Long> survivors = new ArrayList<Long>();
    survivors.add(0L);
    survivors.add(1L);
    survivors.add(2L);
    MpPromoteAlgo dut = new MpPromoteAlgo(survivors, mbox, "bleh ");
    Future<RepairResult> result = dut.start();
    // Hand every log's contents to the algorithm, tagged with the log index as source HSId.
    for (int i = 0; i < 3; i++) {
        List<Iv2RepairLogResponseMessage> stuff = logs[i].contents(dut.getRequestId(), true);
        System.out.println("Repair log size from: " + i + ": " + stuff.size());
        for (Iv2RepairLogResponseMessage msg : stuff) {
            msg.m_sourceHSId = (long) i;
            dut.deliver(msg);
        }
    }
    result.get();
    assertFalse(result.isCancelled());
    assertTrue(result.isDone());
    // Unfortunately, it's painful to try to stub things to make repairSurvivors() work, so we'll
    // go and inspect the guts of MpPromoteAlgo instead. This iteration is largely a copy of the inner loop
    // of repairSurvivors()
    List<TransactionInfoBaseMessage> finalStream = new ArrayList<TransactionInfoBaseMessage>();
    for (Iv2RepairLogResponseMessage li : dut.m_repairLogUnion) {
        VoltMessage msg = dut.createRepairMessage(li);
        finalStream.add((TransactionInfoBaseMessage) msg);
    }
    // Check the sanity of the repair stream generated by the MPI.
    long lastTxnId = Long.MIN_VALUE;
    boolean seenFrag = false;
    for (TransactionInfoBaseMessage msg : finalStream) {
        if (lastTxnId == Long.MIN_VALUE) {
            lastTxnId = msg.getTxnId();
        } else {
            // Transaction ids must be strictly increasing along the stream.
            assertTrue(msg.getTxnId() > lastTxnId);
            lastTxnId = msg.getTxnId();
        }
        if (msg instanceof FragmentTaskMessage) {
            // At most one fragment may appear in the repair stream.
            assertFalse(seenFrag);
            seenFrag = true;
        }
    }
}
use of org.voltdb.iv2.RepairAlgo.RepairResult in project voltdb by VoltDB.
the class TestMpPromoteAlgo method testSlowDieOff.
/**
 * Delivers a hand-built set of repair-log responses from sources 1, 2, 3 and 4 to an
 * {@code MpPromoteAlgo} in which each successive source has seen less of the transaction
 * stream, then verifies that the mailbox was asked to repair all three masters four times
 * and that the repair result carries the transaction id built from 1003.
 */
@Test
public void testSlowDieOff() throws InterruptedException, ExecutionException {
    System.out.println("Running testSlowDieOff");
    InitiatorMailbox mpMailbox = mock(MpInitiatorMailbox.class);
    doReturn(4L).when(mpMailbox).getHSId();
    InOrder ordered = inOrder(mpMailbox);

    List<Long> survivorMasters = new ArrayList<Long>();
    survivorMasters.add(1L);
    survivorMasters.add(2L);
    survivorMasters.add(3L);

    MpPromoteAlgo promoter = new MpPromoteAlgo(survivorMasters, mpMailbox, "Test");
    long reqId = promoter.getRequestId();
    Future<RepairResult> pendingRepair = promoter.start();

    // Source 1 (8 responses): ack, frag + complete for 1000, 1001 and 1002,
    // and finally a lone fragment for 1003.
    promoter.deliver(makeRealAckResponse(reqId, 1L, 0, 8, txnEgo(1000L), m_hashinatorConfig));
    promoter.deliver(makeRealFragResponse(reqId, 1L, 1, 8, txnEgo(1000L)));
    promoter.deliver(makeRealCompleteResponse(reqId, 1L, 2, 8, txnEgo(1000L)));
    promoter.deliver(makeRealFragResponse(reqId, 1L, 3, 8, txnEgo(1001L)));
    promoter.deliver(makeRealCompleteResponse(reqId, 1L, 4, 8, txnEgo(1001L)));
    promoter.deliver(makeRealFragResponse(reqId, 1L, 5, 8, txnEgo(1002L)));
    promoter.deliver(makeRealCompleteResponse(reqId, 1L, 6, 8, txnEgo(1002L)));
    promoter.deliver(makeRealFragResponse(reqId, 1L, 7, 8, txnEgo(1003L)));

    // Source 2 (6 responses): ack, frag + complete for 1000 and 1001,
    // and only the fragment for 1002 (its complete is missing).
    promoter.deliver(makeRealAckResponse(reqId, 2L, 0, 6, txnEgo(1000L), m_hashinatorConfig));
    promoter.deliver(makeRealFragResponse(reqId, 2L, 1, 6, txnEgo(1000L)));
    promoter.deliver(makeRealCompleteResponse(reqId, 2L, 2, 6, txnEgo(1000L)));
    promoter.deliver(makeRealFragResponse(reqId, 2L, 3, 6, txnEgo(1001L)));
    promoter.deliver(makeRealCompleteResponse(reqId, 2L, 4, 6, txnEgo(1001L)));
    promoter.deliver(makeRealFragResponse(reqId, 2L, 5, 6, txnEgo(1002L)));

    // Source 3 (4 responses): ack, frag + complete for 1000,
    // and only the fragment for 1001 (its complete is missing).
    promoter.deliver(makeRealAckResponse(reqId, 3L, 0, 4, txnEgo(1000L), m_hashinatorConfig));
    promoter.deliver(makeRealFragResponse(reqId, 3L, 1, 4, txnEgo(1000L)));
    promoter.deliver(makeRealCompleteResponse(reqId, 3L, 2, 4, txnEgo(1000L)));
    promoter.deliver(makeRealFragResponse(reqId, 3L, 3, 4, txnEgo(1001L)));

    // Source 4 (the mocked mailbox's own HSId, i.e. the MPI): ack plus the complete for 1002.
    promoter.deliver(makeRealAckResponse(reqId, 4L, 0, 2, txnEgo(1002L), m_hashinatorConfig));
    promoter.deliver(makeRealCompleteResponse(reqId, 4L, 1, 2, txnEgo(1002L)));

    // We should send to all hosts in all cases for all non-truncated MP txns now
    List<Long> expectedTargets = new ArrayList<Long>();
    expectedTargets.add(1L);
    expectedTargets.add(2L);
    expectedTargets.add(3L);
    ordered.verify(mpMailbox, times(4)).repairReplicasWith(eq(expectedTargets), any(Iv2RepairLogResponseMessage.class));
    assertEquals(txnEgo(1003L), pendingRepair.get().m_txnId);
}
use of org.voltdb.iv2.RepairAlgo.RepairResult in project voltdb by VoltDB.
the class MpInitiator method acceptPromotion.
/**
 * Runs leader promotion for the multi-partition initiator.
 *
 * <p>Creates and starts a new term, then loops running the repair algorithm until one run
 * completes without being cancelled. On success it records the leader state, restarts the
 * single interrupted MP transaction reported by the repair algorithm (if any) and publishes
 * this initiator's HSId in the leader cache.
 *
 * <p>If the repair algorithm reports more than one interrupted transaction, the candidates are
 * logged, a {@code DumpMessage} is sent to the interesting HSIds and a
 * {@code RuntimeException} is thrown. That exception, like any other, is caught below and
 * crashes the local node. A cancelled repair is logged and retried.
 */
@Override
public void acceptPromotion() {
    try {
        long startTime = System.currentTimeMillis();
        // Plain primitive flag; there is no need for a boxed Boolean here.
        boolean success = false;
        m_term = createTerm(m_messenger.getZK(), m_partitionId, getInitiatorHSId(), m_initiatorMailbox, m_whoami);
        m_term.start();
        while (!success) {
            final RepairAlgo repair = m_initiatorMailbox.constructRepairAlgo(m_term.getInterestingHSIds(), m_whoami);
            // term syslogs the start of leader promotion.
            long txnid = Long.MIN_VALUE;
            try {
                // Blocks until the repair future completes or is cancelled.
                RepairResult res = repair.start().get();
                txnid = res.m_txnId;
                success = true;
            } catch (CancellationException e) {
                // Cancellation is the retryable case; fall through to the retry branch.
                success = false;
            }
            if (success) {
                m_initiatorMailbox.setLeaderState(txnid);
                List<Iv2InitiateTaskMessage> restartTxns = ((MpPromoteAlgo) repair).getInterruptedTxns();
                if (!restartTxns.isEmpty()) {
                    // Should only be one restarting MP txn
                    if (restartTxns.size() > 1) {
                        tmLog.fatal("Detected a fatal condition while repairing multipartition transactions " + "following a cluster topology change.");
                        tmLog.fatal("The MPI found multiple transactions requiring restart: ");
                        for (Iv2InitiateTaskMessage txn : restartTxns) {
                            tmLog.fatal("Restart candidate: " + txn);
                        }
                        tmLog.fatal("This node will fail. Please contact VoltDB support with your cluster's " + "log files.");
                        m_initiatorMailbox.send(com.google_voltpatches.common.primitives.Longs.toArray(m_term.getInterestingHSIds().get()), new DumpMessage());
                        throw new RuntimeException("Failing promoted MPI node with unresolvable repair condition.");
                    }
                    tmLog.debug(m_whoami + " restarting MP transaction: " + restartTxns.get(0));
                    m_initiatorMailbox.repairReplicasWith(null, restartTxns.get(0));
                }
                tmLog.info(m_whoami + "finished leader promotion. Took " + (System.currentTimeMillis() - startTime) + " ms.");
                // THIS IS where map cache should be updated, not
                // in the promotion algorithm.
                LeaderCacheWriter iv2masters = new LeaderCache(m_messenger.getZK(), m_zkMailboxNode);
                iv2masters.put(m_partitionId, m_initiatorMailbox.getHSId());
            } else {
                // The only known reason to fail is a failed replica during
                // recovery; that's a bounded event (by k-safety).
                // CrashVoltDB here means one node failure causing another.
                // Don't create a cascading failure - just try again.
                tmLog.info(m_whoami + "interrupted during leader promotion after " + (System.currentTimeMillis() - startTime) + " ms. of " + "trying. Retrying.");
            }
        }
    } catch (Exception e) {
        VoltDB.crashLocalVoltDB("Terminally failed leader promotion.", true, e);
    }
}
use of org.voltdb.iv2.RepairAlgo.RepairResult in project voltdb by VoltDB.
the class TestSpPromoteAlgo method testFuzz.
/**
 * Fuzz test for {@code SpPromoteAlgo}.
 *
 * <p>Feeds a random message stream into three repair logs, where logs 0 and 1 may stop
 * receiving messages at a random point and log 2 receives everything. The repair-log contents
 * are delivered to the algorithm, the messages each replica still needs are appended to that
 * replica's stream, and the three resulting streams are required to have the same length and
 * the same SP handle and message class at every position.
 *
 * <p>The random seed is printed so that a failing run can be reproduced.
 */
@Test
public void testFuzz() throws Exception {
    InitiatorMailbox mbox = mock(InitiatorMailbox.class);
    Map<Long, List<TransactionInfoBaseMessage>> finalStreams = new HashMap<Long, List<TransactionInfoBaseMessage>>();
    // Report the seed; without it a fuzz failure can not be replayed.
    long seed = System.currentTimeMillis();
    System.out.println("testFuzz random seed: " + seed);
    Random rand = new Random(seed);
    // Generate a random message stream to several "replicas", interrupted
    // at random points to all but one. Validate that promotion repair
    // results in identical, correct, repair streams to all replicas.
    TxnEgo sphandle = TxnEgo.makeZero(0);
    UniqueIdGenerator spbuig = new UniqueIdGenerator(0, 0);
    UniqueIdGenerator mpbuig = new UniqueIdGenerator(0, 0);
    sphandle = sphandle.makeNext();
    RandomMsgGenerator msgGen = new RandomMsgGenerator();
    boolean[] stops = new boolean[3];
    RepairLog[] logs = new RepairLog[3];
    for (int i = 0; i < 3; i++) {
        logs[i] = new RepairLog();
        stops[i] = false;
        finalStreams.put((long) i, new ArrayList<TransactionInfoBaseMessage>());
    }
    // NOTE(review): this maximum is tracked but never read in this method - confirm
    // whether an assertion against the repair result was intended.
    long maxBinaryLogSpUniqueId = Long.MIN_VALUE;
    for (int i = 0; i < 4000; i++) {
        // get next message, update the sphandle according to SpScheduler rules,
        // but only submit messages that would have been forwarded by the master
        // to the repair log.
        TransactionInfoBaseMessage msg = msgGen.generateRandomMessageInStream();
        msg.setSpHandle(sphandle.getTxnId());
        if (msg instanceof Iv2InitiateTaskMessage) {
            Pair<Long, Long> uids = TestRepairLog.setBinaryLogUniqueId(msg, spbuig, mpbuig);
            maxBinaryLogSpUniqueId = Math.max(maxBinaryLogSpUniqueId, uids.getFirst());
        }
        sphandle = sphandle.makeNext();
        if (!msg.isReadOnly() || msg instanceof CompleteTransactionMessage) {
            if (!stops[0]) {
                logs[0].deliver(msg);
            }
            if (!stops[1]) {
                logs[1].deliver(msg);
            }
            logs[2].deliver(msg);
            // Randomly mark logs 0 and 1 as stopped; a stopped log is never
            // fed any further messages.
            for (int j = 0; j < 2; j++) {
                // Hacky way to get spaced failures
                if (rand.nextDouble() < (.01 / ((j + 1) * 5))) {
                    stops[j] = true;
                }
            }
        }
    }
    List<Long> survivors = new ArrayList<Long>();
    survivors.add(0L);
    survivors.add(1L);
    survivors.add(2L);
    SpPromoteAlgo dut = new SpPromoteAlgo(survivors, mbox, "bleh ", 0);
    Future<RepairResult> result = dut.start();
    for (int i = 0; i < 3; i++) {
        List<Iv2RepairLogResponseMessage> stuff = logs[i].contents(dut.getRequestId(), false);
        System.out.println("Repair log size from: " + i + ": " + stuff.size());
        for (Iv2RepairLogResponseMessage msg : stuff) {
            msg.m_sourceHSId = i;
            dut.deliver(msg);
            // First message is metadata only, skip it in validation stream
            if (msg.getSequence() > 0) {
                finalStreams.get((long) i).add((TransactionInfoBaseMessage) msg.getPayload());
            }
        }
    }
    assertFalse(result.isCancelled());
    assertTrue(result.isDone());
    // Walk the union of the repair logs and, for every replica whose repair struct says it
    // needs a message, append that message to the replica's stream.
    for (Iv2RepairLogResponseMessage li : dut.m_repairLogUnion) {
        for (Entry<Long, SpPromoteAlgo.ReplicaRepairStruct> entry : dut.m_replicaRepairStructs.entrySet()) {
            if (entry.getValue().needs(li.getHandle())) {
                // append the missing message for this 'node' to the list of messages that node has seen
                finalStreams.get(entry.getKey()).add((TransactionInfoBaseMessage) li.getPayload());
            }
        }
    }
    // check that all the lists for all the nodes are identical after repair
    int longest = Integer.MIN_VALUE;
    for (Entry<Long, List<TransactionInfoBaseMessage>> entry : finalStreams.entrySet()) {
        int size = entry.getValue().size();
        System.out.println("SIZE: " + size);
        if (longest == Integer.MIN_VALUE) {
            longest = size;
        } else if (size != longest) {
            // Any difference is a mismatch, whether this stream is longer or shorter
            // than the first one seen.
            fail("Mismatch in repair stream size!");
        }
    }
    for (int i = 0; i < longest; i++) {
        TransactionInfoBaseMessage current = null;
        for (Entry<Long, List<TransactionInfoBaseMessage>> entry : finalStreams.entrySet()) {
            TransactionInfoBaseMessage msg = entry.getValue().get(i);
            if (current == null) {
                current = msg;
            } else {
                assertEquals(current.getSpHandle(), msg.getSpHandle());
                assertEquals(current.getClass(), msg.getClass());
            }
        }
    }
}
Aggregations