use of org.voltdb.messaging.Iv2InitiateTaskMessage in project voltdb by VoltDB.
the class SpScheduler method handleIv2InitiateTaskMessageRepair.
/**
 * Replays a single-partition initiate task on every site listed in
 * {@code needsRepair}.
 *
 * <p>A duplicate counter keyed on (txnId, spHandle) is registered that expects
 * one response per site in {@code needsRepair}. If the local site is among
 * them it is removed from {@code needsRepair} (the caller's list is mutated)
 * and a copy of the message is offered locally; whatever sites remain are sent
 * a copy of the message whose initiator and coordinator are this site.
 *
 * @param needsRepair HSIds of the sites that must re-run the task; modified in place
 * @param message the single-partition initiate task being repaired
 * @throws RuntimeException if {@code message} is not single-partition
 */
private void handleIv2InitiateTaskMessageRepair(List<Long> needsRepair, Iv2InitiateTaskMessage message) {
    if (!message.isSinglePartition()) {
        throw new RuntimeException("SpScheduler.handleIv2InitiateTaskMessageRepair should never receive multi-partition initiations.");
    }

    final long txnId = message.getTxnId();
    final long localHSId = m_mailbox.getHSId();

    // The counter must see exactly the sites in needsRepair as they were on
    // entry (the local site may or may not be among them), so snapshot the
    // list before it is trimmed below.
    // The final response goes to VALHALLA, i.e. into the ether: the original
    // ClientInterface HSId is not stored anywhere we can reach. It would be
    // more useful to have that HSId somewhere handy.
    final List<Long> expectedResponders = new ArrayList<Long>(needsRepair);
    final DuplicateCounter repairCounter =
            new DuplicateCounter(HostMessenger.VALHALLA, txnId, expectedResponders, message);
    final DuplicateCounterKey counterKey = new DuplicateCounterKey(txnId, message.getSpHandle());
    safeAddToDuplicateCounterMap(counterKey, repairCounter);

    m_uniqueIdGenerator.updateMostRecentlyGeneratedUniqueId(message.getUniqueId());

    // Local repair: remove(Object) reports whether the local site was listed
    // and drops it in the same step.
    final boolean repairLocally = needsRepair.remove(Long.valueOf(localHSId));
    if (repairLocally) {
        // make a copy because handleIv2 non-repair case does?
        doLocalInitiateOffer(
                new Iv2InitiateTaskMessage(message.getInitiatorHSId(), message.getCoordinatorHSId(), message));
    }

    // Remote repair: nothing to send if no other site is left.
    if (needsRepair.isEmpty()) {
        return;
    }
    final Iv2InitiateTaskMessage remoteWork = new Iv2InitiateTaskMessage(localHSId, localHSId, message);
    m_mailbox.send(com.google_voltpatches.common.primitives.Longs.toArray(needsRepair), remoteWork);
}
use of org.voltdb.messaging.Iv2InitiateTaskMessage in project voltdb by VoltDB.
the class SpScheduler method deliverReadyTxns.
/**
 * Empties the replay sequencer in two passes: every message returned by
 * {@code poll()} is delivered, then every message returned by {@code drain()}
 * is discarded. A drained {@link Iv2InitiateTaskMessage} is answered with an
 * IGNORED_TRANSACTION response sent to the response's initiator HSId; any
 * other drained message is dropped without a reply.
 */
private void deliverReadyTxns() {
    // Pass 1: deliver everything the sequencer has put in order.
    for (VoltMessage sequenced = m_replaySequencer.poll();
            sequenced != null;
            sequenced = m_replaySequencer.poll()) {
        deliver(sequenced);
    }

    // Pass 2: pull the drainable messages.
    for (VoltMessage drained = m_replaySequencer.drain();
            drained != null;
            drained = m_replaySequencer.drain()) {
        if (!(drained instanceof Iv2InitiateTaskMessage)) {
            continue;
        }
        // Send IGNORED response for all SPs
        final InitiateResponseMessage ignored = new InitiateResponseMessage((Iv2InitiateTaskMessage) drained);
        final ClientResponseImpl ignoredResult = new ClientResponseImpl(
                ClientResponse.UNEXPECTED_FAILURE, new VoltTable[0], ClientResponseImpl.IGNORED_TRANSACTION);
        ignored.setResults(ignoredResult);
        m_mailbox.send(ignored.getInitiatorHSId(), ignored);
    }
}
use of org.voltdb.messaging.Iv2InitiateTaskMessage in project voltdb by VoltDB.
the class MpScheduler method handleIv2InitiateTaskMessageRepair.
/**
 * Re-queues the multi-partition transaction carried by {@code message} so it
 * is restarted.
 *
 * <p>The message is copied field by field, the unique-id generator is advanced
 * past the message's unique id, and a restart task is built: an N-partition
 * task when {@code isNpTxn(message)} holds, an NP task constructor is
 * available and the involved partitions can be determined; otherwise a regular
 * {@link MpProcedureTask}. The task's transaction state is recorded in
 * {@code m_outstandingTxns} and the task is offered to {@code m_pendingTasks}.
 *
 * @param needsRepair not used by this method
 * @param message the initiate task of the transaction being restarted
 */
private void handleIv2InitiateTaskMessageRepair(List<Long> needsRepair, Iv2InitiateTaskMessage message) {
    // just reforward the Iv2InitiateTaskMessage for the txn being restarted
    // this copy may be unnecessary
    final String procedureName = message.getStoredProcedureName();
    Iv2InitiateTaskMessage mp = new Iv2InitiateTaskMessage(message.getInitiatorHSId(), message.getCoordinatorHSId(), message.getTruncationHandle(), message.getTxnId(), message.getUniqueId(), message.isReadOnly(), message.isSinglePartition(), message.getStoredProcedureInvocation(), message.getClientInterfaceHandle(), message.getConnectionId(), message.isForReplay());
    m_uniqueIdGenerator.updateMostRecentlyGeneratedUniqueId(message.getUniqueId());
    // Multi-partition initiation (at the MPI)
    MpProcedureTask task = null;
    if (isNpTxn(message) && NpProcedureTaskConstructor != null) {
        Set<Integer> involvedPartitions = getBalancePartitions(message);
        if (involvedPartitions != null) {
            // Restrict a copy of the partition-master map to the involved partitions.
            HashMap<Integer, Long> involvedPartitionMasters = Maps.newHashMap(m_partitionMasters);
            involvedPartitionMasters.keySet().retainAll(involvedPartitions);
            task = instantiateNpProcedureTask(m_mailbox, procedureName, m_pendingTasks, mp, involvedPartitionMasters, m_buddyHSIds.get(m_nextBuddy), true);
        }
        // if cannot figure out the involved partitions, run it as an MP txn
    }
    if (task == null) {
        task = new MpProcedureTask(m_mailbox, procedureName, m_pendingTasks, mp, m_iv2Masters, m_partitionMasters, m_buddyHSIds.get(m_nextBuddy), true);
    }
    // Rotate to the next buddy site. The previous form,
    // "m_nextBuddy = (m_nextBuddy++) % size", assigned the pre-increment value
    // back and so never advanced the index.
    m_nextBuddy = (m_nextBuddy + 1) % m_buddyHSIds.size();
    m_outstandingTxns.put(task.m_txnState.txnId, task.m_txnState);
    m_pendingTasks.offer(task);
}
use of org.voltdb.messaging.Iv2InitiateTaskMessage in project voltdb by VoltDB.
the class MpInitiator method acceptPromotion.
/**
 * Promotes this initiator to leader.
 *
 * <p>Creates and starts a new term, then runs the repair algorithm until it
 * finishes without being cancelled. After a successful repair the mailbox's
 * leader state is set to the repaired txn id, at most one interrupted
 * multi-partition transaction is restarted, and this initiator's HSId is
 * published to the leader cache for its partition. Finding more than one
 * interrupted transaction is treated as fatal. Any exception escaping the
 * sequence crashes the local VoltDB node.
 */
@Override
public void acceptPromotion() {
    try {
        final long promotionStart = System.currentTimeMillis();
        m_term = createTerm(m_messenger.getZK(), m_partitionId, getInitiatorHSId(), m_initiatorMailbox, m_whoami);
        m_term.start();

        boolean promoted = false;
        while (!promoted) {
            final RepairAlgo repair =
                    m_initiatorMailbox.constructRepairAlgo(m_term.getInterestingHSIds(), m_whoami);

            // term syslogs the start of leader promotion.
            long repairedTxnId;
            try {
                RepairResult outcome = repair.start().get();
                repairedTxnId = outcome.m_txnId;
            } catch (CancellationException cancelled) {
                // The only known reason to fail is a failed replica during
                // recovery; that's a bounded event (by k-safety).
                // CrashVoltDB here means one node failure causing another.
                // Don't create a cascading failure - just try again.
                tmLog.info(m_whoami + "interrupted during leader promotion after "
                        + (System.currentTimeMillis() - promotionStart) + " ms. of trying. Retrying.");
                continue;
            }

            promoted = true;
            m_initiatorMailbox.setLeaderState(repairedTxnId);

            final List<Iv2InitiateTaskMessage> interrupted = ((MpPromoteAlgo) repair).getInterruptedTxns();
            // Should only be one restarting MP txn
            if (interrupted.size() > 1) {
                tmLog.fatal("Detected a fatal condition while repairing multipartition transactions following a cluster topology change.");
                tmLog.fatal("The MPI found multiple transactions requiring restart: ");
                for (Iv2InitiateTaskMessage candidate : interrupted) {
                    tmLog.fatal("Restart candidate: " + candidate);
                }
                tmLog.fatal("This node will fail. Please contact VoltDB support with your cluster's log files.");
                m_initiatorMailbox.send(
                        com.google_voltpatches.common.primitives.Longs.toArray(m_term.getInterestingHSIds().get()),
                        new DumpMessage());
                throw new RuntimeException("Failing promoted MPI node with unresolvable repair condition.");
            }
            if (!interrupted.isEmpty()) {
                final Iv2InitiateTaskMessage toRestart = interrupted.get(0);
                tmLog.debug(m_whoami + " restarting MP transaction: " + toRestart);
                m_initiatorMailbox.repairReplicasWith(null, toRestart);
            }

            tmLog.info(m_whoami + "finished leader promotion. Took "
                    + (System.currentTimeMillis() - promotionStart) + " ms.");

            // THIS IS where map cache should be updated, not
            // in the promotion algorithm.
            final LeaderCacheWriter masterCache = new LeaderCache(m_messenger.getZK(), m_zkMailboxNode);
            masterCache.put(m_partitionId, m_initiatorMailbox.getHSId());
        }
    } catch (Exception e) {
        VoltDB.crashLocalVoltDB("Terminally failed leader promotion.", true, e);
    }
}
use of org.voltdb.messaging.Iv2InitiateTaskMessage in project voltdb by VoltDB.
the class TestSpPromoteAlgo method makeStaleResponse.
/**
 * Builds a repair-log response via {@code makeResponse(spHandle)} and stubs it
 * to report the given request id and to carry a mocked
 * {@link Iv2InitiateTaskMessage} as its payload.
 *
 * @param spHandle forwarded to {@code makeResponse}
 * @param requestId value the response's {@code getRequestId()} is stubbed to return
 * @return the stubbed response (presumably a Mockito mock, since it is stubbed with {@code when})
 */
Iv2RepairLogResponseMessage makeStaleResponse(long spHandle, long requestId) {
    final Iv2RepairLogResponseMessage staleResponse = makeResponse(spHandle);
    final Iv2InitiateTaskMessage mockedPayload = mock(Iv2InitiateTaskMessage.class);

    when(staleResponse.getRequestId()).thenReturn(requestId);
    when(staleResponse.getPayload()).thenReturn(mockedPayload);

    return staleResponse;
}
Aggregations