Use of org.voltdb.messaging.DumpMessage in project voltdb by VoltDB.
Class RepairLog, method deliver.
// Offer a new message to the repair log. This will truncate
// the repairLog if the message includes a truncation hint.
public void deliver(VoltMessage msg) {
if (!m_isLeader && msg instanceof Iv2InitiateTaskMessage) {
final Iv2InitiateTaskMessage m = (Iv2InitiateTaskMessage) msg;
// We can't repair read only SP transactions. Just don't log them to the repair log.
if (m.isReadOnly()) {
return;
}
m_lastSpHandle = m.getSpHandle();
truncate(m.getTruncationHandle(), IS_SP);
m_logSP.add(new Item(IS_SP, m, m.getSpHandle(), m.getTxnId()));
} else if (msg instanceof FragmentTaskMessage) {
final FragmentTaskMessage m = (FragmentTaskMessage) msg;
// We can't repair read only SP transactions. Just don't log them to the repair log.
if (m.isReadOnly()) {
return;
}
truncate(m.getTruncationHandle(), IS_MP);
// only log the first fragment of a procedure (and handle 1st case)
if (m.getTxnId() > m_lastMpHandle || m_lastMpHandle == Long.MAX_VALUE) {
m_logMP.add(new Item(IS_MP, m, m.getSpHandle(), m.getTxnId()));
m_lastMpHandle = m.getTxnId();
m_lastSpHandle = m.getSpHandle();
}
} else if (msg instanceof CompleteTransactionMessage) {
// a CompleteTransactionMessage which indicates restart is not the end of the
// transaction. We don't want to log it in the repair log.
CompleteTransactionMessage ctm = (CompleteTransactionMessage) msg;
// Restart transaction do not need to be repaired here, don't log them as well.
if (ctm.isReadOnly() || ctm.isRestart()) {
return;
}
truncate(ctm.getTruncationHandle(), IS_MP);
m_logMP.add(new Item(IS_MP, ctm, ctm.getSpHandle(), ctm.getTxnId()));
//Restore will send a complete transaction message with a lower mp transaction id because
//the restore transaction precedes the loading of the right mp transaction id from the snapshot
//Hence Math.max
m_lastMpHandle = Math.max(m_lastMpHandle, ctm.getTxnId());
m_lastSpHandle = ctm.getSpHandle();
} else if (msg instanceof DumpMessage) {
String who = CoreUtils.hsIdToString(m_HSId);
tmLog.warn("Repair log dump for site: " + who + ", isLeader: " + m_isLeader + ", " + who + ": lastSpHandle: " + m_lastSpHandle + ", lastMpHandle: " + m_lastMpHandle);
for (Iv2RepairLogResponseMessage il : contents(0l, false)) {
tmLog.warn("[Repair log contents]" + who + ": msg: " + il);
}
} else if (msg instanceof RepairLogTruncationMessage) {
final RepairLogTruncationMessage truncateMsg = (RepairLogTruncationMessage) msg;
truncate(truncateMsg.getHandle(), IS_SP);
}
}
Use of org.voltdb.messaging.DumpMessage in project voltdb by VoltDB.
Class SpScheduler, method handleDumpMessage.
private void handleDumpMessage() {
String who = CoreUtils.hsIdToString(m_mailbox.getHSId());
hostLog.warn("State dump for site: " + who);
hostLog.warn(who + ": partition: " + m_partitionId + ", isLeader: " + m_isLeader);
if (m_isLeader) {
hostLog.warn(who + ": replicas: " + CoreUtils.hsIdCollectionToString(m_replicaHSIds));
if (m_sendToHSIds.length > 0) {
m_mailbox.send(m_sendToHSIds, new DumpMessage());
}
}
hostLog.warn(who + ": most recent SP handle: " + TxnEgo.txnIdToString(getCurrentTxnId()));
hostLog.warn(who + ": outstanding txns: " + m_outstandingTxns.keySet() + " " + TxnEgo.txnIdCollectionToString(m_outstandingTxns.keySet()));
hostLog.warn(who + ": TransactionTaskQueue: " + m_pendingTasks.toString());
if (m_duplicateCounters.size() > 0) {
hostLog.warn(who + ": duplicate counters: ");
for (Entry<DuplicateCounterKey, DuplicateCounter> e : m_duplicateCounters.entrySet()) {
hostLog.warn("\t" + who + ": " + e.getKey().toString() + ": " + e.getValue().toString());
}
}
}
Use of org.voltdb.messaging.DumpMessage in project voltdb by VoltDB.
Class MpTransactionState, method pollForResponses.
private FragmentResponseMessage pollForResponses() {
FragmentResponseMessage msg = null;
try {
final String snapShotRestoreProcName = "@SnapshotRestore";
while (msg == null) {
msg = m_newDeps.poll(60L * 5, TimeUnit.SECONDS);
if (msg == null && !snapShotRestoreProcName.equals(m_initiationMsg.getStoredProcedureName())) {
tmLog.warn("Possible multipartition transaction deadlock detected for: " + m_initiationMsg);
if (m_remoteWork == null) {
tmLog.warn("Waiting on local BorrowTask response from site: " + CoreUtils.hsIdToString(m_buddyHSId));
} else {
tmLog.warn("Waiting on remote dependencies: ");
for (Entry<Integer, Set<Long>> e : m_remoteDeps.entrySet()) {
tmLog.warn("Dep ID: " + e.getKey() + " waiting on: " + CoreUtils.hsIdCollectionToString(e.getValue()));
}
}
m_mbox.send(com.google_voltpatches.common.primitives.Longs.toArray(m_useHSIds), new DumpMessage());
}
}
} catch (InterruptedException e) {
// could retry; but this is unexpected. Crash.
throw new RuntimeException(e);
}
SerializableException se = msg.getException();
if (se != null && se instanceof TransactionRestartException) {
// If this is a restart exception, we don't need to match up the DependencyId
setNeedsRollback(true);
throw se;
}
return msg;
}
Use of org.voltdb.messaging.DumpMessage in project voltdb by VoltDB.
Class MpInitiator, method acceptPromotion.
@Override
public void acceptPromotion() {
try {
long startTime = System.currentTimeMillis();
Boolean success = false;
m_term = createTerm(m_messenger.getZK(), m_partitionId, getInitiatorHSId(), m_initiatorMailbox, m_whoami);
m_term.start();
while (!success) {
final RepairAlgo repair = m_initiatorMailbox.constructRepairAlgo(m_term.getInterestingHSIds(), m_whoami);
// term syslogs the start of leader promotion.
long txnid = Long.MIN_VALUE;
try {
RepairResult res = repair.start().get();
txnid = res.m_txnId;
success = true;
} catch (CancellationException e) {
success = false;
}
if (success) {
m_initiatorMailbox.setLeaderState(txnid);
List<Iv2InitiateTaskMessage> restartTxns = ((MpPromoteAlgo) repair).getInterruptedTxns();
if (!restartTxns.isEmpty()) {
// Should only be one restarting MP txn
if (restartTxns.size() > 1) {
tmLog.fatal("Detected a fatal condition while repairing multipartition transactions " + "following a cluster topology change.");
tmLog.fatal("The MPI found multiple transactions requiring restart: ");
for (Iv2InitiateTaskMessage txn : restartTxns) {
tmLog.fatal("Restart candidate: " + txn);
}
tmLog.fatal("This node will fail. Please contact VoltDB support with your cluster's " + "log files.");
m_initiatorMailbox.send(com.google_voltpatches.common.primitives.Longs.toArray(m_term.getInterestingHSIds().get()), new DumpMessage());
throw new RuntimeException("Failing promoted MPI node with unresolvable repair condition.");
}
tmLog.debug(m_whoami + " restarting MP transaction: " + restartTxns.get(0));
m_initiatorMailbox.repairReplicasWith(null, restartTxns.get(0));
}
tmLog.info(m_whoami + "finished leader promotion. Took " + (System.currentTimeMillis() - startTime) + " ms.");
// THIS IS where map cache should be updated, not
// in the promotion algorithm.
LeaderCacheWriter iv2masters = new LeaderCache(m_messenger.getZK(), m_zkMailboxNode);
iv2masters.put(m_partitionId, m_initiatorMailbox.getHSId());
} else {
// The only known reason to fail is a failed replica during
// recovery; that's a bounded event (by k-safety).
// CrashVoltDB here means one node failure causing another.
// Don't create a cascading failure - just try again.
tmLog.info(m_whoami + "interrupted during leader promotion after " + (System.currentTimeMillis() - startTime) + " ms. of " + "trying. Retrying.");
}
}
} catch (Exception e) {
VoltDB.crashLocalVoltDB("Terminally failed leader promotion.", true, e);
}
}
Aggregations