Search in sources :

Example 1 with DumpMessage

use of org.voltdb.messaging.DumpMessage in project voltdb by VoltDB.

the class RepairLog method deliver.

/**
 * Offers a message to the repair log. Message kinds that carry a truncation
 * handle truncate the corresponding log before the message is appended.
 *
 * <ul>
 *   <li>{@code Iv2InitiateTaskMessage} (only when this site is not the leader):
 *       appended to the SP log unless read-only.</li>
 *   <li>{@code FragmentTaskMessage}: appended to the MP log unless read-only,
 *       and only for the first fragment seen for a transaction.</li>
 *   <li>{@code CompleteTransactionMessage}: appended to the MP log unless it is
 *       read-only or signals a restart.</li>
 *   <li>{@code DumpMessage}: logs the current repair log state at WARN level.</li>
 *   <li>{@code RepairLogTruncationMessage}: truncates the SP log only.</li>
 * </ul>
 *
 * Any other message type is ignored.
 *
 * @param msg the message being offered
 */
public void deliver(VoltMessage msg) {
    if (!m_isLeader && msg instanceof Iv2InitiateTaskMessage) {
        final Iv2InitiateTaskMessage initiateTask = (Iv2InitiateTaskMessage) msg;
        // Read-only SP transactions cannot be repaired, so they are never logged.
        if (!initiateTask.isReadOnly()) {
            m_lastSpHandle = initiateTask.getSpHandle();
            truncate(initiateTask.getTruncationHandle(), IS_SP);
            m_logSP.add(new Item(IS_SP, initiateTask, initiateTask.getSpHandle(), initiateTask.getTxnId()));
        }
        return;
    }

    if (msg instanceof FragmentTaskMessage) {
        final FragmentTaskMessage fragment = (FragmentTaskMessage) msg;
        // Read-only fragments are not logged either.
        if (!fragment.isReadOnly()) {
            truncate(fragment.getTruncationHandle(), IS_MP);
            // Log only the first fragment of a procedure. The Long.MAX_VALUE
            // comparison covers the case where no MP handle has been recorded yet.
            if (fragment.getTxnId() > m_lastMpHandle || m_lastMpHandle == Long.MAX_VALUE) {
                m_logMP.add(new Item(IS_MP, fragment, fragment.getSpHandle(), fragment.getTxnId()));
                m_lastMpHandle = fragment.getTxnId();
                m_lastSpHandle = fragment.getSpHandle();
            }
        }
        return;
    }

    if (msg instanceof CompleteTransactionMessage) {
        final CompleteTransactionMessage complete = (CompleteTransactionMessage) msg;
        // A completion that indicates a restart is not the end of the transaction
        // and does not need repair here; read-only completions are skipped as well.
        final boolean skip = complete.isReadOnly() || complete.isRestart();
        if (!skip) {
            truncate(complete.getTruncationHandle(), IS_MP);
            m_logMP.add(new Item(IS_MP, complete, complete.getSpHandle(), complete.getTxnId()));
            // Restore sends a complete transaction message with a lower MP
            // transaction id, because the restore transaction precedes loading the
            // right MP transaction id from the snapshot. Math.max keeps the
            // recorded handle from moving backwards.
            m_lastMpHandle = Math.max(m_lastMpHandle, complete.getTxnId());
            m_lastSpHandle = complete.getSpHandle();
        }
        return;
    }

    if (msg instanceof DumpMessage) {
        final String site = CoreUtils.hsIdToString(m_HSId);
        tmLog.warn("Repair log dump for site: " + site + ", isLeader: " + m_isLeader + ", " + site + ": lastSpHandle: " + m_lastSpHandle + ", lastMpHandle: " + m_lastMpHandle);
        final String entryPrefix = "[Repair log contents]" + site + ": msg: ";
        for (Iv2RepairLogResponseMessage entry : contents(0L, false)) {
            tmLog.warn(entryPrefix + entry);
        }
        return;
    }

    if (msg instanceof RepairLogTruncationMessage) {
        final RepairLogTruncationMessage truncation = (RepairLogTruncationMessage) msg;
        truncate(truncation.getHandle(), IS_SP);
    }
}
Also used : Iv2RepairLogResponseMessage(org.voltdb.messaging.Iv2RepairLogResponseMessage) FragmentTaskMessage(org.voltdb.messaging.FragmentTaskMessage) Iv2InitiateTaskMessage(org.voltdb.messaging.Iv2InitiateTaskMessage) CompleteTransactionMessage(org.voltdb.messaging.CompleteTransactionMessage) DumpMessage(org.voltdb.messaging.DumpMessage) RepairLogTruncationMessage(org.voltdb.messaging.RepairLogTruncationMessage)

Example 2 with DumpMessage

use of org.voltdb.messaging.DumpMessage in project voltdb by VoltDB.

the class SpScheduler method handleDumpMessage.

/**
 * Writes a diagnostic snapshot of this scheduler to the host log at WARN level:
 * site id, partition, leadership, the most recent SP handle, outstanding
 * transactions, the pending task queue and any duplicate counters.
 *
 * When this site is the leader it also logs its replicas and, if
 * {@code m_sendToHSIds} is non-empty, sends a new {@code DumpMessage} to those
 * sites.
 */
private void handleDumpMessage() {
    final String site = CoreUtils.hsIdToString(m_mailbox.getHSId());

    hostLog.warn("State dump for site: " + site);
    hostLog.warn(site + ": partition: " + m_partitionId + ", isLeader: " + m_isLeader);

    if (m_isLeader) {
        hostLog.warn(site + ": replicas: " + CoreUtils.hsIdCollectionToString(m_replicaHSIds));
        // Pass the dump request on to the sites in m_sendToHSIds.
        if (m_sendToHSIds.length != 0) {
            m_mailbox.send(m_sendToHSIds, new DumpMessage());
        }
    }

    hostLog.warn(site + ": most recent SP handle: " + TxnEgo.txnIdToString(getCurrentTxnId()));
    hostLog.warn(site + ": outstanding txns: " + m_outstandingTxns.keySet() + " " + TxnEgo.txnIdCollectionToString(m_outstandingTxns.keySet()));
    hostLog.warn(site + ": TransactionTaskQueue: " + m_pendingTasks.toString());

    if (m_duplicateCounters.isEmpty()) {
        return;
    }

    hostLog.warn(site + ": duplicate counters: ");
    for (Entry<DuplicateCounterKey, DuplicateCounter> counter : m_duplicateCounters.entrySet()) {
        final String key = counter.getKey().toString();
        final String value = counter.getValue().toString();
        hostLog.warn("\t" + site + ": " + key + ": " + value);
    }
}
Also used : DumpMessage(org.voltdb.messaging.DumpMessage)

Example 3 with DumpMessage

use of org.voltdb.messaging.DumpMessage in project voltdb by VoltDB.

the class MpTransactionState method pollForResponses.

/**
 * Blocks until a fragment response is available on {@code m_newDeps} and
 * returns it.
 *
 * Each poll waits up to five minutes. When a poll times out and the procedure
 * is not {@code @SnapshotRestore}, a possible-deadlock warning is logged
 * together with what is being waited on, and a {@code DumpMessage} is sent to
 * the sites in {@code m_useHSIds}. Polling then resumes.
 *
 * @return the next fragment response; never {@code null}
 * @throws RuntimeException if the waiting thread is interrupted (the thread's
 *         interrupt status is restored before throwing)
 * @throws SerializableException the response's own exception, rethrown after
 *         flagging rollback, when it is a {@code TransactionRestartException}
 */
private FragmentResponseMessage pollForResponses() {
    FragmentResponseMessage msg = null;
    try {
        final String snapShotRestoreProcName = "@SnapshotRestore";
        while (msg == null) {
            msg = m_newDeps.poll(60L * 5, TimeUnit.SECONDS);
            // A timeout during @SnapshotRestore is not reported as a deadlock.
            if (msg == null && !snapShotRestoreProcName.equals(m_initiationMsg.getStoredProcedureName())) {
                tmLog.warn("Possible multipartition transaction deadlock detected for: " + m_initiationMsg);
                if (m_remoteWork == null) {
                    tmLog.warn("Waiting on local BorrowTask response from site: " + CoreUtils.hsIdToString(m_buddyHSId));
                } else {
                    tmLog.warn("Waiting on remote dependencies: ");
                    for (Entry<Integer, Set<Long>> e : m_remoteDeps.entrySet()) {
                        tmLog.warn("Dep ID: " + e.getKey() + " waiting on: " + CoreUtils.hsIdCollectionToString(e.getValue()));
                    }
                }
                // Send a DumpMessage to every site in m_useHSIds.
                m_mbox.send(com.google_voltpatches.common.primitives.Longs.toArray(m_useHSIds), new DumpMessage());
            }
        }
    } catch (InterruptedException e) {
        // could retry; but this is unexpected. Crash.
        // Restore the interrupt flag first so code further up the stack can
        // still observe that this thread was interrupted.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    }
    SerializableException se = msg.getException();
    // instanceof is already false for null, so no separate null check is needed.
    if (se instanceof TransactionRestartException) {
        // If this is a restart exception, we don't need to match up the DependencyId
        setNeedsRollback(true);
        throw se;
    }
    return msg;
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) DumpMessage(org.voltdb.messaging.DumpMessage) FragmentResponseMessage(org.voltdb.messaging.FragmentResponseMessage) TransactionRestartException(org.voltdb.exceptions.TransactionRestartException) SerializableException(org.voltdb.exceptions.SerializableException)

Example 4 with DumpMessage

use of org.voltdb.messaging.DumpMessage in project voltdb by VoltDB.

the class MpInitiator method acceptPromotion.

/**
 * Runs leader promotion for this initiator.
 *
 * Creates and starts a new term, then runs the repair algorithm repeatedly
 * until it completes without being cancelled. On success the mailbox is put
 * into leader state with the repaired transaction id, at most one interrupted
 * MP transaction is restarted, and this initiator's HSId is written to the
 * leader cache under its partition id.
 *
 * If repair reports more than one interrupted transaction, the condition is
 * logged as fatal, a {@code DumpMessage} is sent to the term's interesting
 * sites and a {@code RuntimeException} is raised. Any exception leaving the
 * promotion loop is handed to {@code VoltDB.crashLocalVoltDB}.
 */
@Override
public void acceptPromotion() {
    try {
        long startTime = System.currentTimeMillis();
        // Primitive flag: the loop tests it every iteration, so boxing buys nothing.
        boolean success = false;
        m_term = createTerm(m_messenger.getZK(), m_partitionId, getInitiatorHSId(), m_initiatorMailbox, m_whoami);
        m_term.start();
        while (!success) {
            final RepairAlgo repair = m_initiatorMailbox.constructRepairAlgo(m_term.getInterestingHSIds(), m_whoami);
            // term syslogs the start of leader promotion.
            long txnid = Long.MIN_VALUE;
            try {
                RepairResult res = repair.start().get();
                txnid = res.m_txnId;
                success = true;
            } catch (CancellationException e) {
                // A cancelled repair is retried on the next loop iteration.
                success = false;
            }
            if (success) {
                m_initiatorMailbox.setLeaderState(txnid);
                List<Iv2InitiateTaskMessage> restartTxns = ((MpPromoteAlgo) repair).getInterruptedTxns();
                if (!restartTxns.isEmpty()) {
                    // Should only be one restarting MP txn
                    if (restartTxns.size() > 1) {
                        tmLog.fatal("Detected a fatal condition while repairing multipartition transactions " + "following a cluster topology change.");
                        tmLog.fatal("The MPI found multiple transactions requiring restart: ");
                        for (Iv2InitiateTaskMessage txn : restartTxns) {
                            tmLog.fatal("Restart candidate: " + txn);
                        }
                        tmLog.fatal("This node will fail.  Please contact VoltDB support with your cluster's " + "log files.");
                        // Send a DumpMessage to the term's interesting sites before failing.
                        m_initiatorMailbox.send(com.google_voltpatches.common.primitives.Longs.toArray(m_term.getInterestingHSIds().get()), new DumpMessage());
                        throw new RuntimeException("Failing promoted MPI node with unresolvable repair condition.");
                    }
                    tmLog.debug(m_whoami + " restarting MP transaction: " + restartTxns.get(0));
                    m_initiatorMailbox.repairReplicasWith(null, restartTxns.get(0));
                }
                tmLog.info(m_whoami + "finished leader promotion. Took " + (System.currentTimeMillis() - startTime) + " ms.");
                // THIS IS where map cache should be updated, not
                // in the promotion algorithm.
                LeaderCacheWriter iv2masters = new LeaderCache(m_messenger.getZK(), m_zkMailboxNode);
                iv2masters.put(m_partitionId, m_initiatorMailbox.getHSId());
            } else {
                // The only known reason to fail is a failed replica during
                // recovery; that's a bounded event (by k-safety).
                // CrashVoltDB here means one node failure causing another.
                // Don't create a cascading failure - just try again.
                tmLog.info(m_whoami + "interrupted during leader promotion after " + (System.currentTimeMillis() - startTime) + " ms. of " + "trying. Retrying.");
            }
        }
    } catch (Exception e) {
        // Anything that escapes the loop, including the RuntimeException thrown
        // above, ends in a local crash.
        VoltDB.crashLocalVoltDB("Terminally failed leader promotion.", true, e);
    }
}
Also used : Iv2InitiateTaskMessage(org.voltdb.messaging.Iv2InitiateTaskMessage) DumpMessage(org.voltdb.messaging.DumpMessage) CancellationException(java.util.concurrent.CancellationException) ExecutionException(java.util.concurrent.ExecutionException) KeeperException(org.apache.zookeeper_voltpatches.KeeperException) RepairResult(org.voltdb.iv2.RepairAlgo.RepairResult)

Aggregations

DumpMessage (org.voltdb.messaging.DumpMessage)4 Iv2InitiateTaskMessage (org.voltdb.messaging.Iv2InitiateTaskMessage)2 HashSet (java.util.HashSet)1 Set (java.util.Set)1 CancellationException (java.util.concurrent.CancellationException)1 ExecutionException (java.util.concurrent.ExecutionException)1 KeeperException (org.apache.zookeeper_voltpatches.KeeperException)1 SerializableException (org.voltdb.exceptions.SerializableException)1 TransactionRestartException (org.voltdb.exceptions.TransactionRestartException)1 RepairResult (org.voltdb.iv2.RepairAlgo.RepairResult)1 CompleteTransactionMessage (org.voltdb.messaging.CompleteTransactionMessage)1 FragmentResponseMessage (org.voltdb.messaging.FragmentResponseMessage)1 FragmentTaskMessage (org.voltdb.messaging.FragmentTaskMessage)1 Iv2RepairLogResponseMessage (org.voltdb.messaging.Iv2RepairLogResponseMessage)1 RepairLogTruncationMessage (org.voltdb.messaging.RepairLogTruncationMessage)1