Search in sources :

Example 1 with RecoveryMessage

use of org.voltcore.messaging.RecoveryMessage in project voltdb by VoltDB.

The example is the method recoveryRunLoop of the class AgreementSite.

/**
 * Runs the message loop used while this site is recovering.
 *
 * <p>The loop keeps going for as long as both {@code m_recovering} and
 * {@code m_shouldContinue} are set. On every pass it:
 * <ol>
 *   <li>sends a {@link RecoveryMessage} proposal once the transaction queue
 *       reports a txn id that is safe to recover at (only while in the
 *       {@code WAITING_FOR_SAFETY} stage);</li>
 *   <li>receives at most one mailbox message and hands it to
 *       {@code processMessage};</li>
 *   <li>sends heartbeats when more than 5 ms have elapsed since the last
 *       batch sent by this loop;</li>
 *   <li>once a recovery point is known, either discards one queued
 *       transaction that precedes it, or, when the snapshot has been
 *       received, applies the snapshot and returns.</li>
 * </ol>
 *
 * @throws Exception propagated from message processing or snapshot handling
 */
public void recoveryRunLoop() throws Exception {
    long previousHeartbeatMillis = System.currentTimeMillis();
    while (m_recovering && m_shouldContinue) {
        // Propose a recovery point as soon as the queue can name one.
        if (m_recoveryStage == RecoveryStage.WAITING_FOR_SAFETY) {
            final Long proposedTxnId = m_txnQueue.safeToRecover();
            if (proposedTxnId != null) {
                m_recoveryStage = RecoveryStage.SENT_PROPOSAL;
                m_recoverBeforeTxn = proposedTxnId;

                // The first known site other than this one receives the proposal;
                // the target stays 0 when no such site is found.
                long proposalTargetHSId = 0;
                for (Long knownHSId : m_hsIds) {
                    if (knownHSId == m_hsId) {
                        continue;
                    }
                    proposalTargetHSId = knownHSId;
                    break;
                }
                m_mailbox.send(proposalTargetHSId, new RecoveryMessage(m_hsId, proposedTxnId, -1));
            }
        }

        // Handle at most one incoming message per pass.
        final VoltMessage incoming = m_mailbox.recvBlocking(5);
        if (incoming != null) {
            processMessage(incoming);
        }

        // Keep heartbeats flowing while recovery is in progress.
        final long currentMillis = System.currentTimeMillis();
        final long sinceLastHeartbeat = currentMillis - previousHeartbeatMillis;
        if (sinceLastHeartbeat > 5) {
            previousHeartbeatMillis = currentMillis;
            sendHeartbeats();
        }

        // Nothing further to do until a recovery point has been chosen.
        if (m_recoverBeforeTxn != null) {
            final boolean headPrecedesRecoveryPoint = m_txnQueue.peek() != null
                    && m_txnQueue.peek().txnId < m_recoverBeforeTxn.longValue();
            if (headPrecedesRecoveryPoint) {
                // Drop one transaction that is older than the recovery point.
                m_transactionsById.remove(m_txnQueue.poll().txnId);
            } else if (m_recoveryStage == RecoveryStage.RECEIVED_SNAPSHOT) {
                processZKSnapshot();
                return;
            }
        }
    }
}
Also used : VoltMessage(org.voltcore.messaging.VoltMessage) RecoveryMessage(org.voltcore.messaging.RecoveryMessage)

Example 2 with RecoveryMessage

use of org.voltcore.messaging.RecoveryMessage in project voltdb by VoltDB.

The example is the method processMessage of the class AgreementSite.

/**
 * Dispatches a single mailbox message according to its concrete type.
 *
 * <p>Messages whose source HSId is not in {@code m_hsIds} are logged and
 * dropped. Otherwise the handling is:
 * <ul>
 *   <li>{@link HeartbeatMessage}: note the txn in the queue and reply with a
 *       {@link HeartbeatResponseMessage}; any other
 *       {@link TransactionInfoBaseMessage} trips an assertion.</li>
 *   <li>{@link HeartbeatResponseMessage}: update the safety state.</li>
 *   <li>{@link LocalObjectMessage}: run a {@code Runnable} payload, or turn a
 *       {@code Request} payload into an {@link AgreementTaskMessage}.</li>
 *   <li>{@link AgreementTaskMessage}: queue the transaction unless it is
 *       already known or older than {@code m_minTxnIdAfterRecovery}.</li>
 *   <li>{@link BinaryPayloadMessage}: a recovery snapshot or a join request,
 *       selected by the first metadata byte.</li>
 *   <li>{@link FaultMessage}: forwarded to {@code discoverGlobalFaultData}.</li>
 *   <li>{@link RecoveryMessage}: record the requested recovery point and the
 *       requesting site.</li>
 * </ul>
 * Messages of any other type are ignored.
 *
 * @param message the message to handle
 * @throws Exception propagated from the operations performed while handling
 *         the message
 */
private void processMessage(VoltMessage message) throws Exception {
    // Ignore traffic from sites that are not in the current set of known sites.
    if (!m_hsIds.contains(message.m_sourceHSId)) {
        m_recoveryLog.info("Dropping message " + message + " because it is not from a known up site");
        return;
    }
    if (message instanceof TransactionInfoBaseMessage) {
        TransactionInfoBaseMessage info = (TransactionInfoBaseMessage) message;
        // Special case heartbeats which only update RPQ
        if (info instanceof HeartbeatMessage) {
            // use the heartbeat to unclog the priority queue if clogged
            long lastSeenTxnFromInitiator = m_txnQueue.noteTransactionRecievedAndReturnLastSeen(info.getInitiatorHSId(), info.getTxnId(), ((HeartbeatMessage) info).getLastSafeTxnId());
            // respond to the initiator with the last seen transaction
            HeartbeatResponseMessage response = new HeartbeatResponseMessage(m_hsId, lastSeenTxnFromInitiator, m_txnQueue.getQueueState() == RestrictedPriorityQueue.QueueState.BLOCKED_SAFETY);
            m_mailbox.send(info.getInitiatorHSId(), response);
            // we're done here (in the case of heartbeats)
            return;
        }
        // Heartbeats are the only TransactionInfoBaseMessage handled here.
        // NOTE(review): with assertions disabled any other subtype is silently ignored.
        assert (false);
    } else if (message instanceof HeartbeatResponseMessage) {
        HeartbeatResponseMessage hrm = (HeartbeatResponseMessage) message;
        m_safetyState.updateLastSeenTxnIdFromExecutorBySiteId(hrm.getExecHSId(), hrm.getLastReceivedTxnId());
    } else if (message instanceof LocalObjectMessage) {
        LocalObjectMessage lom = (LocalObjectMessage) message;
        if (lom.payload instanceof Runnable) {
            // Runnable payloads are executed inline on the calling thread.
            ((Runnable) lom.payload).run();
        } else if (lom.payload instanceof Request) {
            Request r = (Request) lom.payload;
            long txnId = 0;
            boolean isRead = false;
            switch(r.type) {
                // Session creation uses the session id as its txn id.
                case OpCode.createSession:
                    txnId = r.sessionId;
                    break;
                //For reads see if we can skip global agreement and just do the read
                case OpCode.exists:
                case OpCode.getChildren:
                case OpCode.getChildren2:
                case OpCode.getData:
                    // The shortcut is only taken when the txn queue is empty,
                    // because ordering of reads and writes matters.
                    if (m_txnQueue.isEmpty()) {
                        r.setOwner(m_hsId);
                        m_server.prepRequest(new Request(r), m_lastUsedTxnId);
                        return;
                    }
                    isRead = true;
                // Intentional fall-through: a read that could not be served
                // directly gets a txn id below, which puts it in the global order.
                default:
                    txnId = m_idManager.getNextUniqueTransactionId();
                    break;
            }
            /*
             * Don't send the whole request if this is a read blocked on a write
             * We may send a heartbeat instead of propagating a useless read transaction
             * at the end of this block
             */
            if (!isRead) {
                // Send the task to every known site except this one.
                for (long initiatorHSId : m_hsIds) {
                    if (initiatorHSId == m_hsId)
                        continue;
                    AgreementTaskMessage atm = new AgreementTaskMessage(r, txnId, m_hsId, m_safetyState.getNewestGloballySafeTxnId());
                    m_mailbox.send(initiatorHSId, atm);
                }
            }
            //Process the ATM eagerly locally to aid
            //in having a complete set of stuff to ship
            //to a recovering agreement site
            AgreementTaskMessage atm = new AgreementTaskMessage(new Request(r), txnId, m_hsId, m_safetyState.getNewestGloballySafeTxnId());
            // Mark the message as coming from this site before re-entering this method;
            // presumably so it passes the known-site check at the top — TODO confirm.
            atm.m_sourceHSId = m_hsId;
            processMessage(atm);
            /*
             * Don't send a heartbeat out for every single blocked read that occurs
             * Try and limit to 2000 a second which is a lot and should be pretty
             * close to the previous behavior of propagating all reads. My measurements
             * don't show the old behavior is better than none at all, but I fear
             * change.
             */
            if (isRead) {
                final long now = System.nanoTime();
                // At most one heartbeat batch per 500 microseconds, i.e. 2000 per second.
                if (TimeUnit.NANOSECONDS.toMicros(now - m_lastHeartbeatTime) > 500) {
                    m_lastHeartbeatTime = now;
                    sendHeartbeats();
                }
            }
        }
    } else if (message instanceof AgreementTaskMessage) {
        AgreementTaskMessage atm = (AgreementTaskMessage) message;
        // Accept the task only if it is not already tracked and is not older
        // than the minimum txn id established by recovery.
        if (!m_transactionsById.containsKey(atm.m_txnId) && atm.m_txnId >= m_minTxnIdAfterRecovery) {
            m_txnQueue.noteTransactionRecievedAndReturnLastSeen(atm.m_initiatorHSId, atm.m_txnId, atm.m_lastSafeTxnId);
            AgreementTransactionState transactionState = new AgreementTransactionState(atm.m_txnId, atm.m_initiatorHSId, atm.m_request);
            // Track the transaction by id only when the queue accepted it.
            if (m_txnQueue.add(transactionState)) {
                m_transactionsById.put(transactionState.txnId, transactionState);
            } else {
                m_agreementLog.info("Dropping txn " + transactionState.txnId + " data from failed initiatorSiteId: " + transactionState.initiatorHSId);
            }
        } else {
            m_recoveryLog.info("Agreement, discarding duplicate txn during recovery, txnid is " + atm.m_txnId + " this should only occur during recovery. minTxnIdAfterRecovery " + m_minTxnIdAfterRecovery + " and  dup is " + m_transactionsById.containsKey(atm.m_txnId));
        }
    } else if (message instanceof BinaryPayloadMessage) {
        BinaryPayloadMessage bpm = (BinaryPayloadMessage) message;
        ByteBuffer metadata = ByteBuffer.wrap(bpm.m_metadata);
        // The first metadata byte selects the kind of binary payload.
        final byte type = metadata.get();
        if (type == BINARY_PAYLOAD_SNAPSHOT) {
            assert (m_recovering);
            assert (m_recoveryStage == RecoveryStage.SENT_PROPOSAL);
            // Explicit check in addition to the assert above, so the stage is
            // also verified when assertions are disabled.
            if (m_recoveryStage != RecoveryStage.SENT_PROPOSAL) {
                org.voltdb.VoltDB.crashLocalVoltDB("Received a recovery snapshot in stage " + m_recoveryStage.toString(), true, null);
            }
            // The recovery point selected by the sender follows the type byte.
            long selectedRecoverBeforeTxn = metadata.getLong();
            if (selectedRecoverBeforeTxn < m_recoverBeforeTxn) {
                org.voltdb.VoltDB.crashLocalVoltDB("Selected recover before txn was earlier than the  proposed recover before txn", true, null);
            }
            m_recoverBeforeTxn = selectedRecoverBeforeTxn;
            //anything before this precedes the snapshot
            m_minTxnIdAfterRecovery = m_recoverBeforeTxn;
            try {
                // The snapshot payload is Snappy-compressed.
                m_recoverySnapshot = org.xerial.snappy.Snappy.uncompress(bpm.m_payload);
            } catch (IOException e) {
                org.voltdb.VoltDB.crashLocalVoltDB("Unable to decompress ZK snapshot", true, e);
            }
            m_recoveryStage = RecoveryStage.RECEIVED_SNAPSHOT;
            /*
             * Clean out all txns from before the snapshot
             */
            Iterator<Map.Entry<Long, OrderableTransaction>> iter = m_transactionsById.entrySet().iterator();
            while (iter.hasNext()) {
                final Map.Entry<Long, OrderableTransaction> entry = iter.next();
                if (entry.getKey() < m_minTxnIdAfterRecovery) {
                    // Fault the txn in the queue and remove it from the id map
                    // through the iterator, which is safe during iteration.
                    m_txnQueue.faultTransaction(entry.getValue());
                    iter.remove();
                }
            }
        } else if (type == BINARY_PAYLOAD_JOIN_REQUEST) {
            // The join request payload is a UTF-8 encoded JSON object.
            JSONObject jsObj = new JSONObject(new String(bpm.m_payload, "UTF-8"));
            final long initiatorHSId = jsObj.getLong("initiatorHSId");
            final long txnId = jsObj.getLong("txnId");
            final long lastSafeTxnId = jsObj.getLong("lastSafeTxnId");
            final long joiningHSId = jsObj.getLong("joiningHSId");
            // A join request is not expected while this site is itself recovering.
            if (m_recovering) {
                org.voltdb.VoltDB.crashLocalVoltDB("Received a join request during recovery for " + CoreUtils.hsIdToString(joiningHSId) + " from " + CoreUtils.hsIdToString(initiatorHSId), true, null);
            }
            m_txnQueue.noteTransactionRecievedAndReturnLastSeen(initiatorHSId, txnId, lastSafeTxnId);
            AgreementRejoinTransactionState transactionState = new AgreementRejoinTransactionState(txnId, initiatorHSId, joiningHSId, null);
            // Track the rejoin transaction by id only when the queue accepted it.
            if (m_txnQueue.add(transactionState)) {
                m_transactionsById.put(transactionState.txnId, transactionState);
            } else {
                m_agreementLog.info("Dropping txn " + transactionState.txnId + " data from failed initiatorSiteId: " + transactionState.initiatorHSId);
            }
        }
    } else if (message instanceof FaultMessage) {
        FaultMessage fm = (FaultMessage) message;
        discoverGlobalFaultData(fm);
    } else if (message instanceof RecoveryMessage) {
        RecoveryMessage rm = (RecoveryMessage) message;
        // A recovery request is only expected when this site is fully recovered
        // and no other recovery request is outstanding.
        // NOTE(review): these preconditions are enforced by asserts only.
        assert (m_recoverBeforeTxn == null);
        assert (m_siteRequestingRecovery == null);
        assert (m_recovering == false);
        assert (m_recoveryStage == RecoveryStage.RECOVERED);
        // Record the requested recovery point and the requesting site.
        m_recoverBeforeTxn = rm.txnId();
        m_siteRequestingRecovery = rm.sourceSite();
    }
}
Also used : HeartbeatResponseMessage(org.voltcore.messaging.HeartbeatResponseMessage) Request(org.apache.zookeeper_voltpatches.server.Request) IOException(java.io.IOException) BinaryPayloadMessage(org.voltcore.messaging.BinaryPayloadMessage) ByteBuffer(java.nio.ByteBuffer) LocalObjectMessage(org.voltcore.messaging.LocalObjectMessage) HeartbeatMessage(org.voltcore.messaging.HeartbeatMessage) FaultMessage(org.voltcore.messaging.FaultMessage) JSONObject(org.json_voltpatches.JSONObject) RecoveryMessage(org.voltcore.messaging.RecoveryMessage) TransactionInfoBaseMessage(org.voltcore.messaging.TransactionInfoBaseMessage) AgreementTaskMessage(org.voltcore.messaging.AgreementTaskMessage) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

RecoveryMessage (org.voltcore.messaging.RecoveryMessage)2 IOException (java.io.IOException)1 ByteBuffer (java.nio.ByteBuffer)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Request (org.apache.zookeeper_voltpatches.server.Request)1 JSONObject (org.json_voltpatches.JSONObject)1 AgreementTaskMessage (org.voltcore.messaging.AgreementTaskMessage)1 BinaryPayloadMessage (org.voltcore.messaging.BinaryPayloadMessage)1 FaultMessage (org.voltcore.messaging.FaultMessage)1 HeartbeatMessage (org.voltcore.messaging.HeartbeatMessage)1 HeartbeatResponseMessage (org.voltcore.messaging.HeartbeatResponseMessage)1 LocalObjectMessage (org.voltcore.messaging.LocalObjectMessage)1 TransactionInfoBaseMessage (org.voltcore.messaging.TransactionInfoBaseMessage)1 VoltMessage (org.voltcore.messaging.VoltMessage)1