Example use of org.voltcore.messaging.RecoveryMessage in the voltdb project by VoltDB.
Class AgreementSite, method recoveryRunLoop.
/**
 * Drives this site while it is recovering.
 *
 * <p>Each pass of the loop, in order:
 * <ol>
 *   <li>While in {@code WAITING_FOR_SAFETY}, asks the transaction queue for a
 *       txn id that is safe to recover at. Once one is available it is stored
 *       in {@code m_recoverBeforeTxn}, the stage moves to {@code SENT_PROPOSAL},
 *       and a {@link RecoveryMessage} is sent to the first entry of
 *       {@code m_hsIds} that is not this site (HSId 0 if there is none).</li>
 *   <li>Waits on the mailbox via {@code recvBlocking(5)} and hands any
 *       received message to {@code processMessage}.</li>
 *   <li>Sends heartbeats if more than 5 ms of wall-clock time have passed
 *       since the previous round.</li>
 *   <li>Once {@code m_recoverBeforeTxn} is set: removes at most one queued
 *       transaction whose id is below it; if there is nothing to remove and
 *       the stage is {@code RECEIVED_SNAPSHOT}, calls
 *       {@code processZKSnapshot()} and returns.</li>
 * </ol>
 *
 * <p>The loop also ends when {@code m_recovering} or {@code m_shouldContinue}
 * becomes false.
 *
 * @throws Exception propagated from message processing or snapshot handling
 */
public void recoveryRunLoop() throws Exception {
    long heartbeatSentAt = System.currentTimeMillis();

    while (m_recovering && m_shouldContinue) {
        if (m_recoveryStage == RecoveryStage.WAITING_FOR_SAFETY) {
            final Long proposedTxnId = m_txnQueue.safeToRecover();
            if (proposedTxnId != null) {
                m_recoveryStage = RecoveryStage.SENT_PROPOSAL;
                m_recoverBeforeTxn = proposedTxnId;

                // Pick the first known site other than ourselves as the recovery source.
                long peerHSId = 0;
                for (Long candidate : m_hsIds) {
                    if (candidate == m_hsId) {
                        continue;
                    }
                    peerHSId = candidate;
                    break;
                }

                final RecoveryMessage proposal = new RecoveryMessage(m_hsId, proposedTxnId, -1);
                m_mailbox.send(peerHSId, proposal);
            }
        }

        // Short bounded wait so heartbeats and queue draining keep making progress.
        final VoltMessage incoming = m_mailbox.recvBlocking(5);
        if (incoming != null) {
            processMessage(incoming);
        }

        final long currentMillis = System.currentTimeMillis();
        if (currentMillis - heartbeatSentAt > 5) {
            heartbeatSentAt = currentMillis;
            sendHeartbeats();
        }

        // Nothing to drain or finish until a recover-before txn id has been chosen.
        if (m_recoverBeforeTxn != null) {
            if (m_txnQueue.peek() != null
                    && m_txnQueue.peek().txnId < m_recoverBeforeTxn.longValue()) {
                // Discard one transaction that precedes the recovery point.
                m_transactionsById.remove(m_txnQueue.poll().txnId);
            } else if (m_recoveryStage == RecoveryStage.RECEIVED_SNAPSHOT) {
                processZKSnapshot();
                return;
            }
        }
    }
}
Example use of org.voltcore.messaging.RecoveryMessage in the voltdb project by VoltDB.
Class AgreementSite, method processMessage.
/**
 * Handles one message taken from the mailbox, dispatching on its concrete type.
 *
 * <p>Messages whose source HSId is not in {@code m_hsIds} are logged and dropped.
 * Otherwise:
 * <ul>
 *   <li>{@code TransactionInfoBaseMessage}: only {@code HeartbeatMessage} is expected
 *       (anything else trips {@code assert (false)}); the heartbeat is noted in the
 *       transaction queue and answered with a {@code HeartbeatResponseMessage}.</li>
 *   <li>{@code HeartbeatResponseMessage}: updates {@code m_safetyState}.</li>
 *   <li>{@code LocalObjectMessage}: a {@code Runnable} payload is run inline; a
 *       {@code Request} payload is given a txn id, sent to the other sites as an
 *       {@code AgreementTaskMessage} (unless it is a read that had to be queued) and
 *       then processed locally through a recursive call to this method.</li>
 *   <li>{@code AgreementTaskMessage}: queued, unless its txn id is already tracked or
 *       is below {@code m_minTxnIdAfterRecovery}.</li>
 *   <li>{@code BinaryPayloadMessage}: a recovery snapshot or a join request,
 *       selected by the first metadata byte.</li>
 *   <li>{@code FaultMessage}: forwarded to {@code discoverGlobalFaultData}.</li>
 *   <li>{@code RecoveryMessage}: records the requested txn id and requesting site.</li>
 * </ul>
 * Any other message type is ignored without logging.
 *
 * @param message the message to handle
 * @throws Exception propagated from the handlers invoked below
 */
private void processMessage(VoltMessage message) throws Exception {
if (!m_hsIds.contains(message.m_sourceHSId)) {
m_recoveryLog.info("Dropping message " + message + " because it is not from a known up site");
return;
}
if (message instanceof TransactionInfoBaseMessage) {
TransactionInfoBaseMessage info = (TransactionInfoBaseMessage) message;
// Special case heartbeats which only update RPQ
if (info instanceof HeartbeatMessage) {
// use the heartbeat to unclog the priority queue if clogged
long lastSeenTxnFromInitiator = m_txnQueue.noteTransactionRecievedAndReturnLastSeen(info.getInitiatorHSId(), info.getTxnId(), ((HeartbeatMessage) info).getLastSafeTxnId());
// respond to the initiator with the last seen transaction
HeartbeatResponseMessage response = new HeartbeatResponseMessage(m_hsId, lastSeenTxnFromInitiator, m_txnQueue.getQueueState() == RestrictedPriorityQueue.QueueState.BLOCKED_SAFETY);
m_mailbox.send(info.getInitiatorHSId(), response);
// we're done here (in the case of heartbeats)
return;
}
// No other TransactionInfoBaseMessage subtype is expected here
assert (false);
} else if (message instanceof HeartbeatResponseMessage) {
HeartbeatResponseMessage hrm = (HeartbeatResponseMessage) message;
m_safetyState.updateLastSeenTxnIdFromExecutorBySiteId(hrm.getExecHSId(), hrm.getLastReceivedTxnId());
} else if (message instanceof LocalObjectMessage) {
LocalObjectMessage lom = (LocalObjectMessage) message;
if (lom.payload instanceof Runnable) {
((Runnable) lom.payload).run();
} else if (lom.payload instanceof Request) {
Request r = (Request) lom.payload;
long txnId = 0;
boolean isRead = false;
switch(r.type) {
// Session creation uses the session id itself as the txn id
case OpCode.createSession:
txnId = r.sessionId;
break;
//For reads see if we can skip global agreement and just do the read
case OpCode.exists:
case OpCode.getChildren:
case OpCode.getChildren2:
case OpCode.getData:
//Only short-circuit when nothing is queued; if the queue is not empty, don't short circuit
//in this case because ordering of reads and writes matters
if (m_txnQueue.isEmpty()) {
r.setOwner(m_hsId);
m_server.prepRequest(new Request(r), m_lastUsedTxnId);
return;
}
isRead = true;
//Intentional fall-through: the queued read still needs a txn id to place
//it in the global order
default:
txnId = m_idManager.getNextUniqueTransactionId();
break;
}
/*
* Don't send the whole request if this is a read blocked on a write
* We may send a heartbeat instead of propagating a useless read transaction
* at the end of this block
*/
if (!isRead) {
for (long initiatorHSId : m_hsIds) {
if (initiatorHSId == m_hsId)
continue;
AgreementTaskMessage atm = new AgreementTaskMessage(r, txnId, m_hsId, m_safetyState.getNewestGloballySafeTxnId());
m_mailbox.send(initiatorHSId, atm);
}
}
//Process the ATM eagerly locally to aid
//in having a complete set of stuff to ship
//to a recovering agreement site
AgreementTaskMessage atm = new AgreementTaskMessage(new Request(r), txnId, m_hsId, m_safetyState.getNewestGloballySafeTxnId());
// Mark the message as coming from this site so the known-site check at the top of the recursive call passes
atm.m_sourceHSId = m_hsId;
processMessage(atm);
/*
* Don't send a heartbeat out for every single blocked read that occurs
* Try and limit to 2000 a second which is a lot and should be pretty
* close to the previous behavior of propagating all reads. My measurements
* don't show the old behavior is better than none at all, but I fear
* change.
*/
if (isRead) {
final long now = System.nanoTime();
// At most one heartbeat round per 500 microseconds
if (TimeUnit.NANOSECONDS.toMicros(now - m_lastHeartbeatTime) > 500) {
m_lastHeartbeatTime = now;
sendHeartbeats();
}
}
}
} else if (message instanceof AgreementTaskMessage) {
AgreementTaskMessage atm = (AgreementTaskMessage) message;
// Accept only txns not already tracked and not older than the post-recovery minimum
if (!m_transactionsById.containsKey(atm.m_txnId) && atm.m_txnId >= m_minTxnIdAfterRecovery) {
m_txnQueue.noteTransactionRecievedAndReturnLastSeen(atm.m_initiatorHSId, atm.m_txnId, atm.m_lastSafeTxnId);
AgreementTransactionState transactionState = new AgreementTransactionState(atm.m_txnId, atm.m_initiatorHSId, atm.m_request);
// Track the txn by id only if the queue accepted it
if (m_txnQueue.add(transactionState)) {
m_transactionsById.put(transactionState.txnId, transactionState);
} else {
m_agreementLog.info("Dropping txn " + transactionState.txnId + " data from failed initiatorSiteId: " + transactionState.initiatorHSId);
}
} else {
m_recoveryLog.info("Agreement, discarding duplicate txn during recovery, txnid is " + atm.m_txnId + " this should only occur during recovery. minTxnIdAfterRecovery " + m_minTxnIdAfterRecovery + " and dup is " + m_transactionsById.containsKey(atm.m_txnId));
}
} else if (message instanceof BinaryPayloadMessage) {
BinaryPayloadMessage bpm = (BinaryPayloadMessage) message;
ByteBuffer metadata = ByteBuffer.wrap(bpm.m_metadata);
// The first metadata byte selects the payload kind
final byte type = metadata.get();
if (type == BINARY_PAYLOAD_SNAPSHOT) {
assert (m_recovering);
assert (m_recoveryStage == RecoveryStage.SENT_PROPOSAL);
// Asserts are skipped unless the JVM runs with -ea, so the stage is re-checked explicitly
if (m_recoveryStage != RecoveryStage.SENT_PROPOSAL) {
org.voltdb.VoltDB.crashLocalVoltDB("Received a recovery snapshot in stage " + m_recoveryStage.toString(), true, null);
}
// The recover-before txn id carried in the metadata must not precede the one this site proposed
long selectedRecoverBeforeTxn = metadata.getLong();
if (selectedRecoverBeforeTxn < m_recoverBeforeTxn) {
org.voltdb.VoltDB.crashLocalVoltDB("Selected recover before txn was earlier than the proposed recover before txn", true, null);
}
m_recoverBeforeTxn = selectedRecoverBeforeTxn;
//anything before this precedes the snapshot
m_minTxnIdAfterRecovery = m_recoverBeforeTxn;
try {
m_recoverySnapshot = org.xerial.snappy.Snappy.uncompress(bpm.m_payload);
} catch (IOException e) {
org.voltdb.VoltDB.crashLocalVoltDB("Unable to decompress ZK snapshot", true, e);
}
m_recoveryStage = RecoveryStage.RECEIVED_SNAPSHOT;
/*
* Clean out all txns from before the snapshot
*/
Iterator<Map.Entry<Long, OrderableTransaction>> iter = m_transactionsById.entrySet().iterator();
while (iter.hasNext()) {
final Map.Entry<Long, OrderableTransaction> entry = iter.next();
if (entry.getKey() < m_minTxnIdAfterRecovery) {
m_txnQueue.faultTransaction(entry.getValue());
// Remove through the iterator so the map can be modified while it is being traversed
iter.remove();
}
}
} else if (type == BINARY_PAYLOAD_JOIN_REQUEST) {
// The join request payload is a UTF-8 encoded JSON object
JSONObject jsObj = new JSONObject(new String(bpm.m_payload, "UTF-8"));
final long initiatorHSId = jsObj.getLong("initiatorHSId");
final long txnId = jsObj.getLong("txnId");
final long lastSafeTxnId = jsObj.getLong("lastSafeTxnId");
final long joiningHSId = jsObj.getLong("joiningHSId");
if (m_recovering) {
org.voltdb.VoltDB.crashLocalVoltDB("Received a join request during recovery for " + CoreUtils.hsIdToString(joiningHSId) + " from " + CoreUtils.hsIdToString(initiatorHSId), true, null);
}
m_txnQueue.noteTransactionRecievedAndReturnLastSeen(initiatorHSId, txnId, lastSafeTxnId);
AgreementRejoinTransactionState transactionState = new AgreementRejoinTransactionState(txnId, initiatorHSId, joiningHSId, null);
// Track the txn by id only if the queue accepted it
if (m_txnQueue.add(transactionState)) {
m_transactionsById.put(transactionState.txnId, transactionState);
} else {
m_agreementLog.info("Dropping txn " + transactionState.txnId + " data from failed initiatorSiteId: " + transactionState.initiatorHSId);
}
}
} else if (message instanceof FaultMessage) {
FaultMessage fm = (FaultMessage) message;
discoverGlobalFaultData(fm);
} else if (message instanceof RecoveryMessage) {
RecoveryMessage rm = (RecoveryMessage) message;
// Expected only when this site is fully recovered and has no other recovery request pending
assert (m_recoverBeforeTxn == null);
assert (m_siteRequestingRecovery == null);
assert (m_recovering == false);
assert (m_recoveryStage == RecoveryStage.RECOVERED);
// Record the requested txn id and the requesting site
m_recoverBeforeTxn = rm.txnId();
m_siteRequestingRecovery = rm.sourceSite();
}
}
Aggregations