use of org.voltcore.messaging.BinaryPayloadMessage in project voltdb by VoltDB.
the class AgreementSite method shipZKDatabaseSnapshot.
/**
 * Serializes the local ZooKeeper database, compresses it with Snappy and sends it to the
 * joining agreement site as a {@code BinaryPayloadMessage}. Afterwards
 * {@code m_siteRequestingRecovery} and {@code m_recoverBeforeTxn} are reset to {@code null}.
 *
 * @param joiningAgreementSite HSId of the site the snapshot is mailed to
 * @param txnId transaction id written into the message metadata right after the payload type byte
 * @throws IOException if serializing, flushing or compressing the snapshot fails
 */
private void shipZKDatabaseSnapshot(long joiningAgreementSite, long txnId) throws IOException {
    final String sender = CoreUtils.hsIdToString(m_hsId);
    final String receiver = CoreUtils.hsIdToString(joiningAgreementSite);
    m_recoveryLog.info("Shipping ZK snapshot from " + sender + " to " + receiver);

    // Serialize the whole ZK database into an in-memory buffer.
    final ByteArrayOutputStream rawSnapshot = new ByteArrayOutputStream();
    final DataOutputStream snapshotStream = new DataOutputStream(rawSnapshot);
    final BinaryOutputArchive archive = new BinaryOutputArchive(snapshotStream);
    m_server.getZKDatabase().serializeSnapshot(archive);
    snapshotStream.flush();
    final byte[] compressedSnapshot = org.xerial.snappy.Snappy.compress(rawSnapshot.toByteArray());

    // Metadata layout: one payload-type byte followed by the eight-byte txn id.
    final ByteBuffer header = ByteBuffer.allocate(1 + 8);
    header.put(BINARY_PAYLOAD_SNAPSHOT).putLong(txnId);

    m_mailbox.send(joiningAgreementSite, new BinaryPayloadMessage(header.array(), compressedSnapshot));

    // The pending recovery request has been served; forget it.
    m_recoverBeforeTxn = null;
    m_siteRequestingRecovery = null;
}
use of org.voltcore.messaging.BinaryPayloadMessage in project voltdb by VoltDB.
the class Cartographer method sendLeaderChangeNotify.
/**
 * Builds a JSON document holding the partition id and the initiator HSId and sends it,
 * wrapped in a {@code BinaryPayloadMessage} with empty metadata, to the client interface
 * site on the local host.
 *
 * This message used to be sent by the SP or MP initiator when it accepted a promotion.
 * For development speed, mastership changes are detected here instead and the message is
 * constructed and sent from this class, so the client interface implementation can stay as is.
 *
 * Any exception raised while building or sending the message is escalated through
 * {@code VoltDB.crashLocalVoltDB}.
 *
 * @param hsId value written under the {@code JSON_INITIATOR_HSID} key
 * @param partitionId value written under the {@code JSON_PARTITION_ID} key
 */
private void sendLeaderChangeNotify(long hsId, int partitionId) {
    try {
        final JSONStringer json = new JSONStringer();
        json.object();
        json.keySymbolValuePair(JSON_PARTITION_ID, partitionId);
        json.keySymbolValuePair(JSON_INITIATOR_HSID, hsId);
        json.endObject();

        final byte[] noMetadata = new byte[0];
        final byte[] body = json.toString().getBytes("UTF-8");
        final BinaryPayloadMessage notification = new BinaryPayloadMessage(noMetadata, body);

        // Address the client interface site that lives on this host.
        final int localHostId = m_hostMessenger.getHostId();
        m_hostMessenger.send(
                CoreUtils.getHSIdFromHostAndSite(localHostId, HostMessenger.CLIENT_INTERFACE_SITE_ID),
                notification);
    } catch (Exception e) {
        VoltDB.crashLocalVoltDB("Unable to propogate leader promotion to client interface.", true, e);
    }
}
use of org.voltcore.messaging.BinaryPayloadMessage in project voltdb by VoltDB.
the class AgreementSite method processMessage.
/**
 * Handles one incoming message by dispatching on its concrete type.
 *
 * Messages whose source HSId is not contained in m_hsIds are logged and dropped.
 * Otherwise the message is handled by the first matching branch:
 * heartbeat, heartbeat response, local object (Runnable or ZK Request),
 * agreement task, binary payload (recovery snapshot or join request),
 * fault, or recovery request. A message matching none of the branches is ignored.
 *
 * Note that the LocalObjectMessage/Request branch calls this method recursively
 * with a locally built AgreementTaskMessage.
 *
 * @param message the message to handle
 * @throws Exception declared broadly; the body contains calls (charset decoding,
 *         and presumably JSON parsing) that can throw checked exceptions
 */
private void processMessage(VoltMessage message) throws Exception {
// Ignore anything that does not come from a site in the current membership set
if (!m_hsIds.contains(message.m_sourceHSId)) {
m_recoveryLog.info("Dropping message " + message + " because it is not from a known up site");
return;
}
if (message instanceof TransactionInfoBaseMessage) {
TransactionInfoBaseMessage info = (TransactionInfoBaseMessage) message;
// Special case heartbeats which only update RPQ
if (info instanceof HeartbeatMessage) {
// use the heartbeat to unclog the priority queue if clogged
long lastSeenTxnFromInitiator = m_txnQueue.noteTransactionRecievedAndReturnLastSeen(info.getInitiatorHSId(), info.getTxnId(), ((HeartbeatMessage) info).getLastSafeTxnId());
// respond to the initiator with the last seen transaction
HeartbeatResponseMessage response = new HeartbeatResponseMessage(m_hsId, lastSeenTxnFromInitiator, m_txnQueue.getQueueState() == RestrictedPriorityQueue.QueueState.BLOCKED_SAFETY);
m_mailbox.send(info.getInitiatorHSId(), response);
// we're done here (in the case of heartbeats)
return;
}
// Any other TransactionInfoBaseMessage subtype is unexpected here
assert (false);
} else if (message instanceof HeartbeatResponseMessage) {
HeartbeatResponseMessage hrm = (HeartbeatResponseMessage) message;
m_safetyState.updateLastSeenTxnIdFromExecutorBySiteId(hrm.getExecHSId(), hrm.getLastReceivedTxnId());
} else if (message instanceof LocalObjectMessage) {
LocalObjectMessage lom = (LocalObjectMessage) message;
if (lom.payload instanceof Runnable) {
// Runnable payloads are simply executed inline by this method
((Runnable) lom.payload).run();
} else if (lom.payload instanceof Request) {
Request r = (Request) lom.payload;
long txnId = 0;
boolean isRead = false;
switch(r.type) {
case OpCode.createSession:
// Session creation uses the session id as its txn id
txnId = r.sessionId;
break;
//For reads see if we can skip global agreement and just do the read
case OpCode.exists:
case OpCode.getChildren:
case OpCode.getChildren2:
case OpCode.getData:
//Only short circuit when the txn queue is empty. If anything is queued, don't short circuit
//in this case because ordering of reads and writes matters
if (m_txnQueue.isEmpty()) {
r.setOwner(m_hsId);
m_server.prepRequest(new Request(r), m_lastUsedTxnId);
return;
}
isRead = true;
//No break here: the read falls through to the default case, which assigns a txn id and puts
//it in the global order
default:
txnId = m_idManager.getNextUniqueTransactionId();
break;
}
/*
* Don't send the whole request if this is a read blocked on a write
* We may send a heartbeat instead of propagating a useless read transaction
* at the end of this block
*/
if (!isRead) {
for (long initiatorHSId : m_hsIds) {
if (initiatorHSId == m_hsId)
continue;
AgreementTaskMessage atm = new AgreementTaskMessage(r, txnId, m_hsId, m_safetyState.getNewestGloballySafeTxnId());
m_mailbox.send(initiatorHSId, atm);
}
}
//Process the ATM eagerly locally to aid
//in having a complete set of stuff to ship
//to a recovering agreement site
AgreementTaskMessage atm = new AgreementTaskMessage(new Request(r), txnId, m_hsId, m_safetyState.getNewestGloballySafeTxnId());
// Mark the message as coming from this site so the recursive call passes the membership check
atm.m_sourceHSId = m_hsId;
processMessage(atm);
/*
* Don't send a heartbeat out for every single blocked read that occurs
* Try and limit to 2000 a second (the 500 microsecond minimum gap below)
* which is a lot and should be pretty
* close to the previous behavior of propagating all reads. My measurements
* don't show the old behavior is better than none at all, but I fear
* change.
*/
if (isRead) {
final long now = System.nanoTime();
if (TimeUnit.NANOSECONDS.toMicros(now - m_lastHeartbeatTime) > 500) {
m_lastHeartbeatTime = now;
sendHeartbeats();
}
}
}
} else if (message instanceof AgreementTaskMessage) {
AgreementTaskMessage atm = (AgreementTaskMessage) message;
// Accept the task only if its txn id is not already tracked and is not older than the recovery cut-off
if (!m_transactionsById.containsKey(atm.m_txnId) && atm.m_txnId >= m_minTxnIdAfterRecovery) {
m_txnQueue.noteTransactionRecievedAndReturnLastSeen(atm.m_initiatorHSId, atm.m_txnId, atm.m_lastSafeTxnId);
AgreementTransactionState transactionState = new AgreementTransactionState(atm.m_txnId, atm.m_initiatorHSId, atm.m_request);
// Track the txn by id only when the queue accepted it
if (m_txnQueue.add(transactionState)) {
m_transactionsById.put(transactionState.txnId, transactionState);
} else {
m_agreementLog.info("Dropping txn " + transactionState.txnId + " data from failed initiatorSiteId: " + transactionState.initiatorHSId);
}
} else {
m_recoveryLog.info("Agreement, discarding duplicate txn during recovery, txnid is " + atm.m_txnId + " this should only occur during recovery. minTxnIdAfterRecovery " + m_minTxnIdAfterRecovery + " and dup is " + m_transactionsById.containsKey(atm.m_txnId));
}
} else if (message instanceof BinaryPayloadMessage) {
BinaryPayloadMessage bpm = (BinaryPayloadMessage) message;
// The first metadata byte identifies the payload type
ByteBuffer metadata = ByteBuffer.wrap(bpm.m_metadata);
final byte type = metadata.get();
if (type == BINARY_PAYLOAD_SNAPSHOT) {
assert (m_recovering);
assert (m_recoveryStage == RecoveryStage.SENT_PROPOSAL);
// Same stage check as the assert above, enforced even when assertions are disabled
if (m_recoveryStage != RecoveryStage.SENT_PROPOSAL) {
org.voltdb.VoltDB.crashLocalVoltDB("Received a recovery snapshot in stage " + m_recoveryStage.toString(), true, null);
}
// The txn id follows the type byte in the metadata
long selectedRecoverBeforeTxn = metadata.getLong();
if (selectedRecoverBeforeTxn < m_recoverBeforeTxn) {
org.voltdb.VoltDB.crashLocalVoltDB("Selected recover before txn was earlier than the proposed recover before txn", true, null);
}
m_recoverBeforeTxn = selectedRecoverBeforeTxn;
//anything before this precedes the snapshot
m_minTxnIdAfterRecovery = m_recoverBeforeTxn;
try {
m_recoverySnapshot = org.xerial.snappy.Snappy.uncompress(bpm.m_payload);
} catch (IOException e) {
org.voltdb.VoltDB.crashLocalVoltDB("Unable to decompress ZK snapshot", true, e);
}
m_recoveryStage = RecoveryStage.RECEIVED_SNAPSHOT;
/*
* Clean out all txns from before the snapshot
*/
Iterator<Map.Entry<Long, OrderableTransaction>> iter = m_transactionsById.entrySet().iterator();
while (iter.hasNext()) {
final Map.Entry<Long, OrderableTransaction> entry = iter.next();
if (entry.getKey() < m_minTxnIdAfterRecovery) {
m_txnQueue.faultTransaction(entry.getValue());
// Remove through the iterator so the map can be modified while iterating
iter.remove();
}
}
} else if (type == BINARY_PAYLOAD_JOIN_REQUEST) {
// The join request payload is a UTF-8 encoded JSON document
JSONObject jsObj = new JSONObject(new String(bpm.m_payload, "UTF-8"));
final long initiatorHSId = jsObj.getLong("initiatorHSId");
final long txnId = jsObj.getLong("txnId");
final long lastSafeTxnId = jsObj.getLong("lastSafeTxnId");
final long joiningHSId = jsObj.getLong("joiningHSId");
if (m_recovering) {
org.voltdb.VoltDB.crashLocalVoltDB("Received a join request during recovery for " + CoreUtils.hsIdToString(joiningHSId) + " from " + CoreUtils.hsIdToString(initiatorHSId), true, null);
}
m_txnQueue.noteTransactionRecievedAndReturnLastSeen(initiatorHSId, txnId, lastSafeTxnId);
// No latch is passed (null) for a join request received from another site
AgreementRejoinTransactionState transactionState = new AgreementRejoinTransactionState(txnId, initiatorHSId, joiningHSId, null);
if (m_txnQueue.add(transactionState)) {
m_transactionsById.put(transactionState.txnId, transactionState);
} else {
m_agreementLog.info("Dropping txn " + transactionState.txnId + " data from failed initiatorSiteId: " + transactionState.initiatorHSId);
}
}
} else if (message instanceof FaultMessage) {
FaultMessage fm = (FaultMessage) message;
discoverGlobalFaultData(fm);
} else if (message instanceof RecoveryMessage) {
RecoveryMessage rm = (RecoveryMessage) message;
// A recovery request is only expected when no other request is pending and this site is fully recovered
assert (m_recoverBeforeTxn == null);
assert (m_siteRequestingRecovery == null);
assert (m_recovering == false);
assert (m_recoveryStage == RecoveryStage.RECOVERED);
// Record which site asked for recovery and the txn id it supplied
m_recoverBeforeTxn = rm.txnId();
m_siteRequestingRecovery = rm.sourceSite();
}
}
use of org.voltcore.messaging.BinaryPayloadMessage in project voltdb by VoltDB.
the class AgreementSite method requestJoin.
/**
 * Constructs a ZK transaction that will add the initiator to the cluster.
 *
 * The work is wrapped in a {@code Runnable} that is delivered to this site's own mailbox
 * inside a {@code LocalObjectMessage}. When it runs, it allocates a fresh txn id, sends a
 * join-request {@code BinaryPayloadMessage} (JSON payload) to every other site in
 * {@code m_hsIds}, and then queues an {@code AgreementRejoinTransactionState} locally.
 * Any {@code Throwable} raised inside the task goes to {@code VoltDB.crashLocalVoltDB}.
 *
 * @param joiningSite HSId of the site that is joining
 * @return the latch that was handed to the locally queued {@code AgreementRejoinTransactionState}
 * @throws Exception declared by the signature
 */
public CountDownLatch requestJoin(final long joiningSite) throws Exception {
    final CountDownLatch joinLatch = new CountDownLatch(1);
    final Runnable joinTask = new Runnable() {
        @Override
        public void run() {
            try {
                final long joinTxnId = m_idManager.getNextUniqueTransactionId();
                for (long peerHSId : m_hsIds) {
                    // Every site except this one gets its own request, since
                    // lastSafeTxnId is looked up per destination.
                    if (peerHSId != m_hsId) {
                        final JSONObject request = new JSONObject();
                        request.put("txnId", joinTxnId);
                        request.put("initiatorHSId", m_hsId);
                        request.put("joiningHSId", joiningSite);
                        request.put("lastSafeTxnId", m_safetyState.getNewestSafeTxnIdForExecutorBySiteId(peerHSId));
                        final byte[] requestBytes = request.toString(4).getBytes("UTF-8");
                        // Metadata is a single byte naming the payload type.
                        final byte[] typeTag = new byte[] { BINARY_PAYLOAD_JOIN_REQUEST };
                        m_mailbox.send(peerHSId, new BinaryPayloadMessage(typeTag, requestBytes));
                    }
                }

                // Queue the same transaction locally; only this copy carries the latch.
                m_txnQueue.noteTransactionRecievedAndReturnLastSeen(m_hsId, joinTxnId, m_safetyState.getNewestGloballySafeTxnId());
                final AgreementRejoinTransactionState localState =
                        new AgreementRejoinTransactionState(joinTxnId, m_hsId, joiningSite, joinLatch);
                final boolean queued = m_txnQueue.add(localState);
                if (!queued) {
                    org.voltdb.VoltDB.crashLocalVoltDB("Shouldn't have failed to add txn", true, null);
                }
                m_transactionsById.put(localState.txnId, localState);
            } catch (Throwable e) {
                org.voltdb.VoltDB.crashLocalVoltDB("Error constructing JSON", false, e);
            }
        }
    };

    // Hand the task to this site's own mailbox, marked as originating from this site.
    final LocalObjectMessage envelope = new LocalObjectMessage(joinTask);
    envelope.m_sourceHSId = m_hsId;
    m_mailbox.deliver(envelope);
    return joinLatch;
}
use of org.voltcore.messaging.BinaryPayloadMessage in project voltdb by VoltDB.
the class OpsAgent method sendOpsResponse.
/**
 * Return the results of distributed work to the original requesting agent.
 *
 * The uncompressed response starts with the eight-byte request id. When {@code results}
 * is non-null, each table follows as a four-byte length prefix plus the table's buffer
 * contents (read from position 0). The whole buffer is compressed with
 * {@code CompressionService.compressBytes} and sent in a {@code BinaryPayloadMessage}
 * whose metadata is the single {@code payloadType} byte.
 *
 * @param results tables to return, or {@code null} to send a response with no data
 *        (the stats is not supported or not yet available)
 * @param obj request JSON; its "requestId" and "returnAddress" long fields are read
 * @param payloadType byte placed in the message metadata
 * @throws Exception declared by the signature
 */
private void sendOpsResponse(VoltTable[] results, JSONObject obj, byte payloadType) throws Exception {
    final long requestId = obj.getLong("requestId");
    final long returnAddress = obj.getLong("returnAddress");

    final ByteBuffer response;
    if (results == null) {
        // No data: the response carries nothing but the request id.
        response = ByteBuffer.allocate(8);
        response.putLong(requestId);
    } else {
        // First pass: rewind every table buffer and total up the bytes to copy.
        final ByteBuffer[] tables = new ByteBuffer[results.length];
        int tableBytes = 0;
        for (int i = 0; i < results.length; i++) {
            final ByteBuffer table = results[i].getBuffer();
            table.position(0);
            tableBytes += table.remaining();
            tables[i] = table;
        }

        // requestId + a length prefix for each stats table + the table contents
        response = ByteBuffer.allocate(8 + 4 * results.length + tableBytes);
        response.putLong(requestId);
        for (ByteBuffer table : tables) {
            response.putInt(table.remaining());
            response.put(table);
        }
    }

    final byte[] compressed = CompressionService.compressBytes(response.array());
    m_mailbox.send(returnAddress, new BinaryPayloadMessage(new byte[] { payloadType }, compressed));
}
Aggregations