use of org.voltdb.SnapshotCompletionInterest.SnapshotCompletionEvent in project voltdb by VoltDB.
the class SnapshotCompletionMonitor method processSnapshotData.
/**
 * Decodes a UTF-8 JSON snapshot record and, when its {@code hostCount} is 0,
 * builds a {@link SnapshotCompletionEvent} from it and delivers the event to
 * every registered {@link SnapshotCompletionInterest}.
 *
 * <p>A {@code null} payload is ignored. All top-level fields are read before
 * the {@code hostCount} check, so a record that lacks a required field fails
 * even when its {@code hostCount} is non-zero.
 * NOTE(review): {@code hostCount == 0} presumably means every host has
 * finished its part of the snapshot - confirm against the writer of this record.
 *
 * @param data UTF-8 encoded JSON text describing the snapshot, or {@code null}
 * @throws Exception propagated from decoding or parsing the payload
 */
private void processSnapshotData(byte[] data) throws Exception {
    if (data == null) {
        return;
    }

    final JSONObject root = new JSONObject(new String(data, "UTF-8"));
    final long txnId = root.getLong("txnId");
    final int hostCount = root.getInt("hostCount");
    final String path = root.getString(SnapshotUtil.JSON_PATH);
    final SnapshotPathType stype = SnapshotPathType.valueOf(root.getString(SnapshotUtil.JSON_PATH_TYPE));
    final String nonce = root.getString(SnapshotUtil.JSON_NONCE);
    final boolean truncation = root.getBoolean("isTruncation");
    final boolean didSucceed = root.getBoolean("didSucceed");
    // The truncation request ID is optional. Snapshots that were triggered
    // indirectly through ZooKeeper carry one so that whoever triggered the
    // snapshot can recognize it on completion.
    final String truncReqId = root.optString("truncReqId");

    if (hostCount != 0) {
        // Nothing to report for this record.
        return;
    }

    // Flatten the per-table, per-partition export sequence JSON into
    // table name -> (partition id -> (ackOffset, sequenceNumber)).
    final JSONObject exportJson = root.getJSONObject("exportSequenceNumbers");
    final ImmutableMap.Builder<String, Map<Integer, Pair<Long, Long>>> exportBuilder = ImmutableMap.builder();
    for (@SuppressWarnings("unchecked") Iterator<String> tables = exportJson.keys(); tables.hasNext();) {
        final String table = tables.next();
        final JSONObject perPartition = exportJson.getJSONObject(table);
        final ImmutableMap.Builder<Integer, Pair<Long, Long>> partitionBuilder = ImmutableMap.builder();
        for (@SuppressWarnings("unchecked") Iterator<String> parts = perPartition.keys(); parts.hasNext();) {
            final String partKey = parts.next();
            final Integer partId = Integer.valueOf(partKey);
            final JSONObject entry = perPartition.getJSONObject(partKey);
            final Long ack = entry.getLong("ackOffset");
            final Long seq = entry.getLong("sequenceNumber");
            partitionBuilder.put(partId, Pair.of(ack, seq));
        }
        exportBuilder.put(table, partitionBuilder.build());
    }
    final Map<String, Map<Integer, Pair<Long, Long>>> exportSequenceNumbers = exportBuilder.build();

    final long clusterCreateTime = root.optLong("clusterCreateTime", -1);

    // DR tuple stream state: partition id -> sequence number. The reported
    // DR version is whatever the last visited partition entry carries.
    final Map<Integer, Long> drSequenceNumbers = new HashMap<>();
    final JSONObject drStreamJson = root.getJSONObject("drTupleStreamStateInfo");
    int drVersion = 0;
    for (Iterator<String> drParts = drStreamJson.keys(); drParts.hasNext();) {
        final String drPartKey = drParts.next();
        final JSONObject drState = drStreamJson.getJSONObject(drPartKey);
        drVersion = (int) drState.getLong("drVersion");
        drSequenceNumbers.put(Integer.valueOf(drPartKey), drState.getLong("sequenceNumber"));
    }

    // Take an immutable copy of the per-partition txn ids recorded for this
    // snapshot, or fall back to an empty map when none were recorded.
    final Map<Integer, Long> partitionTxnIds;
    synchronized (m_snapshotTxnIdsToPartitionTxnIds) {
        final Map<Integer, Long> recorded = m_snapshotTxnIdsToPartitionTxnIds.get(txnId);
        partitionTxnIds = (recorded == null)
                ? ImmutableMap.<Integer, Long>of()
                : ImmutableMap.copyOf(recorded);
    }

    // Gather the last seen ids from the remote data centers; live rejoin
    // uses them to seed its starting state for applying DR data.
    final Map<Integer, Map<Integer, Map<Integer, DRConsumerDrIdTracker>>> consumerState = new HashMap<>();
    final JSONObject consumerJson = root.getJSONObject("drMixedClusterSizeConsumerState");
    for (Iterator<String> consumers = consumerJson.keys(); consumers.hasNext();) {
        final String consumerKey = consumers.next();
        final Integer consumerPartition = Integer.valueOf(consumerKey);
        final JSONObject siteJson = consumerJson.getJSONObject(consumerKey);
        consumerState.put(consumerPartition,
                ExtensibleSnapshotDigestData.buildConsumerSiteDrIdTrackersFromJSON(siteJson));
    }

    // Each interest receives its own event instance; a failing interest is
    // logged and does not prevent the remaining ones from being notified.
    for (Iterator<SnapshotCompletionInterest> it = m_interests.iterator(); it.hasNext();) {
        final SnapshotCompletionInterest listener = it.next();
        try {
            final SnapshotCompletionEvent event = new SnapshotCompletionEvent(
                    path,
                    stype,
                    nonce,
                    txnId,
                    partitionTxnIds,
                    truncation,
                    didSucceed,
                    truncReqId,
                    exportSequenceNumbers,
                    Collections.unmodifiableMap(drSequenceNumbers),
                    Collections.unmodifiableMap(consumerState),
                    drVersion,
                    clusterCreateTime);
            listener.snapshotCompleted(event);
        } catch (Exception e) {
            SNAP_LOG.warn("Exception while executing snapshot completion interest", e);
        }
    }
}
use of org.voltdb.SnapshotCompletionInterest.SnapshotCompletionEvent in project voltdb by VoltDB.
the class ElasticJoinProducer method runForBlockingDataTransfer.
/**
 * Runs one step of the blocking data transfer: restores at most one block
 * polled from the data sink, then checks whether the transfer is over.
 *
 * <p>When the sink is at EOF or the snapshot completion monitor is done, the
 * sink is closed and the stream snapshot mailbox (if any) is removed. If the
 * monitor is done, the completion event is applied and the join is marked
 * complete; otherwise this task is re-queued. When neither condition holds,
 * the task is handed back via {@code returnToTaskQueue}.
 *
 * @param siteConnection connection used to restore blocks and, on completion,
 *                       to receive the DR protocol version from the event
 */
private void runForBlockingDataTransfer(SiteProcedureConnection siteConnection) {
    boolean sourcesReady = false;
    RestoreWork restoreWork = m_dataSink.poll(m_snapshotBufferAllocator);
    if (restoreWork != null) {
        restoreBlock(restoreWork, siteConnection);
        sourcesReady = true;
    }

    // Either signal ends the transfer, so check for both conditions.
    // NOTE(review): the first line of the original comment was lost; it
    // appears to say that the completion monitor can finish without the data
    // sink reaching EOF when there is no replicated table in the database -
    // confirm.
    if (m_dataSink.isEOF() || m_snapshotCompletionMonitor.isDone()) {
        // No more data from this data sink, close and remove it from the list
        m_dataSink.close();

        if (m_streamSnapshotMb != null) {
            VoltDB.instance().getHostMessenger().removeMailbox(m_streamSnapshotMb.getHSId());
        }

        JOINLOG.debug(m_whoami + " data transfer is finished");

        if (m_snapshotCompletionMonitor.isDone()) {
            try {
                SnapshotCompletionEvent event = m_snapshotCompletionMonitor.get();
                // Check before the first dereference; previously the assert
                // came after event.drVersion was read and could never fire.
                assert event != null : "snapshot completion monitor returned a null event";
                siteConnection.setDRProtocolVersion(event.drVersion);
                JOINLOG.debug("P" + m_partitionId + " noticed data transfer completion");
                m_completionAction.setSnapshotTxnId(event.multipartTxnId);

                setJoinComplete(siteConnection,
                        event.exportSequenceNumbers,
                        event.drSequenceNumbers,
                        event.drMixedClusterSizeConsumerState,
                        false /* requireExistingSequenceNumbers */,
                        event.clusterCreateTime);
            } catch (InterruptedException e) {
                // isDone() already returned true, this shouldn't happen
                VoltDB.crashLocalVoltDB("Impossible interruption happend", true, e);
            } catch (ExecutionException e) {
                VoltDB.crashLocalVoltDB("Error waiting for snapshot to finish", true, e);
            }
        } else {
            // The sink hit EOF but the monitor has not completed yet;
            // re-queue this task to check again later.
            m_taskQueue.offer(this);
        }
    } else {
        // The sources are not set up yet, don't block the site,
        // return here and retry later.
        returnToTaskQueue(sourcesReady);
    }
}
use of org.voltdb.SnapshotCompletionInterest.SnapshotCompletionEvent in project voltdb by VoltDB.
the class RejoinProducer method doFinishingTask.
/**
 * Builds the rejoin finishing task and runs it once immediately.
 *
 * <p>The task re-queues itself while the snapshot completion monitor is not
 * done. Once it is done, the task reads the completion event, sends
 * {@code SNAPSHOT_FINISHED} to the coordinator and calls
 * {@code setJoinComplete}.
 *
 * @param siteConnection connection passed to the first invocation of the task
 */
private void doFinishingTask(final SiteProcedureConnection siteConnection) {
    /*
     * The rejoin coordinator is deliberately not notified here: the stream
     * snapshot may still be running on other nodes, so the snapshot
     * completion monitor is the one that drives the notification.
     *
     * Blocking on the completion interest (the old approach) raced with
     * fragments from the MPI that needed dummy responses. Fragments arriving
     * after the EOF got no dummy response, and the MPI then never invoked
     * SnapshotSaveAPI.logParticipatingHostCount.
     */
    final SiteTasker finishingTask = new SiteTasker() {
        @Override
        public void run(SiteProcedureConnection siteConnection) {
            throw new RuntimeException("Unexpected execution of run method in rejoin producer.");
        }

        @Override
        public void runForRejoin(SiteProcedureConnection siteConnection, TaskLog rejoinTaskLog) throws IOException {
            // Not finished yet: put this task back on the queue and retry later.
            if (!m_snapshotCompletionMonitor.isDone()) {
                m_taskQueue.offer(this);
                return;
            }

            // Values taken from the completion event; they keep these
            // defaults when the schema has no tables.
            Map<String, Map<Integer, Pair<Long, Long>>> exportSeqs = null;
            Map<Integer, Long> drSeqs = null;
            Map<Integer, Map<Integer, Map<Integer, DRConsumerDrIdTracker>>> consumerTrackers = null;
            long createTime = -1;

            try {
                final SnapshotCompletionEvent completed = m_snapshotCompletionMonitor.get();
                if (!m_schemaHasNoTables) {
                    REJOINLOG.debug(m_whoami + "waiting on snapshot completion monitor.");
                    exportSeqs = completed.exportSequenceNumbers;
                    m_completionAction.setSnapshotTxnId(completed.multipartTxnId);
                    drSeqs = completed.drSequenceNumbers;
                    consumerTrackers = completed.drMixedClusterSizeConsumerState;
                    createTime = completed.clusterCreateTime;
                    // Let the EE know which DR version is going to be used
                    siteConnection.setDRProtocolVersion(completed.drVersion);
                }

                REJOINLOG.debug(m_whoami + " monitor completed. Sending SNAPSHOT_FINISHED " + "and handing off to site.");
                final RejoinMessage finishedMsg = new RejoinMessage(m_mailbox.getHSId(), Type.SNAPSHOT_FINISHED);
                m_mailbox.send(m_coordinatorHsId, finishedMsg);
            } catch (InterruptedException crashme) {
                VoltDB.crashLocalVoltDB("Interrupted awaiting snapshot completion.", true, crashme);
            } catch (ExecutionException e) {
                VoltDB.crashLocalVoltDB("Unexpected exception awaiting snapshot completion.", true, e);
            }

            // An empty schema (no tables) yields no export sequence numbers;
            // pass an empty map in that case.
            if (exportSeqs == null) {
                exportSeqs = new HashMap<String, Map<Integer, Pair<Long, Long>>>();
            }

            setJoinComplete(siteConnection,
                    exportSeqs,
                    drSeqs,
                    consumerTrackers,
                    !m_schemaHasNoTables /* requireExistingSequenceNumbers */,
                    createTime);
        }
    };

    try {
        finishingTask.runForRejoin(siteConnection, null);
    } catch (IOException e) {
        VoltDB.crashLocalVoltDB("Unexpected IOException in rejoin", true, e);
    }
}
Aggregations