use of org.json_voltpatches.JSONException in project voltdb by VoltDB.
the class SnapshotSaveAPI method startSnapshotting.
/**
 * The only public method: do all the work to start a snapshot.
 * Assumes that a snapshot is feasible, that the caller has validated it can
 * be accomplished, that the caller knows this is a consistent or useful
 * transaction point at which to snapshot.
 *
 * @param file_path snapshot path, handed unchanged to {@code createSetupIv2}
 * @param pathType snapshot path type, handed unchanged to {@code createSetupIv2}
 * @param file_nonce snapshot nonce, handed unchanged to {@code createSetupIv2}
 * @param format snapshot format used for setup and for initiating the site's snapshot tasks
 * @param block non-zero to wait for this site's snapshot work to complete and
 *        return a per-partition result table; zero to return right after initiation
 * @param multiPartTxnId transaction id recorded under its own partition id and
 *        passed on to setup and to the site snapshot connection
 * @param partitionTxnId transaction id registered for this site's partition
 * @param legacyPerPartitionTxnIds extra transaction ids that are merged into the
 *        per-partition map unless their partition already has an entry
 * @param data optional JSON text; when it cannot be parsed the error is logged
 *        and the snapshot proceeds as if no data had been supplied
 * @param context execution context of the calling site
 * @param hostname host name written into the result rows
 * @param hashinatorData handed unchanged to {@code createSetupIv2}
 * @param timestamp handed unchanged to {@code createSetupIv2}
 * @return VoltTable describing the results of the snapshot attempt
 */
public VoltTable startSnapshotting(final String file_path, final String pathType, final String file_nonce,
        final SnapshotFormat format, final byte block, final long multiPartTxnId, final long partitionTxnId,
        final long[] legacyPerPartitionTxnIds, final String data, final SystemProcedureExecutionContext context,
        final String hostname, final HashinatorSnapshotData hashinatorData, final long timestamp) {
    TRACE_LOG.trace("Creating snapshot target and handing to EEs");
    final VoltTable result = SnapshotUtil.constructNodeResultsTable();

    // Parse the optional caller-supplied JSON; a parse failure is logged and jsData stays null.
    JSONObject jsData = null;
    if (data != null && !data.isEmpty()) {
        try {
            jsData = new JSONObject(data);
        } catch (JSONException e) {
            SNAP_LOG.error(String.format("JSON exception on snapshot data \"%s\".", data), e);
        }
    }
    final JSONObject finalJsData = jsData;

    // Serialize this site's applied DR trackers; fall back to an empty object on failure.
    JSONObject perSiteRemoteDataCenterDrIds;
    try {
        perSiteRemoteDataCenterDrIds = ExtensibleSnapshotDigestData.serializeSiteConsumerDrIdTrackersToJSON(context.getDrAppliedTrackers());
    } catch (JSONException e) {
        SNAP_LOG.warn("Failed to serialize the Remote DataCenter's Last applied DRIds");
        perSiteRemoteDataCenterDrIds = new JSONObject();
    }

    synchronized (SnapshotSiteProcessor.m_snapshotCreateLock) {
        // Install the setup action. NOTE(review): presumably executed once by the
        // setup barrier when every local site has arrived - confirm in SnapshotSiteProcessor.
        SnapshotSiteProcessor.m_snapshotCreateSetupBarrierActualAction.set(new Runnable() {
            @Override
            public void run() {
                // Take ownership of the ids accumulated by the sites and reset the shared maps.
                Map<Integer, Long> partitionTransactionIds = m_partitionLastSeenTransactionIds;
                SNAP_LOG.debug("Last seen partition transaction ids " + partitionTransactionIds);
                m_partitionLastSeenTransactionIds = new HashMap<Integer, Long>();
                partitionTransactionIds.put(TxnEgo.getPartitionId(multiPartTxnId), multiPartTxnId);

                Map<Integer, JSONObject> remoteDataCenterLastIds = m_remoteDataCenterLastIds;
                m_remoteDataCenterLastIds = new HashMap<Integer, JSONObject>();

                /*
                 * Do a quick sanity check that the provided IDs
                 * don't conflict with currently active partitions. If they do
                 * it isn't fatal we can just skip it.
                 */
                for (long txnId : legacyPerPartitionTxnIds) {
                    final int legacyPartition = TxnEgo.getPartitionId(txnId);
                    if (partitionTransactionIds.containsKey(legacyPartition)) {
                        SNAP_LOG.warn("While saving a snapshot and propagating legacy "
                                + "transaction ids found an id that matches currently active partition "
                                + partitionTransactionIds.get(legacyPartition));
                    } else {
                        partitionTransactionIds.put(legacyPartition, txnId);
                    }
                }

                m_allLocalSiteSnapshotDigestData = new ExtensibleSnapshotDigestData(
                        SnapshotSiteProcessor.getExportSequenceNumbers(),
                        SnapshotSiteProcessor.getDRTupleStreamStateInfo(),
                        remoteDataCenterLastIds,
                        finalJsData);
                createSetupIv2(file_path, pathType, file_nonce, format, multiPartTxnId, partitionTransactionIds,
                        finalJsData, context, result, m_allLocalSiteSnapshotDigestData,
                        context.getSiteTrackerForSnapshot(), hashinatorData, timestamp);
            }
        });

        // Create a barrier to use with the current number of sites to wait for
        // or if the barrier is already set up check if it is broken and reset if necessary
        final int numLocalSites = context.getLocalSitesCount();
        SnapshotSiteProcessor.readySnapshotSetupBarriers(numLocalSites);

        //From within this EE, record the sequence numbers as of the start of the snapshot (now)
        //so that the info can be put in the digest.
        SnapshotSiteProcessor.populateSequenceNumbersForExecutionSite(context);

        Integer partitionId = TxnEgo.getPartitionId(partitionTxnId);
        SNAP_LOG.debug("Registering transaction id " + partitionTxnId + " for " + TxnEgo.getPartitionId(partitionTxnId));
        m_partitionLastSeenTransactionIds.put(partitionId, partitionTxnId);
        m_remoteDataCenterLastIds.put(partitionId, perSiteRemoteDataCenterDrIds);
    }

    boolean runPostTasks = false;
    VoltTable earlyResultTable = null;
    try {
        SnapshotSiteProcessor.m_snapshotCreateSetupBarrier.await();
        try {
            synchronized (m_createLock) {
                SNAP_LOG.debug("Found tasks for HSIds: " + CoreUtils.hsIdCollectionToString(m_taskListsForHSIds.keySet()));
                SNAP_LOG.debug("Looking for local HSID: " + CoreUtils.hsIdToString(context.getSiteId()));
                Deque<SnapshotTableTask> taskList = m_taskListsForHSIds.remove(context.getSiteId());
                // switch to figure out what flavor of empty SnapshotSave result table to return.
                if (!m_createSuccess.get()) {
                    // There shouldn't be any work for any site if we failed
                    assert (m_taskListsForHSIds.isEmpty());
                    VoltTable finalresult = m_createResult.get();
                    if (finalresult != null) {
                        // First site to get here hands back the table carrying the failures.
                        m_createResult.set(null);
                        earlyResultTable = finalresult;
                    } else {
                        // We returned a non-empty NodeResultsTable with the failures in it,
                        // every other site needs to return a NodeResultsTable as well.
                        earlyResultTable = SnapshotUtil.constructNodeResultsTable();
                    }
                } else if (taskList == null) {
                    SNAP_LOG.debug("No task for this site, block " + block);
                    // Send back an appropriate empty table based on the block flag
                    if (block != 0) {
                        runPostTasks = true;
                        earlyResultTable = SnapshotUtil.constructPartitionResultsTable();
                        earlyResultTable.addRow(context.getHostId(), hostname,
                                CoreUtils.getSiteIdFromHSId(context.getSiteId()), "SUCCESS", "");
                    } else {
                        //If doing snapshot for only replicated table(s), earlyResultTable here
                        //may not be empty even if the taskList of this site is null.
                        //In that case, snapshot result is preserved by earlyResultTable.
                        earlyResultTable = result;
                    }
                } else {
                    context.getSiteSnapshotConnection().initiateSnapshots(format, taskList, multiPartTxnId,
                            m_allLocalSiteSnapshotDigestData);
                }

                if (m_deferredSetupFuture != null && taskList != null) {
                    // Add a listener to the deferred setup so that it can kick off the snapshot
                    // task once the setup is done.
                    m_deferredSetupFuture.addListener(new Runnable() {
                        @Override
                        public void run() {
                            DeferredSnapshotSetup deferredSnapshotSetup = null;
                            try {
                                deferredSnapshotSetup = m_deferredSetupFuture.get();
                            } catch (Exception e) {
                                // Not expected to throw, but never hide the cause if it does.
                                SNAP_LOG.error("Unexpected failure retrieving the deferred snapshot setup", e);
                            }
                            assert deferredSnapshotSetup != null;
                            context.getSiteSnapshotConnection().startSnapshotWithTargets(
                                    deferredSnapshotSetup.getPlan().getSnapshotDataTargets());
                        }
                    }, CoreUtils.SAMETHREADEXECUTOR);
                }
            }
        } finally {
            // Always meet the other sites at the finish barrier, even if the work above threw.
            SnapshotSiteProcessor.m_snapshotCreateFinishBarrier.await(120, TimeUnit.SECONDS);
        }
    } catch (TimeoutException e) {
        VoltDB.crashLocalVoltDB("Timed out waiting 120 seconds for all threads to arrive and start snapshot", true, null);
    } catch (InterruptedException | BrokenBarrierException | IllegalArgumentException e) {
        // NOTE(review): the thread's interrupt status is not restored here; confirm
        // whether the calling site thread relies on that.
        result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e));
        earlyResultTable = result;
    }

    // If earlyResultTable is set, return here
    if (earlyResultTable != null) {
        if (runPostTasks) {
            // Need to run post-snapshot tasks before finishing
            SnapshotSiteProcessor.runPostSnapshotTasks(context);
        }
        return earlyResultTable;
    }

    if (block != 0) {
        HashSet<Exception> failures = Sets.newHashSet();
        try {
            // For blocking snapshot, propagate the error from deferred setup back to the client
            final DeferredSnapshotSetup deferredSnapshotSetup = m_deferredSetupFuture.get();
            if (deferredSnapshotSetup != null && deferredSnapshotSetup.getError() != null) {
                failures.add(deferredSnapshotSetup.getError());
            }
            failures.addAll(context.getSiteSnapshotConnection().completeSnapshotWork());
            SnapshotSiteProcessor.runPostSnapshotTasks(context);
        } catch (Exception e) {
            failures.add(e);
        }

        String status = "SUCCESS";
        String err = "";
        if (!failures.isEmpty()) {
            status = "FAILURE";
            // Report every collected failure, not just whichever one the set iterates last.
            final StringBuilder allErrors = new StringBuilder();
            for (Exception e : failures) {
                if (allErrors.length() > 0) {
                    allErrors.append("; ");
                }
                allErrors.append(e);
            }
            err = allErrors.toString();
        }

        final VoltTable blockingResult = SnapshotUtil.constructPartitionResultsTable();
        blockingResult.addRow(context.getHostId(), hostname,
                CoreUtils.getSiteIdFromHSId(context.getSiteId()), status, err);
        return blockingResult;
    }
    return result;
}
use of org.json_voltpatches.JSONException in project voltdb by VoltDB.
the class RestoreAgent method checkSnapshotIsComplete.
/**
 * Validates a snapshot found on disk and, when it is usable, summarizes it.
 * A snapshot is rejected (null is returned and the reason is appended to
 * m_snapshotErrLogStr) when any table file is incomplete, partitioned table
 * files disagree on the total partition count, there is no digest, the digest
 * cannot be read or parsed, there is no catalog file, the catalog CRC differs
 * from the one in the digest, or the digest's table list differs from the
 * catalog's normal tables (partial snapshot).
 *
 * @param key value handed to the SnapshotInfo constructor as its first argument
 * @param s the snapshot to validate
 * @return a populated SnapshotInfo, or null when the snapshot was rejected
 */
private SnapshotInfo checkSnapshotIsComplete(Long key, Snapshot s) {
    int partitionCount = -1;
    for (TableFiles tf : s.m_tableFiles.values()) {
        // Check if the snapshot is complete
        if (tf.m_completed.stream().anyMatch(b -> !b)) {
            m_snapshotErrLogStr.append("\nRejected snapshot ").append(s.getNonce()).append(" because it was not completed.");
            return null;
        }
        // Replicated table doesn't check partition count
        if (tf.m_isReplicated) {
            continue;
        }
        // Everyone has to agree on the total partition count
        for (int count : tf.m_totalPartitionCounts) {
            if (partitionCount == -1) {
                partitionCount = count;
            } else if (count != partitionCount) {
                m_snapshotErrLogStr.append("\nRejected snapshot ").append(s.getNonce()).append(" because it had the wrong partition count ").append(count).append(", expecting ").append(partitionCount);
                return null;
            }
        }
    }

    if (s.m_digests.isEmpty()) {
        m_snapshotErrLogStr.append("\nRejected snapshot ").append(s.getNonce()).append(" because it had no valid digest file.");
        return null;
    }
    // Only the first digest is consulted.
    File digest = s.m_digests.get(0);
    Long catalog_crc = null;
    Map<Integer, Long> pidToTxnMap = new TreeMap<>();
    Set<String> digestTableNames = new HashSet<>();
    // Create a valid but meaningless InstanceId to support pre-instanceId checking versions
    InstanceId instanceId = new InstanceId(0, 0);
    int newPartitionCount = -1;
    try {
        JSONObject digest_detail = SnapshotUtil.CRCCheck(digest, LOG);
        if (digest_detail == null) {
            // Give the exception a message so the rejection log does not end in "due to: null".
            throw new IOException("digest CRC check returned no content");
        }
        catalog_crc = digest_detail.getLong("catalogCRC");

        // The remaining digest fields are optional.
        if (digest_detail.has("partitionTransactionIds")) {
            JSONObject pidToTxnId = digest_detail.getJSONObject("partitionTransactionIds");
            Iterator<String> it = pidToTxnId.keys();
            while (it.hasNext()) {
                String pidkey = it.next();
                Long txnidval = pidToTxnId.getLong(pidkey);
                pidToTxnMap.put(Integer.valueOf(pidkey), txnidval);
            }
        }
        if (digest_detail.has("instanceId")) {
            instanceId = new InstanceId(digest_detail.getJSONObject("instanceId"));
        }
        if (digest_detail.has("newPartitionCount")) {
            newPartitionCount = digest_detail.getInt("newPartitionCount");
        }
        if (digest_detail.has("tables")) {
            JSONArray tableObj = digest_detail.getJSONArray("tables");
            for (int i = 0; i < tableObj.length(); i++) {
                digestTableNames.add(tableObj.getString(i));
            }
        }
    } catch (IOException ioe) {
        m_snapshotErrLogStr.append("\nUnable to read digest file: ").append(digest.getAbsolutePath()).append(" due to: ").append(ioe.getMessage());
        return null;
    } catch (JSONException je) {
        m_snapshotErrLogStr.append("\nUnable to extract catalog CRC from digest: ").append(digest.getAbsolutePath()).append(" due to: ").append(je.getMessage());
        return null;
    }

    if (s.m_catalogFile == null) {
        m_snapshotErrLogStr.append("\nRejected snapshot ").append(s.getNonce()).append(" because it had no catalog.");
        return null;
    }
    try {
        byte[] bytes = MiscUtils.fileToBytes(s.m_catalogFile);
        InMemoryJarfile jarfile = CatalogUtil.loadInMemoryJarFile(bytes);
        // Compare as primitives so the check never degrades into a reference comparison.
        if (jarfile.getCRC() != catalog_crc.longValue()) {
            m_snapshotErrLogStr.append("\nRejected snapshot ").append(s.getNonce()).append(" because catalog CRC did not match digest.");
            return null;
        }
        // Make sure this is not a partial snapshot.
        // Compare digestTableNames with all normal table names in catalog file.
        // A normal table is one that's NOT a materialized view, nor an export table.
        Set<String> catalogNormalTableNames = CatalogUtil.getNormalTableNamesFromInMemoryJar(jarfile);
        if (!catalogNormalTableNames.equals(digestTableNames)) {
            m_snapshotErrLogStr.append("\nRejected snapshot ").append(s.getNonce()).append(" because this is a partial snapshot.");
            return null;
        }
    } catch (IOException ioe) {
        m_snapshotErrLogStr.append("\nRejected snapshot ").append(s.getNonce()).append(" because catalog file could not be validated").append(" due to: ").append(ioe.getMessage());
        return null;
    }

    SnapshotInfo info = new SnapshotInfo(key, digest.getParent(), SnapshotUtil.parseNonceFromDigestFilename(digest.getName()), partitionCount, newPartitionCount, catalog_crc, m_hostId, instanceId, digestTableNames, s.m_stype);
    // populate table to partition map.
    for (Entry<String, TableFiles> te : s.m_tableFiles.entrySet()) {
        TableFiles tableFile = te.getValue();
        HashSet<Integer> ids = new HashSet<>();
        for (Set<Integer> idSet : tableFile.m_validPartitionIds) {
            ids.addAll(idSet);
        }
        // Only partitioned tables get an entry in the partition map.
        if (!tableFile.m_isReplicated) {
            info.partitions.put(te.getKey(), ids);
        }
        // keep track of tables for which we've seen files while we're here
        info.fileTables.add(te.getKey());
    }
    info.setPidToTxnIdMap(pidToTxnMap);
    return info;
}
use of org.json_voltpatches.JSONException in project voltdb by VoltDB.
the class RestoreAgent method deserializeRestoreInformation.
/**
 * Reads the restore information each listed ZooKeeper child published under
 * {@code VoltZK.restore} and folds it into the caller's structures.
 * <ul>
 * <li>Returns the largest "max" value any child reported (the command log
 *     start transaction id), or null when no child reported one.</li>
 * <li>Adds every SnapshotInfo found in a child's "snapInfos" array to
 *     {@code snapshotFragments}, grouped by nonce.</li>
 * <li>Crashes the local node when a child's "action" differs from the
 *     ordinal of the local action.</li>
 * </ul>
 * The child named "snapshot_id" is skipped.
 *
 * @param children names of the child nodes to read
 * @param snapshotFragments output map from nonce to the fragments seen for it
 * @return the highest reported "max" transaction id, or null if none
 * @throws Exception on any failure other than a JSONException, which crashes the node instead
 */
private Long deserializeRestoreInformation(List<String> children, Map<String, Set<SnapshotInfo>> snapshotFragments) throws Exception {
    try {
        final int localAction = m_action.ordinal();
        Long highestMaxTxnId = null;

        for (String child : children) {
            //This might be created before we are done fetching the restore info
            if (child.equals("snapshot_id")) {
                continue;
            }

            final byte[] raw = m_zk.getData(VoltZK.restore + "/" + child, false, null);
            final JSONObject published = new JSONObject(new String(raw, "UTF8"));

            // Long.MIN_VALUE is the sentinel for "this node did not report a max".
            final long reportedMax = published.optLong("max", Long.MIN_VALUE);
            if (reportedMax != Long.MIN_VALUE
                    && (highestMaxTxnId == null || reportedMax > highestMaxTxnId)) {
                highestMaxTxnId = reportedMax;
            }

            if (published.getInt("action") != localAction) {
                String msg = "Database actions are not consistent. Remote node action is not 'recover'. " + "Please enter the same database action on the command-line.";
                VoltDB.crashLocalVoltDB(msg, false, null);
            }

            final JSONArray infoArray = published.getJSONArray("snapInfos");
            final int infoCount = infoArray.length();
            for (int idx = 0; idx < infoCount; idx++) {
                final SnapshotInfo fragment = new SnapshotInfo(infoArray.getJSONObject(idx));
                snapshotFragments
                        .computeIfAbsent(fragment.nonce, unused -> new HashSet<SnapshotInfo>())
                        .add(fragment);
            }
        }
        return highestMaxTxnId;
    } catch (JSONException je) {
        VoltDB.crashLocalVoltDB("Error exchanging snapshot information", true, je);
    }
    // Only reached if crashLocalVoltDB returns.
    throw new RuntimeException("impossible");
}
use of org.json_voltpatches.JSONException in project voltdb by VoltDB.
the class SnapshotDaemon method initiateNextSnapshot.
/**
 * Requests the next automatic snapshot.
 * Moves the daemon to the SNAPSHOTTING state, derives the nonce from the
 * configured prefix and the formatted time, records the snapshot in
 * m_snapshots, registers a callback for the response, and submits the
 * snapshot save request.
 *
 * @param now current time in milliseconds; used for the nonce, stored as the
 *        last sysproc invocation time, and recorded with the snapshot
 */
private void initiateNextSnapshot(long now) {
    setState(State.SNAPSHOTTING);
    m_lastSysprocInvocation = now;
    final Date nowDate = new Date(now);
    final String dateString = m_dateFormat.format(nowDate);
    final String nonce = m_prefix + dateString;
    JSONObject jsObj = new JSONObject();
    try {
        jsObj.put(SnapshotUtil.JSON_PATH, m_path);
        jsObj.put(SnapshotUtil.JSON_PATH_TYPE, SnapshotPathType.SNAP_AUTO.toString());
        jsObj.put(SnapshotUtil.JSON_NONCE, nonce);
        jsObj.put("perPartitionTxnIds", retrievePerPartitionTransactionIds());
        m_snapshots.offer(new Snapshot(m_path, SnapshotPathType.SNAP_AUTO, nonce, now));
        long handle = m_nextCallbackHandle++;
        m_procedureCallbacks.put(handle, new ProcedureCallback() {
            @Override
            public void clientCallback(final ClientResponse clientResponse) throws Exception {
                // Clear the initiation timestamp before handing the response on.
                m_lastInitiationTs = null;
                processClientResponsePrivate(clientResponse);
            }
        });
        SNAP_LOG.info("Requesting auto snapshot to path " + m_path + " nonce " + nonce);
        initiateSnapshotSave(handle, new Object[] { jsObj.toString(4) }, false);
    } catch (JSONException e) {
        /*
         * Should never happen, so fail fast - but say what failed instead of
         * crashing with an empty message.
         */
        VoltDB.crashLocalVoltDB("Unable to build the JSON parameters for auto snapshot with nonce " + nonce, false, e);
    }
}
use of org.json_voltpatches.JSONException in project voltdb by VoltDB.
the class SnapshotDaemon method processSnapshotTruncationRequestCreated.
/*
 * A ZK event occurred requesting that a truncation snapshot be taken.
 *
 * Picks the greatest child of the event path as the truncation request id,
 * writes the current time to the truncation request node, and submits a
 * snapshot save request whose callback:
 *  - on a non-success response, retries the event in 5 minutes;
 *  - on a reported snapshot failure, retries the event in 1 minute;
 *  - on success, deletes all request nodes under the event path, records the
 *    attempt under the snapshot's txnId, and restarts the request existence check.
 */
private void processSnapshotTruncationRequestCreated(final WatchedEvent event) {
    loggingLog.info("Snapshot truncation leader received snapshot truncation request");
    // Get the truncation request ID which is the truncation request node path.
    final String truncReqId;
    try {
        List<String> children = m_zk.getChildren(event.getPath(), false);
        if (children.isEmpty()) {
            loggingLog.error("Unable to retrieve truncation snapshot request id from ZK, log can't be truncated");
            return;
        }
        truncReqId = ZKUtil.joinZKPath(event.getPath(), Collections.max(children));
    } catch (Exception e) {
        // Keep the cause in the log; it used to be dropped.
        loggingLog.error("Unable to retrieve truncation snapshot request ID from ZK, log can't be truncated", e);
        return;
    }

    final long now = System.currentTimeMillis();
    final String nonce = Long.toString(now);
    // TRAIL [TruncSnap:7] write current ts to request zk node data
    try {
        ByteBuffer payload = ByteBuffer.allocate(8);
        payload.putLong(0, now);
        m_zk.setData(VoltZK.request_truncation_snapshot, payload.array(), -1);
    } catch (Exception e) {
        //Cause a cascading failure?
        VoltDB.crashLocalVoltDB("Setting data on the truncation snapshot request in ZK should never fail", true, e);
    }

    // Parameters for the snapshot save invocation
    JSONObject jsObj = new JSONObject();
    try {
        assert truncReqId != null;
        // The request id travels as a nested JSON string under "data".
        JSONObject jsData = new JSONObject();
        jsData.put("truncReqId", truncReqId);
        String sData = jsData.toString();
        jsObj.put(SnapshotUtil.JSON_PATH, VoltDB.instance().getCommandLogSnapshotPath());
        jsObj.put(SnapshotUtil.JSON_NONCE, nonce);
        jsObj.put(SnapshotUtil.JSON_PATH_TYPE, SnapshotPathType.SNAP_CL.toString());
        jsObj.put("perPartitionTxnIds", retrievePerPartitionTransactionIds());
        jsObj.put("data", sData);
    } catch (JSONException e) {
        /*
         * Should never happen, so fail fast
         */
        VoltDB.crashLocalVoltDB("Unable to build the JSON parameters for the truncation snapshot request", true, e);
    }

    // Callback handle for the snapshot save invocation
    long handle = m_nextCallbackHandle++;
    m_procedureCallbacks.put(handle, new ProcedureCallback() {
        @Override
        public void clientCallback(ClientResponse clientResponse) throws Exception {
            m_lastInitiationTs = null;
            if (clientResponse.getStatus() != ClientResponse.SUCCESS) {
                loggingLog.warn("Attempt to initiate a truncation snapshot was not successful: " + clientResponse.getStatusString());
                loggingLog.warn("Retrying log truncation snapshot in 5 minutes");
                /*
                 * TRAIL [TruncSnap:8] (callback) on failed response try again in a few minutes
                 */
                m_es.schedule(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            processTruncationRequestEvent(event);
                        } catch (Exception e) {
                            VoltDB.crashLocalVoltDB("Error processing snapshot truncation request event", true, e);
                        }
                    }
                }, 5, TimeUnit.MINUTES);
                return;
            }

            final VoltTable[] results = clientResponse.getResults();
            final VoltTable result = results[0];
            boolean success = true;
            final String err = SnapshotUtil.didSnapshotRequestFailWithErr(results);
            if (err != null) {
                if (err.trim().equalsIgnoreCase("SNAPSHOT IN PROGRESS")) {
                    loggingLog.info("Snapshot is in progress");
                } else {
                    loggingLog.warn("Snapshot failed with failure response: " + err);
                }
                success = false;
            }

            //assert(result.getColumnName(1).equals("TABLE"));
            if (success) {
                // Any non-SUCCESS row fails the attempt; keep scanning so every failure is logged.
                while (result.advanceRow()) {
                    if (!result.getString("RESULT").equals("SUCCESS")) {
                        success = false;
                        loggingLog.warn("Snapshot save feasibility test failed for host " + result.getLong("HOST_ID") + " table " + result.getString("TABLE") + " with error message " + result.getString("ERR_MSG"));
                    }
                }
            }

            if (success) {
                loggingLog.info("Snapshot initiation for log truncation was successful");
                JSONObject obj = new JSONObject(clientResponse.getAppStatusString());
                final long snapshotTxnId = obj.getLong("txnId");
                try {
                    boolean found = false;
                    ZKUtil.VoidCallback lastCallback = null;
                    // Issue a delete for every request node; only the last callback is waited on.
                    for (String child : m_zk.getChildren(event.getPath(), false)) {
                        String requestId = ZKUtil.joinZKPath(event.getPath(), child);
                        found = found || requestId.equals(truncReqId);
                        lastCallback = new ZKUtil.VoidCallback();
                        m_zk.delete(requestId, -1, lastCallback, null);
                    }
                    if (lastCallback != null) {
                        try {
                            lastCallback.get();
                        } catch (KeeperException.NoNodeException ignoreIt) {
                            // The node is already gone, which is the desired end state.
                        }
                    }
                    if (!found) {
                        VoltDB.crashLocalVoltDB("Could not match truncations snapshot request id while atepting its removal", true, null);
                    }
                } catch (Exception e) {
                    VoltDB.crashLocalVoltDB("Unexpected error deleting truncation snapshot request", true, e);
                }
                try {
                    TruncationSnapshotAttempt snapshotAttempt = m_truncationSnapshotAttempts.get(snapshotTxnId);
                    if (snapshotAttempt == null) {
                        snapshotAttempt = new TruncationSnapshotAttempt();
                        m_truncationSnapshotAttempts.put(snapshotTxnId, snapshotAttempt);
                        snapshotAttempt.pathType = SnapshotPathType.SNAP_CL.toString();
                    }
                    snapshotAttempt.nonce = nonce;
                    snapshotAttempt.path = VoltDB.instance().getCommandLogSnapshotPath();
                } finally {
                    // TRAIL [TruncSnap:9] (callback) restart the whole request check cycle
                    try {
                        truncationRequestExistenceCheck();
                    } catch (Exception e) {
                        VoltDB.crashLocalVoltDB("Unexpected error checking for existence of truncation snapshot request", true, e);
                    }
                }
            } else {
                loggingLog.info("Retrying log truncation snapshot in 60 seconds");
                /*
                 * TRAIL [TruncSnap:10] (callback) on table reported failure try again in a minute
                 */
                m_es.schedule(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            processTruncationRequestEvent(event);
                        } catch (Exception e) {
                            VoltDB.crashLocalVoltDB("Exception processing truncation request event", true, e);
                        }
                    }
                }, 1, TimeUnit.MINUTES);
            }
        }
    });

    try {
        loggingLog.info("Initiating @SnapshotSave for log truncation");
        initiateSnapshotSave(handle, new Object[] { jsObj.toString(4) }, false);
    } catch (JSONException e) {
        /*
         * Should never happen, so fail fast
         */
        VoltDB.crashLocalVoltDB("Unable to serialize the parameters of the truncation snapshot request", true, e);
    }
}
Aggregations