use of org.apache.zookeeper_voltpatches.ZooKeeper in project voltdb by VoltDB.
the class SnapshotSaveAPI method logParticipatingHostCount.
/**
 * Records the number of participating hosts on the snapshot's completion node in ZooKeeper, so
 * that SnapshotCompletionMonitor can inspect that node to decide whether the snapshot is done.
 *
 * Call this only after every participant has responded. Some hosts may finish their part of the
 * snapshot before the coordinator gets here, in which case the stored host count has already
 * been decremented. To keep those completions accounted for, the stored count is adjusted by
 * participantCount + 1 instead of being overwritten.
 *
 * If the completion node does not exist, the method returns without doing anything. The write is
 * conditional on the version that was read; when the version no longer matches, the whole
 * read-modify-write cycle is repeated.
 *
 * @param txnId The snapshot txnId
 * @param participantCount The number of hosts participating in this snapshot
 */
public static void logParticipatingHostCount(long txnId, int participantCount) {
    final ZooKeeper zooKeeper = VoltDB.instance().getHostMessenger().getZK();
    final String completionNode = VoltZK.completed_snapshots + "/" + txnId;

    for (;;) {
        // Read the current record together with its version.
        final Stat nodeStat = new Stat();
        byte[] payload = null;
        try {
            payload = zooKeeper.getData(completionNode, false, nodeStat);
        } catch (KeeperException e) {
            if (e.code() == KeeperException.Code.NONODE) {
                // The node is absent when snapshot creation failed; nothing to update.
                return;
            }
            VoltDB.crashLocalVoltDB("Failed to get snapshot completion node", true, e);
        } catch (InterruptedException e) {
            VoltDB.crashLocalVoltDB("Interrupted getting snapshot completion node", true, e);
        }

        if (payload == null) {
            VoltDB.crashLocalVoltDB("Data should not be null if the node exists", false, null);
        }

        try {
            final JSONObject completion = new JSONObject(new String(payload, Charsets.UTF_8));
            if (completion.getLong("txnId") != txnId) {
                VoltDB.crashLocalVoltDB("TxnId should match", false, null);
            }

            // The extra 1 compensates for hostCount having been initialized to -1.
            final int adjustedCount = completion.getInt("hostCount") + participantCount + 1;
            completion.put("hostCount", adjustedCount);

            final byte[] updated = completion.toString(4).getBytes(Charsets.UTF_8);
            zooKeeper.setData(completionNode, updated, nodeStat.getVersion());
        } catch (KeeperException.BadVersionException e) {
            // Someone else modified the node after our read; start over with fresh data.
            continue;
        } catch (Exception e) {
            VoltDB.crashLocalVoltDB("This ZK call should never fail", true, e);
        }

        break;
    }
}
use of org.apache.zookeeper_voltpatches.ZooKeeper in project voltdb by VoltDB.
the class RealVoltDB method recoveryComplete.
/**
 * Callback invoked with the request id of a completed truncation snapshot.
 *
 * Does nothing unless this node is currently rejoining. When the stored rejoin truncation
 * request id compares less than or equal to {@code requestId}, the rejoin indicator for this
 * host is removed from ZooKeeper and the rejoining state is cleared. Otherwise a new truncation
 * snapshot request node is created and the rejoining state is left untouched, so later
 * completions arrive here again.
 *
 * @param requestId request id of the truncation snapshot that completed
 */
@Override
public synchronized void recoveryComplete(String requestId) {
    assert (m_rejoinDataPending == false);

    if (!m_rejoining) {
        return;
    }

    final boolean awaitedSnapshotDone = m_rejoinTruncationReqId.compareTo(requestId) <= 0;
    if (!awaitedSnapshotDone) {
        // don't flip the m_rejoining state, all truncation snapshot completions will call back to here.
        try {
            final ZooKeeper zooKeeper = m_messenger.getZK();
            final String newRequest = zooKeeper.create(
                    VoltZK.request_truncation_snapshot_node,
                    null,
                    Ids.OPEN_ACL_UNSAFE,
                    CreateMode.PERSISTENT_SEQUENTIAL);
            // NOTE(review): compareTo above has already dereferenced m_rejoinTruncationReqId,
            // so this null check looks unreachable for a null field - confirm intent.
            if (m_rejoinTruncationReqId == null) {
                m_rejoinTruncationReqId = newRequest;
            }
        } catch (Exception e) {
            VoltDB.crashLocalVoltDB("Unable to retry post-rejoin truncation snapshot request.", true, e);
        }
        return;
    }

    final String actionName;
    if (m_joining) {
        actionName = "join";
    } else {
        actionName = "rejoin";
    }

    // remove the rejoin blocker
    CoreZK.removeRejoinNodeIndicatorForHost(m_messenger.getZK(), m_myHostId);
    consoleLog.info(String.format("Node %s completed", actionName));
    m_rejoinTruncationReqId = null;
    m_rejoining = false;
}
use of org.apache.zookeeper_voltpatches.ZooKeeper in project voltdb by VoltDB.
the class SnapshotSiteProcessor method logSnapshotCompleteToZK.
/**
 * Records this host's snapshot completion on the snapshot's completion node in ZooKeeper and
 * then trims old completion records.
 *
 * The node's JSON is read, its "hostCount" is decremented by one, "didSucceed" is set to
 * {@code snapshotSuccess} ("isTruncation" is additionally forced to false on failure), the extra
 * digest data is merged in, and the result is written back conditionally on the version that
 * was read. A version mismatch or a missing node restarts the cycle; the process is crashed
 * if the cycle has not succeeded within 10 minutes.
 *
 * @param txnId             the snapshot txnId; must match the "txnId" stored in the node
 * @param snapshotSuccess   whether the snapshot succeeded on this host
 * @param extraSnapshotData additional digest data merged into the node's JSON
 */
private static void logSnapshotCompleteToZK(long txnId, boolean snapshotSuccess, ExtensibleSnapshotDigestData extraSnapshotData) {
    final ZooKeeper zooKeeper = VoltDB.instance().getHostMessenger().getZK();
    // Give up after 10 minutes
    final long deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(10);
    final String completionNode = VoltZK.completed_snapshots + "/" + txnId;

    for (;;) {
        if (System.currentTimeMillis() > deadline) {
            VoltDB.crashLocalVoltDB("Timed out logging snapshot completion to ZK");
        }

        final Stat nodeStat = new Stat();
        byte[] payload = null;
        try {
            payload = zooKeeper.getData(completionNode, false, nodeStat);
        } catch (NoNodeException e) {
            // The node has not been created yet; try again (no back-off between attempts).
            continue;
        } catch (Exception e) {
            VoltDB.crashLocalVoltDB("This ZK get should never fail", true, e);
        }

        if (payload == null) {
            VoltDB.crashLocalVoltDB("Data should not be null if the node exists", false, null);
        }

        try {
            final JSONObject completion = new JSONObject(new String(payload, "UTF-8"));
            if (completion.getLong("txnId") != txnId) {
                VoltDB.crashLocalVoltDB("TxnId should match", false, null);
            }

            // One fewer host is outstanding now that this one has reported.
            completion.put("hostCount", completion.getInt("hostCount") - 1);
            completion.put("didSucceed", snapshotSuccess);
            if (!snapshotSuccess) {
                completion.put("isTruncation", false);
            }
            extraSnapshotData.mergeToZooKeeper(completion, SNAP_LOG);

            final byte[] updated = completion.toString().getBytes("UTF-8");
            if (updated.length > 5000000) {
                SNAP_LOG.warn("ZooKeeper node for snapshot digest unexpectedly large: " + updated.length);
            }
            zooKeeper.setData(completionNode, updated, nodeStat.getVersion());
        } catch (KeeperException.BadVersionException e) {
            // The node changed after our read; redo the read-modify-write.
            continue;
        } catch (Exception e) {
            VoltDB.crashLocalVoltDB("This ZK call should never fail", true, e);
        }

        break;
    }

    /*
     * If we are running without command logging there will be no consumer for
     * the completed snapshot messages. Consume them here to bound space usage in ZK.
     */
    try {
        final TreeSet<String> records = new TreeSet<String>(zooKeeper.getChildren(VoltZK.completed_snapshots, false));
        // Drop the lowest-sorted records until at most 30 remain.
        while (records.size() > 30) {
            final String oldest = records.pollFirst();
            try {
                zooKeeper.delete(VoltZK.completed_snapshots + "/" + oldest, -1);
            } catch (NoNodeException ignored) {
                // The record is already gone; nothing left to delete.
            } catch (Exception e) {
                VoltDB.crashLocalVoltDB("Deleting a snapshot completion record from ZK should only fail with NoNodeException", true, e);
            }
        }
    } catch (Exception e) {
        VoltDB.crashLocalVoltDB("Retrieving list of completed snapshots from ZK should never fail", true, e);
    }
}
use of org.apache.zookeeper_voltpatches.ZooKeeper in project voltdb by VoltDB.
the class MockVoltDB method validateStartAction.
/**
 * Checks the start actions registered under {@code VoltZK.start_action} and rejects JOIN,
 * REJOIN and LIVE_REJOIN while the {@code VoltZK.init_completed} node does not yet exist.
 *
 * A {@code StartActionWatcher} is registered on the start-action node as part of listing its
 * children. If the offending entry belongs to this host, the local process is crashed;
 * if it belongs to another host, a warning is logged. A KeeperException is logged and
 * otherwise ignored; an InterruptedException crashes the local process.
 */
public void validateStartAction() {
    try {
        ZooKeeper zk = m_hostMessenger.getZK();
        boolean initCompleted = zk.exists(VoltZK.init_completed, false) != null;
        List<String> children = zk.getChildren(VoltZK.start_action, new StartActionWatcher(), null);

        // Decode with an explicit charset instead of the platform default so the comparison
        // against the StartAction names does not depend on the JVM's locale settings.
        final java.nio.charset.Charset utf8 = java.nio.charset.Charset.forName("UTF-8");

        for (String child : children) {
            byte[] data = zk.getData(VoltZK.start_action + "/" + child, false, null);
            if (data == null) {
                VoltDB.crashLocalVoltDB("Couldn't find " + VoltZK.start_action + "/" + child);
                // Presumably the crash call does not return; if it ever does, skip this entry
                // rather than dereferencing null below.
                continue;
            }

            String startAction = new String(data, utf8);
            boolean joinStyleAction = startAction.equals(StartAction.JOIN.toString())
                    || startAction.equals(StartAction.REJOIN.toString())
                    || startAction.equals(StartAction.LIVE_REJOIN.toString());

            if (joinStyleAction && !initCompleted) {
                int nodeId = VoltZK.getHostIDFromChildName(child);
                if (nodeId == m_hostMessenger.getHostId()) {
                    VoltDB.crashLocalVoltDB("This node was started with start action " + startAction + " during cluster creation. All nodes should be started with matching " + "create or recover actions when bring up a cluster. Join and Rejoin " + "are for adding nodes to an already running cluster.");
                } else {
                    logger.warn("Node " + nodeId + " tried to " + startAction + " cluster but it is not allowed during cluster creation. " + "All nodes should be started with matching create or recover actions when bring up a cluster. " + "Join and rejoin are for adding nodes to an already running cluster.");
                }
            }
        }
    } catch (KeeperException e) {
        logger.error("Failed to validate the start actions:" + e.getMessage());
    } catch (InterruptedException e) {
        VoltDB.crashLocalVoltDB("Interrupted during start action validation:" + e.getMessage(), true, e);
    }
}
use of org.apache.zookeeper_voltpatches.ZooKeeper in project voltdb by VoltDB.
the class TestMapCache method testDeleteChild.
/**
 * Deletes one child znode under /cache02 and checks that the MapCache's point-in-time view
 * drops that entry while keeping the other two.
 *
 * The wait for the cache to update is bounded, so a cache that never updates fails the
 * size assertion instead of hanging the test run. The cache and the ZooKeeper client are
 * released in finally blocks so a failed assertion does not leave them open.
 */
@Test
public void testDeleteChild() throws Exception {
    ZooKeeper zk = getClient(0);
    try {
        configure("/cache02", zk);
        MapCache dut = new MapCache(zk, "/cache02");
        dut.start(true);
        try {
            Map<String, JSONObject> cache = dut.pointInTimeCache();
            assertEquals("3 items cached.", 3, cache.size());

            zk.delete("/cache02/bb", -1);

            // Poll until the deletion is visible, but give up after 60 seconds.
            final long deadline = System.nanoTime() + 60L * 1000L * 1000L * 1000L;
            cache = dut.pointInTimeCache();
            while (cache.size() == 3 && System.nanoTime() - deadline < 0) {
                Thread.sleep(1);
                cache = dut.pointInTimeCache();
            }

            assertEquals("Item removed", 2, cache.size());
            assertEquals(null, cache.get("/cache02/bb"));
            assertEquals("aaval", cache.get("/cache02/aa").get("key"));
            assertEquals("ccval", cache.get("/cache02/cc").get("key"));
        } finally {
            dut.shutdown();
        }
    } finally {
        zk.close();
    }
}
Aggregations