use of org.apache.zookeeper.KeeperException.NoNodeException in project zookeeper by apache.
the class LoadFromLogTest method testRestoreWithTransactionErrors.
/**
 * Test we can restore a snapshot that has errors and data ahead of the zxid
 * of the snapshot file.
 *
 * <p>Steps: start a single server, issue {@code NUM_MESSAGES} sequential
 * creates under {@code /invaliddir} (each is expected to fail with
 * {@link NoNodeException}), move the database's last processed zxid back by
 * 10, take a snapshot, then start a fresh server on the same directory and
 * check that it comes up.
 *
 * <p>Each server/factory pair is shut down in a {@code finally} block so a
 * failed assertion or unexpected exception does not leave a server bound to
 * the test port.
 *
 * @throws Exception if any server, client or snapshot operation fails
 */
@Test
public void testRestoreWithTransactionErrors() throws Exception {
    final String hostPort = HOST + PortAssignment.unique();
    // setup a single server cluster
    File tmpDir = ClientBase.createTmpDir();
    ClientBase.setupTestEnv();
    ZooKeeperServer zks = new ZooKeeperServer(tmpDir, tmpDir, 3000);
    SyncRequestProcessor.setSnapCount(10000);
    final int PORT = Integer.parseInt(hostPort.split(":")[1]);
    ServerCnxnFactory f = ServerCnxnFactory.createFactory(PORT, -1);
    try {
        f.startup(zks);
        Assert.assertTrue("waiting for server being up ", ClientBase.waitForServerUp(hostPort, CONNECTION_TIMEOUT));
        ZooKeeper zk = getConnectedZkClient(hostPort);
        // generate some transactions
        try {
            for (int i = 0; i < NUM_MESSAGES; i++) {
                try {
                    zk.create("/invaliddir/test-", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL);
                } catch (NoNodeException e) {
                    //Expected
                }
            }
        } finally {
            zk.close();
        }
        // force the zxid to be behind the content
        zks.getZKDatabase().setlastProcessedZxid(zks.getZKDatabase().getDataTreeLastProcessedZxid() - 10);
        LOG.info("Set lastProcessedZxid to " + zks.getZKDatabase().getDataTreeLastProcessedZxid());
        // Force snapshot and restore
        zks.takeSnapshot();
    } finally {
        // Same shutdown order as before (server, then factory), now also on failure.
        zks.shutdown();
        f.shutdown();
    }
    // Restart on the same directory to exercise the restore path.
    zks = new ZooKeeperServer(tmpDir, tmpDir, 3000);
    SyncRequestProcessor.setSnapCount(10000);
    f = ServerCnxnFactory.createFactory(PORT, -1);
    try {
        f.startup(zks);
        Assert.assertTrue("waiting for server being up ", ClientBase.waitForServerUp(hostPort, CONNECTION_TIMEOUT));
    } finally {
        // Same shutdown order as before (factory, then server), now also on failure.
        f.shutdown();
        zks.shutdown();
    }
}
use of org.apache.zookeeper.KeeperException.NoNodeException in project helios by spotify.
the class ZooKeeperMasterModel method removeJob.
/**
 * Removes the job with the given id from ZooKeeper and returns the removed job.
 *
 * <p>The job's configuration nodes are deleted in a single transaction. If that
 * transaction fails with {@link NotEmptyException}, the job is reported as still
 * deployed. The job's history is then removed on a best-effort basis.
 *
 * @param id    the id of the job to remove
 * @param token the token checked against the job via {@code verifyToken}
 * @return the job as it was read before removal
 * @throws JobDoesNotExistException   if the job cannot be found, or a node is missing
 *                                    when the transaction runs
 * @throws JobStillDeployedException  if the transaction fails with {@link NotEmptyException}
 * @throws TokenVerificationException declared for {@code verifyToken}
 */
@Override
public Job removeJob(final JobId id, final String token) throws JobDoesNotExistException, JobStillDeployedException, TokenVerificationException {
  log.info("removing job: id={}", id);

  final ZooKeeperClient client = provider.get("removeJob");
  final Job job = getJob(client, id);
  if (job == null) {
    throw new JobDoesNotExistException(id);
  }
  verifyToken(token, job);

  // TODO (dano): handle retry failures
  try {
    final ImmutableList.Builder<ZooKeeperOperation> ops = ImmutableList.builder();

    // The creation marker is only deleted when one is found for this job.
    final UUID creationId = getJobCreation(client, id);
    if (creationId != null) {
      ops.add(delete(Paths.configJobCreation(id, creationId)));
    }

    ops.add(delete(Paths.configJobHosts(id)));
    ops.add(delete(Paths.configJobRefShort(id)));
    ops.add(delete(Paths.configJob(id)));
    // Write a fresh random value to the jobs root node in the same transaction.
    // Per the original note: a change down the tree should also change this node,
    // effectively making it that version == cVersion.
    ops.add(set(Paths.configJobs(), UUID.randomUUID().toString().getBytes()));

    client.transaction(ops.build());
  } catch (final NoNodeException e) {
    throw new JobDoesNotExistException(id);
  } catch (final NotEmptyException e) {
    throw new JobStillDeployedException(id, listJobHosts(client, id));
  } catch (final KeeperException e) {
    throw new HeliosRuntimeException("removing job " + id + " failed", e);
  }

  // Delete job history on a best effort basis
  try {
    client.deleteRecursive(Paths.historyJob(id));
  } catch (NoNodeException ignored) {
    // There's no history for this job
  } catch (KeeperException e) {
    // Failure here is logged only; the job itself has already been removed.
    log.warn("error removing job history for job {}", id, e);
  }

  return job;
}
use of org.apache.zookeeper.KeeperException.NoNodeException in project helios by spotify.
the class ZooKeeperMasterModel method getDeploymentGroupTasks.
/**
 * Reads every deployment group tasks node under the status folder and returns the
 * parsed values keyed by child name, each paired with its node version.
 *
 * <p>Returns an empty map when the folder itself does not exist. Children with empty
 * data, and children that disappear between listing and reading, are skipped.
 *
 * @param client the ZooKeeper client used for all reads
 * @return a map from child name to versioned tasks
 * @throws HeliosRuntimeException if any other ZooKeeper error or an I/O error occurs
 */
private Map<String, VersionedValue<DeploymentGroupTasks>> getDeploymentGroupTasks(final ZooKeeperClient client) {
  final String tasksRoot = Paths.statusDeploymentGroupTasks();
  try {
    final List<String> groupNames;
    try {
      groupNames = client.getChildren(tasksRoot);
    } catch (NoNodeException ignored) {
      // No folder means there are no tasks to report.
      return Collections.emptyMap();
    }

    final Map<String, VersionedValue<DeploymentGroupTasks>> tasksByName = Maps.newHashMap();
    for (final String groupName : groupNames) {
      try {
        final Node node = client.getNode(Paths.statusDeploymentGroupTasks(groupName));
        final byte[] payload = node.getBytes();
        final int nodeVersion = node.getStat().getVersion();

        if (payload.length == 0) {
          // This can happen because of ensurePath creates an empty node
          log.debug("Ignoring empty deployment group tasks {}", groupName);
          continue;
        }

        final DeploymentGroupTasks tasks = parse(payload, DeploymentGroupTasks.class);
        tasksByName.put(groupName, VersionedValue.of(tasks, nodeVersion));
      } catch (NoNodeException ignored) {
        // Ignore, the deployment group was deleted before we had a chance to read it.
        log.debug("Ignoring deleted deployment group tasks {}", groupName);
      }
    }
    return tasksByName;
  } catch (KeeperException | IOException e) {
    throw new HeliosRuntimeException("getting deployment group tasks failed", e);
  }
}
use of org.apache.zookeeper.KeeperException.NoNodeException in project helios by spotify.
the class ZooKeeperRegistrarUtil method deregisterHost.
/**
 * Removes a host and everything recorded for it from ZooKeeper.
 *
 * <p>Builds one list of delete operations covering the host's deployed jobs, the
 * matching job-to-host references, per-job history for the host, the host status
 * subtree, port allocations, the host id and finally the host config root, then
 * submits the list as a single transaction.
 *
 * @param client the ZooKeeper client used for reads and for the transaction
 * @param host   the name of the host to deregister
 * @throws HostNotFoundException   if the host config node does not exist, or a node is
 *                                 reported missing while the operations run
 * @throws HostStillInUseException declared on the signature
 * @throws HeliosRuntimeException  if any other ZooKeeper error occurs
 */
public static void deregisterHost(final ZooKeeperClient client, final String host) throws HostNotFoundException, HostStillInUseException {
  log.info("deregistering host: {}", host);

  // TODO (dano): handle retry failures
  try {
    final List<ZooKeeperOperation> ops = Lists.newArrayList();

    if (client.exists(Paths.configHost(host)) == null) {
      throw new HostNotFoundException("host [" + host + "] does not exist");
    }

    // Remove all jobs deployed to this host
    for (final String jobName : safeGetChildren(client, Paths.configHostJobs(host))) {
      final JobId jobId = JobId.fromString(jobName);

      // Listings are walked in reverse order (presumably so children go before parents).
      for (final String path : reverse(safeListRecursive(client, Paths.configHostJob(host, jobId)))) {
        ops.add(delete(path));
      }

      // The job-side reference to this host is only deleted if it exists.
      final String jobHostPath = Paths.configJobHost(jobId, host);
      if (client.exists(jobHostPath) != null) {
        ops.add(delete(jobHostPath));
      }

      // Clean out the history for each job
      for (final String path : reverse(safeListRecursive(client, Paths.historyJobHost(jobId, host)))) {
        ops.add(delete(path));
      }
    }
    ops.add(delete(Paths.configHostJobs(host)));

    // Remove the host status
    for (final String path : reverse(safeListRecursive(client, Paths.statusHost(host)))) {
      ops.add(delete(path));
    }

    // Remove port allocations
    for (final String port : safeGetChildren(client, Paths.configHostPorts(host))) {
      ops.add(delete(Paths.configHostPort(host, Integer.valueOf(port))));
    }
    ops.add(delete(Paths.configHostPorts(host)));

    // Remove host id
    final String hostIdPath = Paths.configHostId(host);
    if (client.exists(hostIdPath) != null) {
      ops.add(delete(hostIdPath));
    }

    // Remove host config root
    ops.add(delete(Paths.configHost(host)));

    client.transaction(ops);
  } catch (NoNodeException e) {
    throw new HostNotFoundException(host);
  } catch (KeeperException e) {
    throw new HeliosRuntimeException(e);
  }
}
use of org.apache.zookeeper.KeeperException.NoNodeException in project helios by spotify.
the class ZooKeeperRegistrarUtil method reRegisterHost.
/**
 * Re-registers an agent under a new host id. The host's existing status subtree is
 * removed and recreated empty; jobs deployed to the host and their history are kept.
 *
 * <p>All changes are submitted as one transaction that begins with a check on the
 * host's config node.
 *
 * @param client ZooKeeperClient
 * @param host Host
 * @param hostId ID of the host
 * @throws HostNotFoundException If the hostname we are trying to re-register as doesn't exist.
 * @throws KeeperException Declared on the signature. Note that a KeeperException raised
 *                         inside this method's try block is caught and rethrown as
 *                         {@code HeliosRuntimeException}.
 */
public static void reRegisterHost(final ZooKeeperClient client, final String host, final String hostId) throws HostNotFoundException, KeeperException {
  // * Delete everything in the /status/hosts/<hostname> subtree
  // * Don't delete any history for the job (on the host)
  // * DON'T touch anything in the /config/hosts/<hostname> subtree, except updating the host id
  log.info("re-registering host: {}, new host id: {}", host, hostId);

  try {
    final List<ZooKeeperOperation> ops = Lists.newArrayList();

    // Check that the host exists in ZK
    ops.add(check(Paths.configHost(host)));

    // Remove the host status
    for (final String statusPath : reverse(safeListRecursive(client, Paths.statusHost(host)))) {
      ops.add(delete(statusPath));
    }

    // ...and re-create the /status/hosts/<host>/jobs node + parent
    ops.add(create(Paths.statusHost(host)));
    ops.add(create(Paths.statusHostJobs(host)));

    // Update the host ID
    // We don't have WRITE permissions to the node, so delete and re-create it.
    ops.add(delete(Paths.configHostId(host)));
    ops.add(create(Paths.configHostId(host), hostId.getBytes(UTF_8)));

    client.transaction(ops);
  } catch (NoNodeException e) {
    throw new HostNotFoundException(host);
  } catch (KeeperException e) {
    throw new HeliosRuntimeException(e);
  }
}
Aggregations