use of org.apache.zookeeper.KeeperException.NoNodeException in project helios by spotify.
the class ZooKeeperMasterModel method deployJobRetry.
/**
 * Deploys {@code deployment} on {@code host} with a single ZooKeeper transaction, retrying when
 * the transaction fails with a {@code NoNodeException} while the job and host still exist.
 *
 * @param client ZooKeeper client used for the reads and for the deployment transaction
 * @param host name of the host to deploy to
 * @param deployment the deployment to apply
 * @param count number of attempts already made; the method gives up once this reaches 3
 * @param token token checked against the job by {@code verifyToken}
 * @throws JobDoesNotExistException if {@code getJob} returns null for the deployment's job id
 * @throws JobAlreadyDeployedException if a task node for the job already exists on the host
 * @throws HostNotFoundException if the host name is rejected while building the task path
 *     (presumably also raised by {@code assertHostExists} - confirm)
 * @throws JobPortAllocationConflictException presumably raised by
 *     {@code checkForPortConflicts} - confirm
 * @throws TokenVerificationException presumably raised by {@code verifyToken} - confirm
 */
private void deployJobRetry(final ZooKeeperClient client, final String host, final Deployment deployment, int count, final String token) throws JobDoesNotExistException, JobAlreadyDeployedException, HostNotFoundException, JobPortAllocationConflictException, TokenVerificationException {
  if (count == 3) {
    throw new HeliosRuntimeException("3 failures (possibly concurrent modifications) while deploying. Giving up.");
  }
  log.info("deploying {}: {} (retry={})", deployment, host, count);

  final JobId id = deployment.getJobId();
  final Job job = getJob(id);
  if (job == null) {
    throw new JobDoesNotExistException(id);
  }
  verifyToken(token, job);

  final UUID operationId = UUID.randomUUID();
  final String jobPath = Paths.configJob(id);

  // Building the task path doubles as host-name validation: an IllegalArgumentException from
  // the path builder is reported to the caller as an unknown host.
  final String taskPath;
  try {
    taskPath = Paths.configHostJob(host, id);
  } catch (IllegalArgumentException e) {
    throw new HostNotFoundException("Could not find Helios host '" + host + "'");
  }
  final String taskCreationPath = Paths.configHostJobCreation(host, id, operationId);

  // One node per static port, each holding the job id as JSON.
  final List<Integer> staticPorts = staticPorts(job);
  final Map<String, byte[]> portNodes = Maps.newHashMap();
  final byte[] idJson = id.toJsonBytes();
  for (final int port : staticPorts) {
    final String path = Paths.configHostPort(host, port);
    portNodes.put(path, idJson);
  }

  final Task task = new Task(job, deployment.getGoal(), deployment.getDeployerUser(), deployment.getDeployerMaster(), deployment.getDeploymentGroupName());
  final List<ZooKeeperOperation> operations = Lists.newArrayList(check(jobPath), create(portNodes), create(Paths.configJobHost(id, host)));

  // Attempt to read a task here.
  try {
    client.getNode(taskPath);
    // if we get here the node exists already
    throw new JobAlreadyDeployedException(host, id);
  } catch (NoNodeException e) {
    // No task yet: create it together with the per-operation creation marker node.
    operations.add(create(taskPath, task));
    operations.add(create(taskCreationPath));
  } catch (KeeperException e) {
    throw new HeliosRuntimeException("reading existing task description failed", e);
  }

  // TODO (dano): Failure handling is racy wrt agent and job modifications.
  try {
    client.transaction(operations);
    log.info("deployed {}: {} (retry={})", deployment, host, count);
  } catch (NoNodeException e) {
    // Either the job, the host or the task went away
    assertJobExists(client, id);
    assertHostExists(client, host);
    // If the job and host still exists, we likely tried to redeploy a job that had an UNDEPLOY
    // goal and lost the race with the agent removing the task before we could set it. Retry.
    deployJobRetry(client, host, deployment, count + 1, token);
  } catch (NodeExistsException e) {
    // Check for conflict due to transaction retry
    try {
      if (client.exists(taskCreationPath) != null) {
        // Our creation operation node existed, we're done here
        return;
      }
    } catch (KeeperException ex) {
      throw new HeliosRuntimeException("checking job deployment failed", ex);
    }
    try {
      // Check if the job was already deployed
      if (client.stat(taskPath) != null) {
        throw new JobAlreadyDeployedException(host, id);
      }
    } catch (KeeperException ex) {
      // Report the failure of the stat call itself, not the earlier NodeExistsException.
      throw new HeliosRuntimeException("checking job deployment failed", ex);
    }
    // Check for static port collisions
    for (final int port : staticPorts) {
      checkForPortConflicts(client, host, port, id);
    }
    // Catch all for logic and ephemeral issues
    throw new HeliosRuntimeException("deploying job failed", e);
  } catch (KeeperException e) {
    throw new HeliosRuntimeException("deploying job failed", e);
  }
}
use of org.apache.zookeeper.KeeperException.NoNodeException in project helios by spotify.
the class ZooKeeperMasterModel method undeployJob.
/**
 * Removes the deployment of {@code jobId} from {@code host} and returns the deployment that was
 * in place before the removal.
 *
 * <p>The host-job node (with everything beneath it), the job-host node and one node per static
 * port of the job are deleted in a single transaction.
 *
 * @throws JobNotDeployedException if no deployment is found, or if a node is missing while the
 *     deletion is being assembled or executed
 */
@Override
public Deployment undeployJob(final String host, final JobId jobId, final String token) throws HostNotFoundException, JobNotDeployedException, TokenVerificationException {
  log.info("undeploying {}: {}", jobId, host);
  final ZooKeeperClient client = provider.get("undeployJob");
  assertHostExists(client, host);

  final Deployment existing = getDeployment(host, jobId);
  if (existing == null) {
    throw new JobNotDeployedException(host, jobId);
  }

  final Job job = getJob(client, jobId);
  verifyToken(token, job);

  final String hostJobPath = Paths.configHostJob(host, jobId);
  try {
    // The recursive listing is reversed before deletion so that the creation node under the
    // job node is removed along with the job node itself.
    final List<String> subtree = client.listRecursive(hostJobPath);
    final List<String> doomed = newArrayList(reverse(subtree));
    doomed.add(Paths.configJobHost(jobId, host));
    for (final int port : staticPorts(job)) {
      doomed.add(Paths.configHostPort(host, port));
    }
    client.transaction(delete(doomed));
    return existing;
  } catch (NoNodeException e) {
    // A vanished node is reported exactly like a deployment that was never there.
    throw new JobNotDeployedException(host, jobId);
  } catch (KeeperException e) {
    throw new HeliosRuntimeException("Removing deployment failed", e);
  }
}
use of org.apache.zookeeper.KeeperException.NoNodeException in project helios by spotify.
the class ZooKeeperMasterModel method getUndeployOperations.
/**
 * Builds, without executing, the ZooKeeper operations that undeploy {@code jobId} from
 * {@code host}: a single delete covering the host-job subtree, the job-host node and one node
 * per static port of the job.
 *
 * @throws JobNotDeployedException if no deployment is found, or if a node is missing while the
 *     host-job subtree is being listed
 */
private List<ZooKeeperOperation> getUndeployOperations(final ZooKeeperClient client, final String host, final JobId jobId, final String token) throws HostNotFoundException, JobNotDeployedException, TokenVerificationException {
  assertHostExists(client, host);

  final Deployment existing = getDeployment(host, jobId);
  if (existing == null) {
    throw new JobNotDeployedException(host, jobId);
  }

  final Job job = getJob(client, jobId);
  verifyToken(token, job);

  final String hostJobPath = Paths.configHostJob(host, jobId);
  try {
    // The recursive listing is reversed so that the creation node under the job node is
    // removed along with the job node itself.
    final List<String> subtree = client.listRecursive(hostJobPath);
    final List<String> doomed = newArrayList(reverse(subtree));
    doomed.add(Paths.configJobHost(jobId, host));
    for (final int port : staticPorts(job)) {
      doomed.add(Paths.configHostPort(host, port));
    }
    return ImmutableList.of(delete(doomed));
  } catch (NoNodeException e) {
    // A vanished node is reported exactly like a deployment that was never there.
    throw new JobNotDeployedException(host, jobId);
  } catch (KeeperException e) {
    throw new HeliosRuntimeException("calculating undeploy operations failed", e);
  }
}
use of org.apache.zookeeper.KeeperException.NoNodeException in project pulsar by yahoo.
the class Namespaces method createNamespace.
/**
 * Creates the policies node for a new namespace under {@code property}/{@code cluster}.
 *
 * <p>Responds with 404 when the cluster or the property does not exist, 409 when the namespace
 * node already exists and 412 when an {@code IllegalArgumentException} is raised while the name
 * or the bundle data is validated.
 *
 * @param property property that owns the namespace
 * @param cluster cluster the namespace is created in
 * @param namespace name of the namespace to create
 * @param initialBundles optional bundle layout; ignored when null or when it declares no bundles
 */
@PUT
@Path("/{property}/{cluster}/{namespace}")
@ApiOperation(value = "Creates a new empty namespace with no policies attached.")
@ApiResponses(value = { @ApiResponse(code = 403, message = "Don't have admin permission"), @ApiResponse(code = 404, message = "Property or cluster or namespace doesn't exist"), @ApiResponse(code = 409, message = "Namespace already exists"), @ApiResponse(code = 412, message = "Namespace name is not valid") })
public void createNamespace(@PathParam("property") String property, @PathParam("cluster") String cluster, @PathParam("namespace") String namespace, BundlesData initialBundles) {
  validateAdminAccessOnProperty(property);
  validatePoliciesReadOnlyAccess();
  // A non-global namespace is validated against the property's cluster here; for the global
  // cluster the check is made at the time of setting replication.
  if (!cluster.equals(GLOBAL_CLUSTER)) {
    validateClusterForProperty(property, cluster);
  }
  if (!clusters().contains(cluster)) {
    log.warn("[{}] Failed to create namespace. Cluster {} does not exist", clientAppId(), cluster);
    throw new RestException(Status.NOT_FOUND, "Cluster does not exist");
  }
  try {
    checkNotNull(propertiesCache().get(path("policies", property)));
  } catch (NoNodeException nne) {
    log.warn("[{}] Failed to create namespace. Property {} does not exist", clientAppId(), property);
    throw new RestException(Status.NOT_FOUND, "Property does not exist");
  } catch (RestException e) {
    throw e;
  } catch (Exception e) {
    throw new RestException(e);
  }
  try {
    NamedEntity.checkName(namespace);
    policiesCache().invalidate(path("policies", property, cluster, namespace));
    Policies policies = new Policies();
    if (initialBundles != null && initialBundles.getNumBundles() > 0) {
      // Without explicit boundaries, bundles are derived from the requested count alone.
      if (initialBundles.getBoundaries() == null || initialBundles.getBoundaries().isEmpty()) {
        policies.bundles = getBundles(initialBundles.getNumBundles());
      } else {
        policies.bundles = validateBundlesData(initialBundles);
      }
    }
    zkCreateOptimistic(path("policies", property, cluster, namespace), jsonMapper().writeValueAsBytes(policies));
    log.info("[{}] Created namespace {}/{}/{}", clientAppId(), property, cluster, namespace);
  } catch (KeeperException.NodeExistsException e) {
    log.warn("[{}] Failed to create namespace {}/{}/{} - already exists", clientAppId(), property, cluster, namespace);
    throw new RestException(Status.CONFLICT, "Namespace already exists");
  } catch (IllegalArgumentException e) {
    // Log the namespace name that was validated, not the property that owns it.
    log.warn("[{}] Failed to create namespace with invalid name {}", clientAppId(), namespace, e);
    throw new RestException(Status.PRECONDITION_FAILED, "Namespace name is not valid");
  } catch (Exception e) {
    log.error("[{}] Failed to create namespace {}/{}/{}", clientAppId(), property, cluster, namespace, e);
    throw new RestException(e);
  }
}
use of org.apache.zookeeper.KeeperException.NoNodeException in project pulsar by yahoo.
the class OwnershipCacheTest method testRemoveOwnership.
/**
 * Exercises {@code OwnershipCache.removeOwnership} for a bundle nobody owns and for a bundle
 * owned by this broker, then checks that the bundle's ZooKeeper node can no longer be read.
 */
@Test
public void testRemoveOwnership() throws Exception {
  final OwnershipCache ownershipCache = new OwnershipCache(this.pulsar, bundleFactory);
  final NamespaceName namespace = new NamespaceName("pulsar/test/ns-7");
  final NamespaceBundle fullBundle = bundleFactory.getFullBundle(namespace);

  // case 1: the bundle has no owner, so removing ownership leaves the cache empty
  assertFalse(ownershipCache.getOwnerAsync(fullBundle).get().isPresent());
  ownershipCache.removeOwnership(fullBundle).get();
  assertTrue(ownershipCache.getOwnedBundles().isEmpty());

  // case 2: this broker acquires the bundle and then releases it
  final NamespaceEphemeralData acquired = ownershipCache.tryAcquiringOwnership(fullBundle).get();
  assertEquals(acquired.getNativeUrl(), selfBrokerUrl);
  assertFalse(acquired.isDisabled());
  final int ownedCount = ownershipCache.getOwnedBundles().size();
  assertTrue(ownedCount == 1);

  // The returned future is not awaited here; the test sleeps before checking the cache.
  ownershipCache.removeOwnership(fullBundle);
  Thread.sleep(500);
  assertTrue(ownershipCache.getOwnedBundles().isEmpty());
  Thread.sleep(500);

  final String bundlePath = ServiceUnitZkUtils.path(fullBundle);
  try {
    zkCache.getZooKeeper().getData(bundlePath, null, null);
    fail("Should have failed");
  } catch (NoNodeException expected) {
    // OK
  }
}
Aggregations