use of com.spotify.helios.master.HostNotFoundException in project helios by spotify.
the class ZooKeeperRegistrarServiceTest method testRetry.
/**
 * Verifies that the registrar service retries registration after failures.
 *
 * <p>The stub registrar throws a {@code ConnectionLossException} on its first invocation and a
 * {@code HostNotFoundException} on its second; any later invocation returns without throwing.
 * After start-up the test waits for the {@code complete} fixture, checks that the registrar was
 * invoked more than once, and expects exactly two sleeps whose argument lies in [1, 30].
 */
@Test
public void testRetry() throws Exception {
  final AtomicInteger counter = new AtomicInteger(0);

  // Fail the first two registration attempts with different exception types.
  final ZooKeeperRegistrar zooKeeperRegistrar = createStubRegistrar(true, client -> {
    final int count = counter.incrementAndGet();
    if (count == 1) {
      throw new KeeperException.ConnectionLossException();
    }
    if (count == 2) {
      throw new HostNotFoundException("Host not found");
    }
  });

  final ZooKeeperRegistrarService init = ZooKeeperRegistrarService.newBuilder()
      .setZooKeeperClient(zkClient)
      .setZooKeeperRegistrar(zooKeeperRegistrar)
      .setRetryIntervalPolicy(retryIntervalPolicy)
      .setSleeper(sleeper)
      .build();
  init.startUp();

  // `complete` is a fixture declared outside this method; it is awaited for up to 30 seconds.
  assertNull(complete.get(30, SECONDS));

  // The message now matches the condition: a single call would mean no retry happened.
  assertTrue("Registrar must have been called more than once", counter.get() > 1);

  // One sleep per failed attempt.
  verify(sleeper, times(2))
      .sleep(longThat(both(greaterThanOrEqualTo(1L)).and(lessThanOrEqualTo(30L))));
}
use of com.spotify.helios.master.HostNotFoundException in project helios by spotify.
the class HostsResource method jobPut.
/**
 * Deploys the job identified by {@code jobId} to the host named {@code host}.
 *
 * <p>The job id must be fully qualified, otherwise the request is rejected with an
 * {@code INVALID_ID} bad-request response. The supplied deployment is copied with
 * {@code username} set as the deployer user and passed to the model together with
 * {@code token}. Model failures are translated into responses as follows: an already deployed
 * job, an unknown host, an unknown job and a port allocation conflict each produce a
 * bad-request response with the matching status; a token verification failure produces a
 * forbidden response.
 *
 * @param host The host to deploy to.
 * @param jobId The job to deploy.
 * @param deployment Deployment information.
 * @param username The user deploying; recorded on the deployment as the deployer user.
 * @param token The authorization token for this deployment; {@code EMPTY_TOKEN} when the
 *              query parameter is absent.
 * @return A response with status {@code OK} when the model accepts the deployment.
 */
@PUT
@Path("/{host}/jobs/{job}")
@Produces(APPLICATION_JSON)
@Timed
@ExceptionMetered
public JobDeployResponse jobPut(@PathParam("host") final String host,
                                @PathParam("job") final JobId jobId,
                                @Valid final Deployment deployment,
                                @RequestUser final String username,
                                @QueryParam("token") @DefaultValue(EMPTY_TOKEN) final String token) {
  if (!jobId.isFullyQualified()) {
    throw badRequest(new JobDeployResponse(JobDeployResponse.Status.INVALID_ID, host, jobId));
  }

  // Each bad-request failure only differs in its status, so record the status in the catch
  // clause and build the response once below.
  JobDeployResponse.Status rejection;
  try {
    model.deployJob(host, deployment.toBuilder().setDeployerUser(username).build(), token);
    return new JobDeployResponse(JobDeployResponse.Status.OK, host, jobId);
  } catch (JobAlreadyDeployedException e) {
    rejection = JobDeployResponse.Status.JOB_ALREADY_DEPLOYED;
  } catch (HostNotFoundException e) {
    rejection = JobDeployResponse.Status.HOST_NOT_FOUND;
  } catch (JobDoesNotExistException e) {
    rejection = JobDeployResponse.Status.JOB_NOT_FOUND;
  } catch (JobPortAllocationConflictException e) {
    rejection = JobDeployResponse.Status.PORT_CONFLICT;
  } catch (TokenVerificationException e) {
    // The only failure that is not reported as a bad request.
    throw forbidden(new JobDeployResponse(JobDeployResponse.Status.FORBIDDEN, host, jobId));
  }
  throw badRequest(new JobDeployResponse(rejection, host, jobId));
}
use of com.spotify.helios.master.HostNotFoundException in project helios by spotify.
the class ZooKeeperRegistrarUtil method deregisterHost.
/**
 * Removes a host from ZooKeeper in a single transaction.
 *
 * <p>After confirming that the host's config node exists, this queues deletions for: every
 * node under each job deployed to the host, the job-to-host link (when present), the per-host
 * job history, the host's jobs node, the host's status subtree, its port allocations, its id
 * node (when present) and finally the host config root. Subtrees are deleted in reverse
 * listing order. All deletions are submitted together via {@code client.transaction}.
 *
 * @param client ZooKeeper client used for the lookups and the transaction.
 * @param host Name of the host to deregister.
 * @throws HostNotFoundException If the host config node is missing, or a
 *                               {@code NoNodeException} is raised along the way.
 * @throws HostStillInUseException Declared by the signature; not thrown by this body directly.
 */
public static void deregisterHost(final ZooKeeperClient client, final String host)
    throws HostNotFoundException, HostStillInUseException {
  log.info("deregistering host: {}", host);

  // TODO (dano): handle retry failures
  try {
    final String hostConfigPath = Paths.configHost(host);
    if (client.exists(hostConfigPath) == null) {
      throw new HostNotFoundException("host [" + host + "] does not exist");
    }

    final List<ZooKeeperOperation> txn = Lists.newArrayList();

    // Jobs deployed to this host, including their history.
    final String hostJobsPath = Paths.configHostJobs(host);
    for (final String jobName : safeGetChildren(client, hostJobsPath)) {
      final JobId jobId = JobId.fromString(jobName);

      final List<String> deployedNodes =
          safeListRecursive(client, Paths.configHostJob(host, jobId));
      for (final String path : reverse(deployedNodes)) {
        txn.add(delete(path));
      }

      final String jobHostPath = Paths.configJobHost(jobId, host);
      if (client.exists(jobHostPath) != null) {
        txn.add(delete(jobHostPath));
      }

      final List<String> historyNodes =
          safeListRecursive(client, Paths.historyJobHost(jobId, host));
      for (final String path : reverse(historyNodes)) {
        txn.add(delete(path));
      }
    }
    txn.add(delete(hostJobsPath));

    // Host status subtree.
    final List<String> statusNodes = safeListRecursive(client, Paths.statusHost(host));
    for (final String path : reverse(statusNodes)) {
      txn.add(delete(path));
    }

    // Port allocations.
    final String hostPortsPath = Paths.configHostPorts(host);
    for (final String portName : safeGetChildren(client, hostPortsPath)) {
      txn.add(delete(Paths.configHostPort(host, Integer.valueOf(portName))));
    }
    txn.add(delete(hostPortsPath));

    // Host id, if one was registered.
    final String hostIdPath = Paths.configHostId(host);
    if (client.exists(hostIdPath) != null) {
      txn.add(delete(hostIdPath));
    }

    // The config root goes last, once everything beneath it has been queued for deletion.
    txn.add(delete(hostConfigPath));

    client.transaction(txn);
  } catch (NoNodeException e) {
    throw new HostNotFoundException(host);
  } catch (KeeperException e) {
    throw new HeliosRuntimeException(e);
  }
}
use of com.spotify.helios.master.HostNotFoundException in project helios by spotify.
the class ZooKeeperRegistrarUtil method reRegisterHost.
/**
 * Re-registers an agent under a new host id.
 *
 * <p>In one transaction this checks that {@code /config/hosts/<host>} exists, deletes the whole
 * {@code /status/hosts/<host>} subtree, re-creates the status node and its jobs node, and
 * replaces the host id node with one holding {@code hostId} encoded as UTF-8. Job history and
 * the rest of the host's config subtree are left untouched.
 *
 * @param client ZooKeeperClient
 * @param host Host
 * @param hostId ID of the host
 * @throws HostNotFoundException If a {@code NoNodeException} is raised, e.g. because the
 *                               hostname we are trying to re-register as doesn't exist.
 * @throws KeeperException Declared by the signature. Note that this body catches
 *                         {@code KeeperException} and rethrows it wrapped in a
 *                         {@code HeliosRuntimeException}.
 */
public static void reRegisterHost(final ZooKeeperClient client, final String host,
                                  final String hostId)
    throws HostNotFoundException, KeeperException {
  log.info("re-registering host: {}, new host id: {}", host, hostId);

  try {
    final List<ZooKeeperOperation> txn = Lists.newArrayList();

    // Fail the transaction if the host is unknown.
    txn.add(check(Paths.configHost(host)));

    // Wipe the status subtree, deepest entries first, then rebuild its skeleton.
    final String statusRoot = Paths.statusHost(host);
    final List<String> statusNodes = safeListRecursive(client, statusRoot);
    for (final String path : reverse(statusNodes)) {
      txn.add(delete(path));
    }
    txn.add(create(statusRoot));
    txn.add(create(Paths.statusHostJobs(host)));

    // We don't have WRITE permissions to the id node, so swap it via delete + create.
    final String idPath = Paths.configHostId(host);
    txn.add(delete(idPath));
    txn.add(create(idPath, hostId.getBytes(UTF_8)));

    client.transaction(txn);
  } catch (NoNodeException e) {
    throw new HostNotFoundException(host);
  } catch (KeeperException e) {
    throw new HeliosRuntimeException(e);
  }
}
use of com.spotify.helios.master.HostNotFoundException in project helios by spotify.
the class ExpiredJobReaper method runOneIteration.
/**
 * Reaps jobs whose expiry time has passed.
 *
 * <p>For every job returned by the master model that has a non-null expiry at or before the
 * current clock time, the job is undeployed from each host listed in its status and then
 * removed. Failures for an individual host or job are logged and do not stop the pass.
 */
@Override
public void runOneIteration() {
  for (final Entry<JobId, Job> entry : masterModel.getJobs().entrySet()) {
    final JobId jobId = entry.getKey();
    final Job job = entry.getValue();

    // Skip jobs that never expire or have not reached their deadline yet.
    if (job.getExpires() == null
        || job.getExpires().getTime() > clock.now().getMillis()) {
      continue;
    }

    // NOTE(review): presumably getJobStatus returns null when the job was removed after
    // getJobs() was read - confirm against the MasterModel implementation. Treat that as
    // "no deployments" instead of letting a NullPointerException abort the whole pass.
    final JobStatus status = masterModel.getJobStatus(jobId);
    final List<String> hosts = status == null
        ? ImmutableList.<String>of()
        : ImmutableList.copyOf(status.getDeployments().keySet());

    for (final String host : hosts) {
      try {
        masterModel.undeployJob(host, jobId, job.getToken());
      } catch (HostNotFoundException e) {
        log.error("couldn't undeploy job {} from host {} when it hit deadline", jobId, host, e);
      } catch (JobNotDeployedException e) {
        log.debug("job {} was already undeployed when it hit deadline", jobId, e);
      } catch (TokenVerificationException e) {
        log.error("couldn't undeploy job {} from host {} because token verification failed",
            jobId, host, e);
      }
    }

    try {
      masterModel.removeJob(jobId, job.getToken());
    } catch (JobDoesNotExistException e) {
      log.debug("job {} was already removed when it hit deadline", jobId, e);
    } catch (JobStillDeployedException e) {
      log.debug("job {} still deployed on some host(s) after expiry reap", jobId, e);
    } catch (TokenVerificationException e) {
      log.error("couldn't remove job {} because token verification failed", jobId, e);
    }
  }
}
Aggregations