use of com.spotify.helios.common.descriptors.JobStatus in project helios by spotify.
the class ZooKeeperHeliosFailoverTest method undeploy.
/**
 * Undeploys the given job from the test host and waits for its task to go away.
 *
 * <p>Before undeploying, the job status is fetched and the task on the test host is
 * asserted to be {@code RUNNING}.
 *
 * @param jobId id of the job to undeploy
 * @throws Exception if a client call or the final wait fails
 */
private void undeploy(final JobId jobId) throws Exception {
  // The status must be queryable and report a running task on the test host.
  final JobStatus statusBeforeUndeploy = client.jobStatus(jobId).get();
  assertEquals(RUNNING, statusBeforeUndeploy.getTaskStatuses().get(testHost()).getState());

  // Issue the undeploy and require the master to acknowledge it with OK.
  final JobUndeployResponse.Status undeployOutcome =
      client.undeploy(jobId, testHost()).get().getStatus();
  assertEquals(JobUndeployResponse.Status.OK, undeployOutcome);

  // Block until the task is no longer reported for the test host.
  awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
}
use of com.spotify.helios.common.descriptors.JobStatus in project helios by spotify.
the class ExpiredJobReaper method runOneIteration.
/**
 * Performs one reaping pass over all jobs known to the master model.
 *
 * <p>For every job whose expiry time is at or before the current clock time, the job is
 * undeployed from each host listed in its deployments and then removed. Failures of
 * individual undeploy/remove calls are logged and do not stop the pass.
 */
@Override
public void runOneIteration() {
  for (final Entry<JobId, Job> jobEntry : masterModel.getJobs().entrySet()) {
    final Job job = jobEntry.getValue();
    final JobId jobId = jobEntry.getKey();

    // Jobs without an expiry, or whose expiry is still in the future, are left alone.
    final boolean expired = job.getExpires() != null
        && job.getExpires().getTime() <= clock.now().getMillis();
    if (!expired) {
      continue;
    }

    // NOTE(review): status is dereferenced without a null check -- confirm that
    // getJobStatus cannot return null for a job that was just listed by getJobs().
    final JobStatus status = masterModel.getJobStatus(jobId);

    // Work on a copy of the host names (presumably so undeploys cannot disturb the
    // collection being iterated -- confirm against the model implementation).
    final List<String> deployedHosts = ImmutableList.copyOf(status.getDeployments().keySet());
    for (final String host : deployedHosts) {
      try {
        masterModel.undeployJob(host, jobId, job.getToken());
      } catch (HostNotFoundException e) {
        log.error("couldn't undeploy job {} from host {} when it hit deadline", jobId, host, e);
      } catch (JobNotDeployedException e) {
        log.debug("job {} was already undeployed when it hit deadline", jobId, e);
      } catch (TokenVerificationException e) {
        log.error(
            "couldn't undeploy job {} from host {} because token verification failed",
            jobId, host, e);
      }
    }

    // Remove the job itself once every known deployment has been attempted.
    try {
      masterModel.removeJob(jobId, job.getToken());
    } catch (JobDoesNotExistException e) {
      log.debug("job {} was already removed when it hit deadline", jobId, e);
    } catch (JobStillDeployedException e) {
      log.debug("job {} still deployed on some host(s) after expiry reap", jobId, e);
    } catch (TokenVerificationException e) {
      log.error("couldn't remove job {} because token verification failed", jobId, e);
    }
  }
}
use of com.spotify.helios.common.descriptors.JobStatus in project helios by spotify.
the class SystemTestBase method undeployJob.
/**
 * Undeploys a job from a host through the CLI and verifies the deployment is gone.
 *
 * <p>The undeploy output must contain {@code "<host>: done"}. Afterwards the host's status
 * is read back as JSON, and the job must either be absent from it or have no deployment
 * entry for the host.
 *
 * @param jobId id of the job to undeploy
 * @param host  host to undeploy the job from
 * @throws Exception if a CLI invocation fails
 */
protected void undeployJob(final JobId jobId, final String host) throws Exception {
  // Run the undeploy command and check its per-host completion marker.
  final String undeployResult = cli("undeploy", jobId.toString(), host);
  assertThat(undeployResult, containsString(host + ": done"));

  // Read the host status back as a map keyed by job id.
  final String statusJson = cli("status", "--host", host, "--json");
  final TypeReference<Map<JobId, JobStatus>> statusMapType =
      new TypeReference<Map<JobId, JobStatus>>() {
      };
  final Map<JobId, JobStatus> statusByJob = Json.readUnchecked(statusJson, statusMapType);

  // Either the job is not reported at all, or it has no deployment on this host.
  final JobStatus jobStatus = statusByJob.get(jobId);
  final boolean deploymentGone =
      jobStatus == null || jobStatus.getDeployments().get(host) == null;
  assertTrue(deploymentGone);
}
use of com.spotify.helios.common.descriptors.JobStatus in project helios by spotify.
the class FlappingTest method test.
/**
 * Verifies that a job whose container keeps exiting is classified as flapping, and that
 * the throttle clears again once the container is left alone.
 *
 * <p>The job runs {@code nc -p 4711 -l} and exposes that port as "poke". The test pokes
 * the port repeatedly until the task's throttle state is {@code FLAPPING}, then waits for
 * the throttle state to return to {@code NO}.
 *
 * @throws Exception if setup, polling, or any await times out or fails
 */
@Test
public void test() throws Exception {
  // CircleCI boxes are too slow -- the job doesn't stop or restart fast enough to ever flap
  assumeFalse(isCircleCi());

  startDefaultMaster();
  final String host = testHost();
  startDefaultAgent(host);

  final HeliosClient client = defaultClient();
  awaitHostStatus(client, host, UP, LONG_WAIT_SECONDS, SECONDS);

  final Job flapper = Job.newBuilder()
      .setName(testJobName)
      .setVersion(testJobVersion)
      .setImage(BUSYBOX)
      .setCommand(asList("nc", "-p", "4711", "-l"))
      .addPort("poke", PortMapping.of(4711))
      .build();
  final JobId jobId = createJob(flapper);
  deployJob(jobId, host);
  awaitTaskState(jobId, host, RUNNING);

  // Poke the container to make it exit until it's classified as flapping
  Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<Object>() {
    @Override
    public Object call() throws Exception {
      // Returning null asks the poller to try again. A missing job status, task status or
      // port mapping is treated as "not ready yet" rather than dereferenced, so a
      // transient gap does not abort the poll with a NullPointerException.
      final JobStatus jobStatus = getOrNull(client.jobStatus(jobId));
      if (jobStatus == null) {
        return null;
      }
      final TaskStatus taskStatus = jobStatus.getTaskStatuses().get(host);
      if (taskStatus == null) {
        return null;
      }
      if (taskStatus.getThrottled() == FLAPPING) {
        return true;
      }
      final PortMapping port = taskStatus.getPorts().get("poke");
      if (port == null) {
        return null;
      }
      assert port.getExternalPort() != null;
      poke(port.getExternalPort());
      return null;
    }
  });

  // Verify that the job recovers after we stop poking
  awaitJobThrottle(client, host, jobId, NO, LONG_WAIT_SECONDS, SECONDS);
}
use of com.spotify.helios.common.descriptors.JobStatus in project helios by spotify.
the class HealthCheckTest method testContainerDiesDuringHealthcheck.
/**
 * Verifies that a job recovers when its container is killed while the task is in the
 * {@code HEALTHCHECKING} state.
 *
 * <p>Deploys a poke job with a TCP health check on the "health" port, kills the container
 * once the task is health checking, waits for a {@code FAILED} event in the job history,
 * then waits for the task to reach {@code HEALTHCHECKING} again and verifies registration
 * via {@code pokeAndVerifyRegistration}.
 *
 * @throws Exception if setup, polling, or any await times out or fails
 */
@Test
public void testContainerDiesDuringHealthcheck() throws Exception {
  startDefaultMaster();
  final HeliosClient client = defaultClient();
  startDefaultAgent(testHost(), "--service-registry=" + registryAddress);
  awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

  final HealthCheck healthCheck = TcpHealthCheck.of("health");
  final Job job = pokeJob(healthCheck);
  final JobId jobId = createJob(job);
  deployJob(jobId, testHost());
  awaitTaskState(jobId, testHost(), HEALTHCHECKING);

  // kill the underlying container
  final JobStatus jobStatus = getOrNull(client.jobStatus(jobId));
  final TaskStatus taskStatus = jobStatus.getTaskStatuses().get(testHost());
  getNewDockerClient().killContainer(taskStatus.getContainerId());

  // ensure the job is marked as failed
  final int timeout = WAIT_TIMEOUT_SECONDS;
  Polling.await(timeout, SECONDS, new Callable<Object>() {
    @Override
    public Object call() throws Exception {
      final TaskStatusEvents jobHistory = getOrNull(client.jobHistory(jobId));
      // Returning null asks the poller to try again. A missing history is treated as
      // "not available yet" instead of aborting the poll with a NullPointerException.
      if (jobHistory == null) {
        return null;
      }
      for (final TaskStatusEvent event : jobHistory.getEvents()) {
        if (event.getStatus().getState() == FAILED) {
          return true;
        }
      }
      return null;
    }
  });

  // wait for the job to come back up and start healthchecking again
  awaitTaskState(jobId, testHost(), HEALTHCHECKING);
  pokeAndVerifyRegistration(client, jobId, timeout);
}
Aggregations