Search in sources:

Example 16 with JobStatus

use of com.spotify.helios.common.descriptors.JobStatus in project helios by spotify.

the class ZooKeeperHeliosFailoverTest method undeploy.

/**
 * Undeploys {@code jobId} from the test host and blocks until its task is gone.
 *
 * <p>Asserts first that the job's task on the test host is in the RUNNING state,
 * then that the undeploy request returns an OK status.
 *
 * @param jobId the job to undeploy from {@code testHost()}
 * @throws Exception if any client call or wait fails
 */
private void undeploy(final JobId jobId) throws Exception {
    // Precondition: the status is queryable and the task is RUNNING on the test host.
    final JobStatus statusBeforeUndeploy = client.jobStatus(jobId).get();
    assertEquals(RUNNING, statusBeforeUndeploy.getTaskStatuses().get(testHost()).getState());

    // Request the undeploy and require an OK status in the response.
    final JobUndeployResponse response = client.undeploy(jobId, testHost()).get();
    final JobUndeployResponse.Status outcome = response.getStatus();
    assertEquals(JobUndeployResponse.Status.OK, outcome);

    // Block until the task no longer shows up on the host.
    awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
}
Also used : JobStatus(com.spotify.helios.common.descriptors.JobStatus) JobUndeployResponse(com.spotify.helios.common.protocol.JobUndeployResponse)

Example 17 with JobStatus

use of com.spotify.helios.common.descriptors.JobStatus in project helios by spotify.

the class ExpiredJobReaper method runOneIteration.

/**
 * Performs one reaping pass over every job known to the master model.
 *
 * <p>For each job whose expiry time is at or before the current clock time, the
 * job is undeployed from every host it is deployed to and then removed. Failures
 * for an individual host or job are logged and do not stop the pass.
 */
@Override
public void runOneIteration() {
    for (final Entry<JobId, Job> entry : masterModel.getJobs().entrySet()) {
        final JobId jobId = entry.getKey();
        final Job job = entry.getValue();

        // Jobs without an expiry, or whose expiry is still in the future, are left alone.
        if (job.getExpires() == null || job.getExpires().getTime() > clock.now().getMillis()) {
            continue;
        }

        final JobStatus status = masterModel.getJobStatus(jobId);
        final List<String> hosts;
        if (status == null) {
            // NOTE(review): presumably the model returns null when the job disappeared after
            // getJobs() was read -- confirm. Treat it as "nothing deployed" so one vanished job
            // cannot abort the whole pass with a NullPointerException.
            log.debug("job {} has no status when it hit deadline, assuming no deployments", jobId);
            hosts = ImmutableList.<String>of();
        } else {
            // Snapshot the host names before undeploying from them.
            hosts = ImmutableList.copyOf(status.getDeployments().keySet());
        }

        for (final String host : hosts) {
            try {
                masterModel.undeployJob(host, jobId, job.getToken());
            } catch (HostNotFoundException e) {
                log.error("couldn't undeploy job {} from host {} when it hit deadline", jobId, host, e);
            } catch (JobNotDeployedException e) {
                log.debug("job {} was already undeployed when it hit deadline", jobId, e);
            } catch (TokenVerificationException e) {
                log.error("couldn't undeploy job {} from host {} because token verification failed", jobId, host, e);
            }
        }

        // Removal is attempted even if some undeploys failed; the model rejects it
        // with JobStillDeployedException in that case, which is only logged.
        try {
            masterModel.removeJob(jobId, job.getToken());
        } catch (JobDoesNotExistException e) {
            log.debug("job {} was already removed when it hit deadline", jobId, e);
        } catch (JobStillDeployedException e) {
            log.debug("job {} still deployed on some host(s) after expiry reap", jobId, e);
        } catch (TokenVerificationException e) {
            log.error("couldn't remove job {} because token verification failed", jobId, e);
        }
    }
}
Also used : JobStatus(com.spotify.helios.common.descriptors.JobStatus) JobDoesNotExistException(com.spotify.helios.master.JobDoesNotExistException) HostNotFoundException(com.spotify.helios.master.HostNotFoundException) TokenVerificationException(com.spotify.helios.master.TokenVerificationException) JobStillDeployedException(com.spotify.helios.master.JobStillDeployedException) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) JobNotDeployedException(com.spotify.helios.master.JobNotDeployedException) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId)

Example 18 with JobStatus

use of com.spotify.helios.common.descriptors.JobStatus in project helios by spotify.

the class SystemTestBase method undeployJob.

/**
 * Undeploys a job from a host through the CLI and checks that the deployment is gone.
 *
 * <p>The undeploy output must contain {@code "<host>: done"}. The host's status is then
 * fetched as JSON and must either not list the job at all or list it without a
 * deployment on {@code host}.
 *
 * @param jobId the job to undeploy
 * @param host the host to undeploy it from
 * @throws Exception if a CLI invocation fails
 */
protected void undeployJob(final JobId jobId, final String host) throws Exception {
    final String undeployResult = cli("undeploy", jobId.toString(), host);
    assertThat(undeployResult, containsString(host + ": done"));

    // Re-read the host's status and parse it into a per-job map.
    final String statusJson = cli("status", "--host", host, "--json");
    final TypeReference<Map<JobId, JobStatus>> statusMapType = new TypeReference<Map<JobId, JobStatus>>() {
    };
    final Map<JobId, JobStatus> statusByJob = Json.readUnchecked(statusJson, statusMapType);

    final JobStatus remaining = statusByJob.get(jobId);
    final boolean deploymentGone = remaining == null || remaining.getDeployments().get(host) == null;
    assertTrue(deploymentGone);
}
Also used : JobStatus(com.spotify.helios.common.descriptors.JobStatus) Matchers.containsString(org.hamcrest.Matchers.containsString) Integer.toHexString(java.lang.Integer.toHexString) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) JobId(com.spotify.helios.common.descriptors.JobId)

Example 19 with JobStatus

use of com.spotify.helios.common.descriptors.JobStatus in project helios by spotify.

the class FlappingTest method test.

/**
 * Verifies that a job whose container is repeatedly made to exit gets classified as
 * FLAPPING, and that its throttle state returns to NO once the poking stops.
 *
 * @throws Exception if setup, polling or any wait fails
 */
@Test
public void test() throws Exception {
    // CircleCI boxes are too slow -- the job doesn't stop or restart fast enough to ever flap
    assumeFalse(isCircleCi());
    startDefaultMaster();
    final String host = testHost();
    startDefaultAgent(host);
    final HeliosClient client = defaultClient();
    awaitHostStatus(client, host, UP, LONG_WAIT_SECONDS, SECONDS);

    final Job flapper = Job.newBuilder()
        .setName(testJobName)
        .setVersion(testJobVersion)
        .setImage(BUSYBOX)
        .setCommand(asList("nc", "-p", "4711", "-l"))
        .addPort("poke", PortMapping.of(4711))
        .build();
    final JobId jobId = createJob(flapper);
    deployJob(jobId, host);
    awaitTaskState(jobId, host, RUNNING);

    // Poke the container to make it exit until it's classified as flapping
    Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<Object>() {

        @Override
        public Object call() throws Exception {
            // Returning null means "not done yet, poll again" (the original already
            // returns null after each poke).
            final JobStatus jobStatus = getOrNull(client.jobStatus(jobId));
            if (jobStatus == null) {
                // getOrNull may yield no status; retry instead of failing with an NPE.
                return null;
            }
            final TaskStatus taskStatus = jobStatus.getTaskStatuses().get(host);
            if (taskStatus == null) {
                // No task status reported for this host yet; retry.
                return null;
            }
            if (taskStatus.getThrottled() == FLAPPING) {
                return true;
            }
            final PortMapping port = taskStatus.getPorts().get("poke");
            if (port == null) {
                // Port mapping not reported at the moment; nothing to poke, retry.
                return null;
            }
            assert port.getExternalPort() != null;
            poke(port.getExternalPort());
            return null;
        }
    });

    // Verify that the job recovers after we stop poking
    awaitJobThrottle(client, host, jobId, NO, LONG_WAIT_SECONDS, SECONDS);
}
Also used : JobStatus(com.spotify.helios.common.descriptors.JobStatus) PortMapping(com.spotify.helios.common.descriptors.PortMapping) HeliosClient(com.spotify.helios.client.HeliosClient) Job(com.spotify.helios.common.descriptors.Job) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) JobId(com.spotify.helios.common.descriptors.JobId) IOException(java.io.IOException) Test(org.junit.Test)

Example 20 with JobStatus

use of com.spotify.helios.common.descriptors.JobStatus in project helios by spotify.

the class HealthCheckTest method testContainerDiesDuringHealthcheck.

/**
 * Kills a job's container while the task is in the HEALTHCHECKING state and verifies
 * that a FAILED event appears in the job history, after which the task reaches
 * HEALTHCHECKING again and registration is verified.
 *
 * @throws Exception if setup, the docker call, polling or any wait fails
 */
@Test
public void testContainerDiesDuringHealthcheck() throws Exception {
    startDefaultMaster();
    final HeliosClient client = defaultClient();
    startDefaultAgent(testHost(), "--service-registry=" + registryAddress);
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    final HealthCheck healthCheck = TcpHealthCheck.of("health");
    final Job job = pokeJob(healthCheck);
    final JobId jobId = createJob(job);
    deployJob(jobId, testHost());
    awaitTaskState(jobId, testHost(), HEALTHCHECKING);
    // kill the underlying container
    final JobStatus jobStatus = getOrNull(client.jobStatus(jobId));
    final TaskStatus taskStatus = jobStatus.getTaskStatuses().get(testHost());
    getNewDockerClient().killContainer(taskStatus.getContainerId());
    // ensure the job is marked as failed
    final int timeout = WAIT_TIMEOUT_SECONDS;
    Polling.await(timeout, SECONDS, new Callable<Object>() {

        @Override
        public Object call() throws Exception {
            final TaskStatusEvents jobHistory = getOrNull(client.jobHistory(jobId));
            if (jobHistory == null) {
                // getOrNull may yield no history; poll again instead of failing with an NPE.
                return null;
            }
            for (final TaskStatusEvent event : jobHistory.getEvents()) {
                if (event.getStatus().getState() == FAILED) {
                    return true;
                }
            }
            // No FAILED event seen yet; null keeps the poll going.
            return null;
        }
    });
    // wait for the job to come back up and start healthchecking again
    awaitTaskState(jobId, testHost(), HEALTHCHECKING);
    pokeAndVerifyRegistration(client, jobId, timeout);
}
Also used : TaskStatusEvent(com.spotify.helios.common.descriptors.TaskStatusEvent) HttpHealthCheck(com.spotify.helios.common.descriptors.HttpHealthCheck) HealthCheck(com.spotify.helios.common.descriptors.HealthCheck) ExecHealthCheck(com.spotify.helios.common.descriptors.ExecHealthCheck) TcpHealthCheck(com.spotify.helios.common.descriptors.TcpHealthCheck) TaskStatusEvents(com.spotify.helios.common.protocol.TaskStatusEvents) HeliosClient(com.spotify.helios.client.HeliosClient) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) ServiceEndpoint(com.spotify.helios.common.descriptors.ServiceEndpoint) Endpoint(com.spotify.helios.serviceregistration.ServiceRegistration.Endpoint) IOException(java.io.IOException) JobStatus(com.spotify.helios.common.descriptors.JobStatus) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Aggregations

JobStatus (com.spotify.helios.common.descriptors.JobStatus): 23; JobId (com.spotify.helios.common.descriptors.JobId): 19; Job (com.spotify.helios.common.descriptors.Job): 11; Test (org.junit.Test): 9; TaskStatus (com.spotify.helios.common.descriptors.TaskStatus): 8; HeliosClient (com.spotify.helios.client.HeliosClient): 6; Map (java.util.Map): 6; ImmutableMap (com.google.common.collect.ImmutableMap): 4; Deployment (com.spotify.helios.common.descriptors.Deployment): 4; Matchers.containsString (org.hamcrest.Matchers.containsString): 4; IOException (java.io.IOException): 3; PortMapping (com.spotify.helios.common.descriptors.PortMapping): 2; ServiceEndpoint (com.spotify.helios.common.descriptors.ServiceEndpoint): 2; TaskStatusEvent (com.spotify.helios.common.descriptors.TaskStatusEvent): 2; JobUndeployResponse (com.spotify.helios.common.protocol.JobUndeployResponse): 2; Integer.toHexString (java.lang.Integer.toHexString): 2; List (java.util.List): 2; ExecutionException (java.util.concurrent.ExecutionException): 2; ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1; ImmutableList (com.google.common.collect.ImmutableList): 1