Search in sources :

Example 6 with JobUndeployResponse

use of com.spotify.helios.common.protocol.JobUndeployResponse in project helios by spotify.

the class AgentRestartTest method test.

/**
 * End-to-end check of how a deployed job behaves across repeated agent restarts.
 *
 * <p>The test starts a master and one agent, creates and deploys a job, and then goes through
 * a series of "stop agent / change something / start agent" rounds:
 * <ol>
 *   <li>plain restart: the task is expected to keep the same container id;</li>
 *   <li>container killed while the agent is down: a task with a different container id is
 *       awaited;</li>
 *   <li>container killed and removed while the agent is down: again a different container id
 *       is awaited;</li>
 *   <li>goal set to STOP while the agent is down: the task is awaited in STOPPED;</li>
 *   <li>goal set to START while the agent is down: the task is awaited in RUNNING;</li>
 *   <li>job undeployed while the agent is down: the task is awaited to be gone.</li>
 * </ol>
 * After most rounds the number of containers returned by {@code listContainers} for
 * {@code testTag} is asserted (1 while the job should run, 0 otherwise).
 *
 * @throws Exception if any of the client, Docker or polling calls fail
 */
@Test
public void test() throws Exception {
    startDefaultMaster();
    final DockerClient dockerClient = getNewDockerClient();
    final HeliosClient client = defaultClient();
    final AgentMain agent1 = startDefaultAgent(testHost());
    // Create a job
    final Job job = Job.newBuilder().setName(testJobName).setVersion(testJobVersion).setImage(BUSYBOX).setCommand(IDLE_COMMAND).setCreatingUser(TEST_USER).build();
    final JobId jobId = job.getId();
    final CreateJobResponse created = client.createJob(job).get();
    assertEquals(CreateJobResponse.Status.OK, created.getStatus());
    // Wait for agent to come up
    awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Deploy the job on the agent
    final Deployment deployment = Deployment.of(jobId, START);
    final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
    assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());
    // Wait for the job to run
    final TaskStatus firstTaskStatus = awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertJobEquals(job, firstTaskStatus.getJob());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
    // Round 1: stop the agent and bring it back without touching the container
    agent1.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Start the agent again
    final AgentMain agent2 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for a while (fixed 5 second pause) and make sure that the same container is still running
    Thread.sleep(5000);
    final HostStatus hostStatus = client.hostStatus(testHost()).get();
    // NOTE(review): the map lookup result is dereferenced below without a null check; the
    // polling callables further down do guard against null for the same lookup.
    final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
    // NOTE(review): firstTaskStatus was obtained by awaiting RUNNING above, so the
    // PULLING_IMAGE branch presumably never triggers - confirm against awaitJobState.
    if (firstTaskStatus.getState() == PULLING_IMAGE) {
        final State state = taskStatus.getState();
        assertTrue(state == RUNNING || state == PULLING_IMAGE);
    } else {
        assertEquals(RUNNING, taskStatus.getState());
    }
    assertEquals(firstTaskStatus.getContainerId(), taskStatus.getContainerId());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
    // Round 2: stop the agent, then kill the container while the agent is down
    agent2.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Kill the container
    dockerClient.killContainer(firstTaskStatus.getContainerId());
    assertEquals(0, listContainers(dockerClient, testTag).size());
    // Start the agent again
    final AgentMain agent3 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the job to be restarted in a new container
    // (poll until the task is RUNNING with a container id different from the first one)
    final TaskStatus secondTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

        @Override
        public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(firstTaskStatus.getContainerId())) ? taskStatus : null;
        }
    });
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(secondTaskStatus.getContainerId()).state().running());
    // Round 3: stop the agent, then kill AND remove the container while the agent is down
    agent3.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Kill and destroy the container
    dockerClient.killContainer(secondTaskStatus.getContainerId());
    removeContainer(dockerClient, secondTaskStatus.getContainerId());
    // Start the agent again
    final AgentMain agent4 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the task to be restarted in a new container
    // (poll until the task is RUNNING with a container id different from the second one)
    final TaskStatus thirdTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

        @Override
        public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(secondTaskStatus.getContainerId())) ? taskStatus : null;
        }
    });
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(thirdTaskStatus.getContainerId()).state().running());
    // Round 4: stop the agent, then set the goal to STOP while the agent is down
    agent4.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Stop the job
    final SetGoalResponse stopped = client.setGoal(Deployment.of(jobId, STOP), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, stopped.getStatus());
    // Start the agent again
    final AgentMain agent5 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Verify that the task is stopped
    awaitJobState(client, testHost(), jobId, STOPPED, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
    // Round 5: stop the agent, then set the goal back to START while the agent is down
    agent5.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Start the job
    final SetGoalResponse started = client.setGoal(Deployment.of(jobId, START), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, started.getStatus());
    // Start the agent again
    final AgentMain agent6 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Verify that the task is started
    awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(1, listContainers(dockerClient, testTag).size());
    // Round 6: stop the agent, then undeploy the job while the agent is down
    agent6.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Undeploy the job
    final JobUndeployResponse undeployed = client.undeploy(jobId, testHost()).get();
    assertEquals(JobUndeployResponse.Status.OK, undeployed.getStatus());
    // Start the agent again (this last agent instance is not stopped by this method)
    startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the task to get removed
    awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
}
Also used : SetGoalResponse(com.spotify.helios.common.protocol.SetGoalResponse) DockerClient(com.spotify.docker.client.DockerClient) Deployment(com.spotify.helios.common.descriptors.Deployment) HeliosClient(com.spotify.helios.client.HeliosClient) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) JobDeployResponse(com.spotify.helios.common.protocol.JobDeployResponse) CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) AgentMain(com.spotify.helios.agent.AgentMain) State(com.spotify.helios.common.descriptors.TaskStatus.State) JobUndeployResponse(com.spotify.helios.common.protocol.JobUndeployResponse) HostStatus(com.spotify.helios.common.descriptors.HostStatus) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Example 7 with JobUndeployResponse

use of com.spotify.helios.common.protocol.JobUndeployResponse in project helios by spotify.

the class TokenTest method undeploy.

/**
 * Invokes the CLI {@code undeploy} command for the test job on the test host and asserts that
 * the status carried by the JSON response matches the expected one.
 *
 * @param token  value forwarded to {@code buildArgs} together with the job and host
 * @param status the undeploy status the CLI response is expected to report
 * @throws Exception if building the arguments or running the CLI command fails
 */
private void undeploy(final String token, final JobUndeployResponse.Status status) throws Exception {
    // Argument list for the CLI call: built from the token, the job name/version and the host.
    final List<String> cliArguments = buildArgs(token, testJobNameAndVersion, testHost());

    // Run the command and parse its JSON output into the response type.
    final JobUndeployResponse undeployResult =
        cliJson(JobUndeployResponse.class, "undeploy", cliArguments);

    assertThat(undeployResult.getStatus(), equalTo(status));
}
Also used : JobUndeployResponse(com.spotify.helios.common.protocol.JobUndeployResponse)

Example 8 with JobUndeployResponse

use of com.spotify.helios.common.protocol.JobUndeployResponse in project helios by spotify.

the class UndeployRaceTest method test.

/**
 * Deploys and immediately undeploys a job on a host that has been registered but has no agent
 * running yet, then starts the agent and checks that the task goes away and the job can be
 * deleted.
 *
 * @throws Exception if any client call or polling helper fails
 */
@Test
public void test() throws Exception {
    startDefaultMaster();

    final HeliosClient helios = defaultClient();
    final String agentId = "test-agent-id";

    // The host is registered up front, while no agent process is running for it.
    helios.registerHost(testHost(), agentId);

    // Create the job; it is deployed and undeployed below before any agent is started.
    final Job idleJob = Job.newBuilder()
        .setName(testJobName)
        .setVersion(testJobVersion)
        .setImage(BUSYBOX)
        .setCommand(IDLE_COMMAND)
        .build();
    final JobId idleJobId = idleJob.getId();
    assertEquals(CreateJobResponse.Status.OK, helios.createJob(idleJob).get().getStatus());

    final Deployment startDeployment = Deployment.of(idleJobId, START);

    // Block until the master lists the host; deploying earlier would yield HOST_NOT_FOUND.
    Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<String>() {

        @Override
        public String call() throws Exception {
            final List<String> knownHosts = helios.listHosts().get();
            // A null result signals "not there yet" to the polling helper.
            return knownHosts.contains(testHost()) ? testHost() : null;
        }
    });

    // Deploy and undeploy back to back, still without an agent.
    assertEquals(
        JobDeployResponse.Status.OK,
        helios.deploy(startDeployment, testHost()).get().getStatus());
    assertEquals(
        JobUndeployResponse.Status.OK,
        helios.undeploy(idleJobId, testHost()).get().getStatus());

    // Now bring the agent up under the pre-registered id.
    startDefaultAgent(testHost(), "--id", agentId);
    awaitHostRegistered(helios, testHost(), LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(helios, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // The task should vanish from the host ...
    awaitTaskGone(helios, testHost(), idleJobId, LONG_WAIT_SECONDS, SECONDS);

    // ... after which deleting the job must succeed.
    assertEquals(JobDeleteResponse.Status.OK, helios.deleteJob(idleJobId).get().getStatus());
}
Also used : CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) JobUndeployResponse(com.spotify.helios.common.protocol.JobUndeployResponse) Deployment(com.spotify.helios.common.descriptors.Deployment) List(java.util.List) HeliosClient(com.spotify.helios.client.HeliosClient) Job(com.spotify.helios.common.descriptors.Job) JobDeployResponse(com.spotify.helios.common.protocol.JobDeployResponse) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Example 9 with JobUndeployResponse

use of com.spotify.helios.common.protocol.JobUndeployResponse in project helios by spotify.

the class HeliosSoloDeployment method undeployLeftoverJobs.

/**
 * Undeploy jobs left over by {@link TemporaryJobs}. TemporaryJobs should clean these up,
 * but sometimes a few are left behind for whatever reason.
 *
 * <p>This is a best-effort cleanup: for every host known to the client, each task whose goal
 * is not already {@code UNDEPLOY} is undeployed, and then every task on that host is awaited
 * until it is actually undeployed. Any failure is logged as a warning and aborts the remaining
 * cleanup; it is never propagated to the caller. If the failure is an
 * {@link InterruptedException}, the current thread's interrupt flag is restored before
 * returning so that callers can still observe the interruption.
 */
@VisibleForTesting
protected void undeployLeftoverJobs() {
    try {
        // See if there are jobs running on any helios agent. If we are using TemporaryJobs,
        // that class should've undeployed them at this point.
        // Any jobs still running at this point have only been partially cleaned up.
        // We look for jobs via hostStatus() because the job may have been deleted from the master,
        // but the agent may still not have had enough time to undeploy the job from itself.
        final List<String> hosts = heliosClient.listHosts().get();
        for (final String host : hosts) {
            final HostStatus hostStatus = heliosClient.hostStatus(host).get();
            final Map<JobId, TaskStatus> statuses = hostStatus.getStatuses();
            for (final Map.Entry<JobId, TaskStatus> status : statuses.entrySet()) {
                final JobId jobId = status.getKey();
                final Goal goal = status.getValue().getGoal();
                if (goal != Goal.UNDEPLOY) {
                    log.info("Job {} is still set to {} on host {}. Undeploying it now.", jobId, goal, host);
                    final JobUndeployResponse undeployResponse = heliosClient.undeploy(jobId, host).get();
                    log.info("Undeploy response for job {} is {}.", jobId, undeployResponse.getStatus());
                    if (undeployResponse.getStatus() != JobUndeployResponse.Status.OK) {
                        log.warn("Undeploy response for job {} was not OK. This could mean that something " + "beat the helios-solo master in telling the helios-solo agent to " + "undeploy.", jobId);
                    }
                }
                // Wait even when the goal was already UNDEPLOY: the task is still listed, so
                // the undeploy has not completed yet.
                log.info("Waiting for job {} to actually be undeployed...", jobId);
                awaitJobUndeployed(heliosClient, host, jobId, jobUndeployWaitSeconds, TimeUnit.SECONDS);
                log.info("Job {} successfully undeployed.", jobId);
            }
        }
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // The catch-all would otherwise silently clear the interruption; restore the flag
            // so code further up the stack can react to it.
            Thread.currentThread().interrupt();
        }
        log.warn("Exception occurred when trying to clean up leftover jobs.", e);
    }
}
Also used : Goal(com.spotify.helios.common.descriptors.Goal) JobUndeployResponse(com.spotify.helios.common.protocol.JobUndeployResponse) HostStatus(com.spotify.helios.common.descriptors.HostStatus) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) JobId(com.spotify.helios.common.descriptors.JobId) DockerCertificateException(com.spotify.docker.client.exceptions.DockerCertificateException) DockerException(com.spotify.docker.client.exceptions.DockerException) ImageNotFoundException(com.spotify.docker.client.exceptions.ImageNotFoundException) UnknownHostException(java.net.UnknownHostException) ExecutionException(java.util.concurrent.ExecutionException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 10 with JobUndeployResponse

use of com.spotify.helios.common.protocol.JobUndeployResponse in project helios by spotify.

the class ZooKeeperHeliosFailoverTest method undeploy.

/**
 * Undeploys the given job from the test host and waits until its task is gone.
 *
 * <p>Before undeploying, the job status is queried and the task on the test host is asserted
 * to be in the RUNNING state.
 *
 * @param jobId id of the job to undeploy
 * @throws Exception if a client call or the wait for the task to disappear fails
 */
private void undeploy(final JobId jobId) throws Exception {
    // Sanity check: the job status is queryable and reports the task as running on this host.
    assertEquals(
        RUNNING,
        client.jobStatus(jobId).get().getTaskStatuses().get(testHost()).getState());

    // Request the undeploy and require an OK answer.
    assertEquals(
        JobUndeployResponse.Status.OK,
        client.undeploy(jobId, testHost()).get().getStatus());

    // Block until the host no longer reports a task for this job.
    awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
}
Also used : JobStatus(com.spotify.helios.common.descriptors.JobStatus) JobUndeployResponse(com.spotify.helios.common.protocol.JobUndeployResponse)

Aggregations

- JobUndeployResponse (com.spotify.helios.common.protocol.JobUndeployResponse): 11
- JobId (com.spotify.helios.common.descriptors.JobId): 7
- Test (org.junit.Test): 5
- HostStatus (com.spotify.helios.common.descriptors.HostStatus): 4
- TaskStatus (com.spotify.helios.common.descriptors.TaskStatus): 4
- CreateJobResponse (com.spotify.helios.common.protocol.CreateJobResponse): 4
- JobDeployResponse (com.spotify.helios.common.protocol.JobDeployResponse): 4
- HeliosClient (com.spotify.helios.client.HeliosClient): 3
- Deployment (com.spotify.helios.common.descriptors.Deployment): 3
- Job (com.spotify.helios.common.descriptors.Job): 3
- JobStatus (com.spotify.helios.common.descriptors.JobStatus): 3
- JobDeleteResponse (com.spotify.helios.common.protocol.JobDeleteResponse): 2
- List (java.util.List): 2
- ExecutionException (java.util.concurrent.ExecutionException): 2
- VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1
- ImmutableMap (com.google.common.collect.ImmutableMap): 1
- ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 1
- DockerClient (com.spotify.docker.client.DockerClient): 1
- DockerCertificateException (com.spotify.docker.client.exceptions.DockerCertificateException): 1
- DockerException (com.spotify.docker.client.exceptions.DockerException): 1