Search in sources :

Example 1 with AgentMain

use of com.spotify.helios.agent.AgentMain in project helios by spotify.

the class MultiplePortJobTest method test.

@Test
public void test() throws Exception {
    startDefaultMaster();
    // 'Reserve' a 2 port wide range of ports.
    final Range<Integer> portRange = temporaryPorts.localPortRange("agent1", 2);
    // Start an agent using the aforementioned 2 port wide range.
    final AgentMain agent1 = startDefaultAgent(testHost(), "--port-range=" + portRange.lowerEndpoint() + ":" + portRange.upperEndpoint());
    try (final DockerClient dockerClient = getNewDockerClient()) {
        final HeliosClient client = defaultClient();
        awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
        // foo is a mapping of 4711 -> A port dynamically allocated by the agent's PortAllocator
        // bar is a mapping of 4712 -> A static port randomly selected by temporaryPorts
        final Map<String, PortMapping> ports1 = ImmutableMap.of("foo", PortMapping.of(4711), "bar", staticMapping1);
        // foo is a mapping of 4711 -> A port dynamically allocated by the agent's PortAllocator
        // bar is a mapping of 4712 -> A static port randomly selected by temporaryPorts
        final Map<String, PortMapping> ports2 = ImmutableMap.of("foo", PortMapping.of(4711), "bar", staticMapping2);
        final JobId jobId1 = createJob(testJobName + 1, testJobVersion, BUSYBOX, IDLE_COMMAND, EMPTY_ENV, ports1);
        assertNotNull(jobId1);
        deployJob(jobId1, testHost());
        final TaskStatus firstTaskStatus1 = awaitJobState(client, testHost(), jobId1, RUNNING, LONG_WAIT_SECONDS, SECONDS);
        final JobId jobId2 = createJob(testJobName + 2, testJobVersion, BUSYBOX, IDLE_COMMAND, EMPTY_ENV, ports2);
        assertNotNull(jobId2);
        deployJob(jobId2, testHost());
        final TaskStatus firstTaskStatus2 = awaitJobState(client, testHost(), jobId2, RUNNING, LONG_WAIT_SECONDS, SECONDS);
        // Verify we allocated dynamic ports from within the specified range.
        assertTrue(portRange.contains(firstTaskStatus1.getPorts().get("foo").getExternalPort()));
        assertTrue(portRange.contains(firstTaskStatus2.getPorts().get("foo").getExternalPort()));
        // Verify we allocated the static ports we asked for.
        assertEquals(staticMapping1, firstTaskStatus1.getPorts().get("bar"));
        assertEquals(staticMapping2, firstTaskStatus2.getPorts().get("bar"));
        // Verify we didn't allocate the same dynamic port to both jobs.
        assertNotEquals(firstTaskStatus1.getPorts().get("foo"), firstTaskStatus2.getPorts().get("foo"));
        // TODO (dano): the supervisor should report the allocated ports at all times
        // Verify that port allocation is kept across container restarts
        dockerClient.killContainer(firstTaskStatus1.getContainerId());
        final TaskStatus restartedTaskStatus1 = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

            @Override
            public TaskStatus call() throws Exception {
                final HostStatus hostStatus = client.hostStatus(testHost()).get();
                final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId1);
                return (taskStatus != null && taskStatus.getState() == RUNNING && !Objects.equals(taskStatus.getContainerId(), firstTaskStatus1.getContainerId())) ? taskStatus : null;
            }
        });
        assertEquals(firstTaskStatus1.getPorts(), restartedTaskStatus1.getPorts());
        // Verify that port allocation is kept across agent restarts
        agent1.stopAsync().awaitTerminated();
        dockerClient.killContainer(firstTaskStatus2.getContainerId());
        startDefaultAgent(testHost());
        final TaskStatus restartedTaskStatus2 = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

            @Override
            public TaskStatus call() throws Exception {
                final HostStatus hostStatus = client.hostStatus(testHost()).get();
                final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId2);
                return (taskStatus != null && taskStatus.getState() == RUNNING && !Objects.equals(taskStatus.getContainerId(), firstTaskStatus2.getContainerId())) ? taskStatus : null;
            }
        });
        assertEquals(firstTaskStatus2.getPorts(), restartedTaskStatus2.getPorts());
    }
}
Also used : DockerClient(com.spotify.docker.client.DockerClient) HeliosClient(com.spotify.helios.client.HeliosClient) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) AgentMain(com.spotify.helios.agent.AgentMain) HostStatus(com.spotify.helios.common.descriptors.HostStatus) PortMapping(com.spotify.helios.common.descriptors.PortMapping) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Example 2 with AgentMain

use of com.spotify.helios.agent.AgentMain in project helios by spotify.

the class SystemTestBase method startAgent.

protected AgentMain startAgent(final String... args) throws Exception {
    final AgentMain main = new AgentMain(args);
    main.startAsync().awaitRunning();
    services.add(main);
    return main;
}
Also used : AgentMain(com.spotify.helios.agent.AgentMain)

Example 3 with AgentMain

use of com.spotify.helios.agent.AgentMain in project helios by spotify.

the class ZooKeeperRestoreTest method verifyAgentPushesTaskStateAfterRestore.

@Test
public void verifyAgentPushesTaskStateAfterRestore() throws Exception {
    // Start agent once to have it register
    final AgentMain agent1 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    agent1.stopAsync().awaitTerminated();
    // Deploy job
    final Deployment deployment = Deployment.of(fooJob.getId(), START);
    final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
    assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());
    // Back up zk
    zkc.backup(backupDir);
    // Start agent
    startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for agent to indicate that job is running
    awaitJobState(client, testHost(), fooJob.getId(), RUNNING, LONG_WAIT_SECONDS, SECONDS);
    // Restore zk, erasing task state
    zkc.stop();
    zkc.restore(backupDir);
    zkc.start();
    // Wait for agent to again indicate that job is running
    awaitJobState(client, testHost(), fooJob.getId(), RUNNING, LONG_WAIT_SECONDS, SECONDS);
    // Remove task status
    zkc.curatorWithSuperAuth().delete().forPath(Paths.statusHostJob(testHost(), fooJob.getId()));
}
Also used : AgentMain(com.spotify.helios.agent.AgentMain) Deployment(com.spotify.helios.common.descriptors.Deployment) JobDeployResponse(com.spotify.helios.common.protocol.JobDeployResponse) Test(org.junit.Test)

Example 4 with AgentMain

use of com.spotify.helios.agent.AgentMain in project helios by spotify.

the class DeregisterTest method testDeregister.

@Test
public void testDeregister() throws Exception {
    startDefaultMaster();
    final String host = testHost();
    final AgentMain agent = startDefaultAgent(host);
    final HeliosClient client = defaultClient();
    // Create a job
    final Job job = Job.newBuilder().setName(testJobName).setVersion(testJobVersion).setImage(BUSYBOX).setCommand(IDLE_COMMAND).setPorts(ImmutableMap.of("foo", PortMapping.of(4711), "bar", PortMapping.of(4712, ports.localPort("bar")))).build();
    final JobId jobId = job.getId();
    final CreateJobResponse created = client.createJob(job).get();
    assertEquals(CreateJobResponse.Status.OK, created.getStatus());
    // Wait for agent to come up
    awaitHostRegistered(client, host, LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(client, host, UP, LONG_WAIT_SECONDS, SECONDS);
    // Deploy the job on the agent
    final Deployment deployment = Deployment.of(jobId, START);
    final JobDeployResponse deployed = client.deploy(deployment, host).get();
    assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());
    // Wait for the job to run
    awaitJobState(client, host, jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    // Kill off agent
    agent.stopAsync().awaitTerminated();
    // Deregister agent
    final HostDeregisterResponse deregisterResponse = client.deregisterHost(host).get();
    assertEquals(HostDeregisterResponse.Status.OK, deregisterResponse.getStatus());
    // Verify that it's possible to remove the job
    final JobDeleteResponse deleteResponse = client.deleteJob(jobId).get();
    assertEquals(JobDeleteResponse.Status.OK, deleteResponse.getStatus());
}
Also used : HostDeregisterResponse(com.spotify.helios.common.protocol.HostDeregisterResponse) CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) AgentMain(com.spotify.helios.agent.AgentMain) Deployment(com.spotify.helios.common.descriptors.Deployment) HeliosClient(com.spotify.helios.client.HeliosClient) Job(com.spotify.helios.common.descriptors.Job) JobDeployResponse(com.spotify.helios.common.protocol.JobDeployResponse) JobId(com.spotify.helios.common.descriptors.JobId) JobDeleteResponse(com.spotify.helios.common.protocol.JobDeleteResponse) Test(org.junit.Test)

Example 5 with AgentMain

use of com.spotify.helios.agent.AgentMain in project helios by spotify.

the class AgentRestartTest method test.

@Test
public void test() throws Exception {
    startDefaultMaster();
    final DockerClient dockerClient = getNewDockerClient();
    final HeliosClient client = defaultClient();
    final AgentMain agent1 = startDefaultAgent(testHost());
    // Create a job
    final Job job = Job.newBuilder().setName(testJobName).setVersion(testJobVersion).setImage(BUSYBOX).setCommand(IDLE_COMMAND).setCreatingUser(TEST_USER).build();
    final JobId jobId = job.getId();
    final CreateJobResponse created = client.createJob(job).get();
    assertEquals(CreateJobResponse.Status.OK, created.getStatus());
    // Wait for agent to come up
    awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Deploy the job on the agent
    final Deployment deployment = Deployment.of(jobId, START);
    final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
    assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());
    // Wait for the job to run
    final TaskStatus firstTaskStatus = awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertJobEquals(job, firstTaskStatus.getJob());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent1.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Start the agent again
    final AgentMain agent2 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for a while and make sure that the same container is still running
    Thread.sleep(5000);
    final HostStatus hostStatus = client.hostStatus(testHost()).get();
    final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
    if (firstTaskStatus.getState() == PULLING_IMAGE) {
        final State state = taskStatus.getState();
        assertTrue(state == RUNNING || state == PULLING_IMAGE);
    } else {
        assertEquals(RUNNING, taskStatus.getState());
    }
    assertEquals(firstTaskStatus.getContainerId(), taskStatus.getContainerId());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent2.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Kill the container
    dockerClient.killContainer(firstTaskStatus.getContainerId());
    assertEquals(0, listContainers(dockerClient, testTag).size());
    // Start the agent again
    final AgentMain agent3 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the job to be restarted in a new container
    final TaskStatus secondTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

        @Override
        public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(firstTaskStatus.getContainerId())) ? taskStatus : null;
        }
    });
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(secondTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent3.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Kill and destroy the container
    dockerClient.killContainer(secondTaskStatus.getContainerId());
    removeContainer(dockerClient, secondTaskStatus.getContainerId());
    // Start the agent again
    final AgentMain agent4 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the task to be restarted in a new container
    final TaskStatus thirdTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

        @Override
        public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(secondTaskStatus.getContainerId())) ? taskStatus : null;
        }
    });
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(thirdTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent4.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Stop the job
    final SetGoalResponse stopped = client.setGoal(Deployment.of(jobId, STOP), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, stopped.getStatus());
    // Start the agent again
    final AgentMain agent5 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Verify that the task is stopped
    awaitJobState(client, testHost(), jobId, STOPPED, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
    // Stop the agent
    agent5.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Start the job
    final SetGoalResponse started = client.setGoal(Deployment.of(jobId, START), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, started.getStatus());
    // Start the agent again
    final AgentMain agent6 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Verify that the task is started
    awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(1, listContainers(dockerClient, testTag).size());
    // Stop the agent
    agent6.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Undeploy the job
    final JobUndeployResponse undeployed = client.undeploy(jobId, testHost()).get();
    assertEquals(JobUndeployResponse.Status.OK, undeployed.getStatus());
    // Start the agent again
    startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the task to get removed
    awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
}
Also used : SetGoalResponse(com.spotify.helios.common.protocol.SetGoalResponse) DockerClient(com.spotify.docker.client.DockerClient) Deployment(com.spotify.helios.common.descriptors.Deployment) HeliosClient(com.spotify.helios.client.HeliosClient) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) JobDeployResponse(com.spotify.helios.common.protocol.JobDeployResponse) CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) AgentMain(com.spotify.helios.agent.AgentMain) State(com.spotify.helios.common.descriptors.TaskStatus.State) JobUndeployResponse(com.spotify.helios.common.protocol.JobUndeployResponse) HostStatus(com.spotify.helios.common.descriptors.HostStatus) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Aggregations

AgentMain (com.spotify.helios.agent.AgentMain)13 Test (org.junit.Test)11 HeliosClient (com.spotify.helios.client.HeliosClient)8 JobId (com.spotify.helios.common.descriptors.JobId)7 Deployment (com.spotify.helios.common.descriptors.Deployment)5 CreateJobResponse (com.spotify.helios.common.protocol.CreateJobResponse)4 JobDeployResponse (com.spotify.helios.common.protocol.JobDeployResponse)4 DockerClient (com.spotify.docker.client.DockerClient)3 HostStatus (com.spotify.helios.common.descriptors.HostStatus)3 Job (com.spotify.helios.common.descriptors.Job)3 TaskStatus (com.spotify.helios.common.descriptors.TaskStatus)3 HostDeregisterResponse (com.spotify.helios.common.protocol.HostDeregisterResponse)2 Matchers.containsString (org.hamcrest.Matchers.containsString)2 JobStatus (com.spotify.helios.common.descriptors.JobStatus)1 PortMapping (com.spotify.helios.common.descriptors.PortMapping)1 State (com.spotify.helios.common.descriptors.TaskStatus.State)1 JobDeleteResponse (com.spotify.helios.common.protocol.JobDeleteResponse)1 JobUndeployResponse (com.spotify.helios.common.protocol.JobUndeployResponse)1 SetGoalResponse (com.spotify.helios.common.protocol.SetGoalResponse)1 MasterMain (com.spotify.helios.master.MasterMain)1