Search in sources :

Example 11 with AgentMain

use of com.spotify.helios.agent.AgentMain in project helios by spotify.

In the class DeploymentGroupTest, method testRollingUpdateCoordination:

/**
 * Stops the default master, starts three masters and twenty labelled agents, then runs an
 * asynchronous rolling update across a deployment group selecting those agents.
 *
 * <p>Once the job is running on the last-started agent, the test collects the deployer master
 * recorded on every deployment of every agent and asserts that the number of distinct deployer
 * masters equals the number of masters that were started.
 *
 * @throws Exception if any of the cluster, CLI or client operations fails
 */
@Test
public void testRollingUpdateCoordination() throws Exception {
    // Swap the single default master for a group of masters.
    master.stopAsync().awaitTerminated();
    final Map<String, MasterMain> masterCluster = startDefaultMasters(3);

    // Start the agents; the linked map keeps them in start order so the "last" one is well defined.
    final Map<String, AgentMain> startedAgents = Maps.newLinkedHashMap();
    int agentIndex = 0;
    while (agentIndex < 20) {
        final String hostName = TEST_HOST + agentIndex;
        final AgentMain agent = startDefaultAgent(hostName, "--labels", TEST_LABEL);
        startedAgents.put(hostName, agent);
        agentIndex++;
    }

    // Create the deployment group and kick off the rollout, with parallelism equal to the
    // number of agents.
    cli("create-deployment-group", "--json", TEST_GROUP, TEST_LABEL);
    final JobId deployedJobId = createJob(testJobName, testJobVersion, BUSYBOX, IDLE_COMMAND);
    final String parallelism = String.valueOf(startedAgents.size());
    cli("rolling-update", "--async", "--par", parallelism, testJobNameAndVersion, TEST_GROUP);

    // Block until the task is running on the agent that was started last.
    final String lastHost = getLast(startedAgents.keySet());
    awaitTaskState(deployedJobId, lastHost, TaskStatus.State.RUNNING);

    // Gather the distinct masters recorded as deployer across all agents' deployments.
    final Map<String, HostStatus> statusByHost =
            defaultClient().hostStatuses(Lists.newArrayList(startedAgents.keySet())).get();
    final Set<String> mastersSeen = Sets.newHashSet();
    for (final HostStatus hostStatus : statusByHost.values()) {
        for (final Deployment hostDeployment : hostStatus.getJobs().values()) {
            mastersSeen.add(hostDeployment.getDeployerMaster());
        }
    }

    // Every started master must have performed at least one deployment.
    final int expectedMasterCount = masterCluster.size();
    final int observedMasterCount = mastersSeen.size();
    assertEquals(expectedMasterCount, observedMasterCount);
}
Also used : AgentMain(com.spotify.helios.agent.AgentMain) MasterMain(com.spotify.helios.master.MasterMain) Deployment(com.spotify.helios.common.descriptors.Deployment) HostStatus(com.spotify.helios.common.descriptors.HostStatus) Matchers.containsString(org.hamcrest.Matchers.containsString) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Example 12 with AgentMain

use of com.spotify.helios.agent.AgentMain in project helios by spotify.

In the class DeploymentGroupTest, method testRemovingAgentTagUndeploysJob:

/**
 * Exercises how a deployment group selecting {@code foo=bar} reacts as agents join, change
 * labels, and are deregistered.
 *
 * <p>Sequence, in order:
 * <ol>
 *   <li>Three agents labelled {@code foo=bar} are started and the job is rolled out to them.</li>
 *   <li>A fourth matching agent is started; the group rolls out again and the job runs on it.</li>
 *   <li>The first agent is restarted with an extra label while still matching; the job keeps
 *       running on it.</li>
 *   <li>The first agent is restarted with {@code foo=notbar}; after the following rollout(s)
 *       the job is awaited as undeployed from it.</li>
 *   <li>The first agent is restarted with {@code foo=bar} again; the job runs on it again.</li>
 *   <li>Another agent is stopped and deregistered; the group is awaited through one more
 *       ROLLING_OUT / DONE cycle.</li>
 * </ol>
 *
 * @throws Exception if any agent, CLI or client operation fails
 */
@Test
public void testRemovingAgentTagUndeploysJob() throws Exception {
    final HeliosClient client = defaultClient();
    // Host names for the five agents used below, all derived from testHost().
    final String oldHost = testHost();
    final String deregisterHost = testHost() + "2";
    final String unchangedHost = testHost() + "3";
    final String newHost = testHost() + "4";
    final String anotherNewHost = testHost() + "5";
    // oldAgent is intentionally non-final: it is reassigned each time the agent is restarted.
    @SuppressWarnings("VariableDeclarationUsageDistance") AgentMain oldAgent = startDefaultAgent(oldHost, "--labels", "foo=bar");
    awaitUpWithLabels(oldHost, "foo", "bar");
    // Handle kept so this agent can be stopped and deregistered at the end of the test.
    final AgentMain deregisterAgent = startDefaultAgent(deregisterHost, "--labels", "foo=bar");
    awaitUpWithLabels(deregisterHost, "foo", "bar");
    // This agent is started once and never touched again.
    startDefaultAgent(unchangedHost, "--labels", "foo=bar");
    awaitUpWithLabels(unchangedHost, "foo", "bar");
    // Create the group selecting foo=bar and roll the job out to the three matching agents.
    cli("create-deployment-group", "--json", TEST_GROUP, "foo=bar");
    final JobId jobId = createJob(testJobName, testJobVersion, BUSYBOX, IDLE_COMMAND);
    cli("rolling-update", "--async", testJobNameAndVersion, TEST_GROUP);
    awaitTaskState(jobId, oldHost, TaskStatus.State.RUNNING);
    awaitTaskState(jobId, deregisterHost, TaskStatus.State.RUNNING);
    awaitTaskState(jobId, unchangedHost, TaskStatus.State.RUNNING);
    awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.DONE);
    // Rollout should be complete and on its second iteration at this point.
    // Start another agent and wait for it to have the job deployed to it.
    startDefaultAgent(newHost, "--labels", "foo=bar");
    awaitUpWithLabels(newHost, "foo", "bar");
    awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.ROLLING_OUT);
    awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.DONE);
    awaitTaskState(jobId, newHost, TaskStatus.State.RUNNING);
    // Restart the old agent with labels that still match the deployment group
    // The job should not be undeployed.
    stopAgent(oldAgent);
    oldAgent = startDefaultAgent(oldHost, "--labels", "foo=bar", "another=label");
    awaitUpWithLabels(oldHost, "foo", "bar", "another", "label");
    awaitTaskState(jobId, oldHost, TaskStatus.State.RUNNING);
    // Restart the old agent with labels that do not match the deployment group.
    stopAgent(oldAgent);
    oldAgent = startDefaultAgent(oldHost, "--labels", "foo=notbar");
    awaitUpWithLabels(oldHost, "foo", "notbar");
    // ...which should trigger a rolling update
    awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.ROLLING_OUT);
    // Start yet another agent in order to trigger another rolling update.
    // NOTE(review): unlike the other agents, this one is not awaited with awaitUpWithLabels.
    startDefaultAgent(anotherNewHost, "--labels", "foo=bar");
    // Wait for the rolling update(s) to finish.
    awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.ROLLING_OUT);
    awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.DONE);
    // ...which should remove the job.
    awaitUndeployed(oldHost, jobId);
    // Restart the old agent with labels that match the deployment group (again)
    // The job should be deployed.
    stopAgent(oldAgent);
    // The returned handle is discarded here because the old agent is not stopped again.
    startDefaultAgent(oldHost, "--labels", "foo=bar");
    awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.ROLLING_OUT);
    awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.DONE);
    awaitTaskState(jobId, oldHost, TaskStatus.State.RUNNING);
    // Deregister an agent
    stopAgent(deregisterAgent);
    final HostDeregisterResponse deregisterResponse = client.deregisterHost(deregisterHost).get();
    assertEquals(HostDeregisterResponse.Status.OK, deregisterResponse.getStatus());
    // Make sure we 'undeploy' from the now non-existent agent.
    awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.ROLLING_OUT);
    awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.DONE);
}
Also used : HostDeregisterResponse(com.spotify.helios.common.protocol.HostDeregisterResponse) AgentMain(com.spotify.helios.agent.AgentMain) Matchers.containsString(org.hamcrest.Matchers.containsString) HeliosClient(com.spotify.helios.client.HeliosClient) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Example 13 with AgentMain

use of com.spotify.helios.agent.AgentMain in project helios by spotify.

In the class AgentZooKeeperDownTolerationTest, method test:

/**
 * Checks that an agent keeps its job's container alive, and respawns it when killed, while
 * ZooKeeper is stopped, and that the task status reflects the newest container once ZooKeeper
 * is started again.
 *
 * <p>The test deploys a job, stops ZooKeeper, restarts the agent twice (killing the container
 * once while the agent is up and once while it is down), and finally starts ZooKeeper and polls
 * until the reported container id matches the last respawned container.
 *
 * @throws Exception if any master, agent, Docker or client operation fails
 */
@Test
public void test() throws Exception {
    startDefaultMaster();
    final DockerClient docker = getNewDockerClient();
    final HeliosClient heliosClient = defaultClient();
    final AgentMain firstAgent = startDefaultAgent(testHost());

    // Register the job with the master.
    final Job idleJob = Job.newBuilder()
            .setName(testJobName)
            .setVersion(testJobVersion)
            .setImage(BUSYBOX)
            .setCommand(IDLE_COMMAND)
            .setCreatingUser(TEST_USER)
            .build();
    final JobId idleJobId = idleJob.getId();
    final CreateJobResponse createResponse = heliosClient.createJob(idleJob).get();
    assertEquals(CreateJobResponse.Status.OK, createResponse.getStatus());

    // The agent has to be registered and UP before it can receive a deployment.
    awaitHostRegistered(heliosClient, testHost(), LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(heliosClient, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // Deploy the job to the agent with the START goal.
    final Deployment startDeployment = Deployment.of(idleJobId, START);
    final JobDeployResponse deployResponse = heliosClient.deploy(startDeployment, testHost()).get();
    assertEquals(JobDeployResponse.Status.OK, deployResponse.getStatus());

    // Wait until the job is running and remember the container backing it.
    final TaskStatus initialStatus =
            awaitJobState(heliosClient, testHost(), idleJobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertJobEquals(idleJob, initialStatus.getJob());
    final String originalContainerId = initialStatus.getContainerId();
    assertNotNull(docker.inspectContainer(originalContainerId));

    // Take ZooKeeper away; the container must survive.
    zk().stop();
    Thread.sleep(5000);
    assertTrue(docker.inspectContainer(originalContainerId).state().running());

    // Bounce the agent; the very same container must still be running afterwards.
    firstAgent.stopAsync().awaitTerminated();
    final AgentMain secondAgent = startDefaultAgent(testHost());
    Thread.sleep(5000);
    assertTrue(docker.inspectContainer(originalContainerId).state().running());

    // Poller shared by both respawn checks: yields the id of the only listed container for this
    // test's tag, or null (keep polling) when the number of listed containers is not exactly one.
    final Callable<String> soleContainerId = new Callable<String>() {

        @Override
        public String call() throws Exception {
            final List<Container> listed = listContainers(docker, testTag);
            if (listed.size() != 1) {
                return null;
            }
            return listed.get(0).id();
        }
    };

    // Kill the container while the agent is up and wait for a replacement to appear.
    docker.killContainer(originalContainerId);
    assertFalse(docker.inspectContainer(originalContainerId).state().running());
    final String respawnedWhileUpId = Polling.await(LONG_WAIT_SECONDS, SECONDS, soleContainerId);

    // Kill the replacement while the agent is down, then start a fresh agent.
    secondAgent.stopAsync().awaitTerminated();
    docker.killContainer(respawnedWhileUpId);
    assertFalse(docker.inspectContainer(respawnedWhileUpId).state().running());
    startDefaultAgent(testHost());

    // The freshly started agent must spawn yet another container.
    final String respawnedAfterRestartId = Polling.await(LONG_WAIT_SECONDS, SECONDS, soleContainerId);
    assertTrue(docker.inspectContainer(respawnedAfterRestartId).state().running());

    // Bring ZooKeeper back and expect the agent to be reported as UP again.
    zk().start();
    awaitHostStatus(heliosClient, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // Poll until the task status reports the id of the most recently spawned container.
    Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

        @Override
        public TaskStatus call() throws Exception {
            final TaskStatus reported =
                    heliosClient.jobStatus(idleJobId).get().getTaskStatuses().get(testHost());
            if (reported == null) {
                return null;
            }
            if (!Objects.equals(reported.getContainerId(), respawnedAfterRestartId)) {
                return null;
            }
            return reported;
        }
    });
}
Also used : DockerClient(com.spotify.docker.client.DockerClient) Deployment(com.spotify.helios.common.descriptors.Deployment) HeliosClient(com.spotify.helios.client.HeliosClient) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) JobDeployResponse(com.spotify.helios.common.protocol.JobDeployResponse) JobStatus(com.spotify.helios.common.descriptors.JobStatus) CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) AgentMain(com.spotify.helios.agent.AgentMain) List(java.util.List) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Aggregations

AgentMain (com.spotify.helios.agent.AgentMain): 13, Test (org.junit.Test): 11, HeliosClient (com.spotify.helios.client.HeliosClient): 8, JobId (com.spotify.helios.common.descriptors.JobId): 7, Deployment (com.spotify.helios.common.descriptors.Deployment): 5, CreateJobResponse (com.spotify.helios.common.protocol.CreateJobResponse): 4, JobDeployResponse (com.spotify.helios.common.protocol.JobDeployResponse): 4, DockerClient (com.spotify.docker.client.DockerClient): 3, HostStatus (com.spotify.helios.common.descriptors.HostStatus): 3, Job (com.spotify.helios.common.descriptors.Job): 3, TaskStatus (com.spotify.helios.common.descriptors.TaskStatus): 3, HostDeregisterResponse (com.spotify.helios.common.protocol.HostDeregisterResponse): 2, Matchers.containsString (org.hamcrest.Matchers.containsString): 2, JobStatus (com.spotify.helios.common.descriptors.JobStatus): 1, PortMapping (com.spotify.helios.common.descriptors.PortMapping): 1, State (com.spotify.helios.common.descriptors.TaskStatus.State): 1, JobDeleteResponse (com.spotify.helios.common.protocol.JobDeleteResponse): 1, JobUndeployResponse (com.spotify.helios.common.protocol.JobUndeployResponse): 1, SetGoalResponse (com.spotify.helios.common.protocol.SetGoalResponse): 1, MasterMain (com.spotify.helios.master.MasterMain): 1