Search in sources:

Example 6 with HostStatus

Use of com.spotify.helios.common.descriptors.HostStatus in the project helios by spotify.

In the class AgentReportingTest, the method verifyAgentReportsDockerVersion:

/**
 * Checks that the Docker version found in the agent's host status (as read through the Helios
 * client) equals the version reported by a Docker client obtained from getNewDockerClient().
 */
@Test
public void verifyAgentReportsDockerVersion() throws Exception {
    startDefaultMaster();
    startDefaultAgent(testHost());
    final HeliosClient client = defaultClient();

    // The callable yields null until both the host status and its host info are available
    // (presumably Polling.await keeps retrying on null -- confirm against Polling).
    final DockerVersion reportedVersion = Polling.await(LONG_WAIT_SECONDS, SECONDS, () -> {
        final HostStatus status = client.hostStatus(testHost()).get();
        if (status == null || status.getHostInfo() == null) {
            return null;
        }
        return status.getHostInfo().getDockerVersion();
    });

    try (final DockerClient dockerClient = getNewDockerClient()) {
        final String versionFromDocker = dockerClient.version().version();
        assertThat(reportedVersion.getVersion(), is(versionFromDocker));
    }
}
Also used : DockerVersion(com.spotify.helios.common.descriptors.DockerVersion) DockerClient(com.spotify.docker.client.DockerClient) HostStatus(com.spotify.helios.common.descriptors.HostStatus) HeliosClient(com.spotify.helios.client.HeliosClient) Test(org.junit.Test)

Example 7 with HostStatus

Use of com.spotify.helios.common.descriptors.HostStatus in the project helios by spotify.

In the class AgentRestartTest, the method test:

/**
 * Walks a deployed job through a series of agent restarts and checks, after each restart, that
 * the agent's reported task status and the containers listed for {@code testTag} are as expected:
 * <ol>
 *   <li>plain restart: the same container is still running;</li>
 *   <li>container killed while the agent is down: the job runs again in a new container;</li>
 *   <li>container killed and removed while the agent is down: again a new container;</li>
 *   <li>goal set to STOP while the agent is down: the task becomes STOPPED, no containers;</li>
 *   <li>goal set to START while the agent is down: the task becomes RUNNING, one container;</li>
 *   <li>job undeployed while the agent is down: the task disappears, no containers.</li>
 * </ol>
 */
@Test
public void test() throws Exception {
    startDefaultMaster();
    // try-with-resources so the Docker client is released even when an assertion fails.
    try (final DockerClient dockerClient = getNewDockerClient()) {
        final HeliosClient client = defaultClient();
        final AgentMain agent1 = startDefaultAgent(testHost());
        // Create a job
        final Job job = Job.newBuilder()
            .setName(testJobName)
            .setVersion(testJobVersion)
            .setImage(BUSYBOX)
            .setCommand(IDLE_COMMAND)
            .setCreatingUser(TEST_USER)
            .build();
        final JobId jobId = job.getId();
        final CreateJobResponse created = client.createJob(job).get();
        assertEquals(CreateJobResponse.Status.OK, created.getStatus());
        // Wait for agent to come up
        awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
        awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
        // Deploy the job on the agent
        final Deployment deployment = Deployment.of(jobId, START);
        final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
        assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());
        // Wait for the job to run
        final TaskStatus firstTaskStatus = awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
        assertJobEquals(job, firstTaskStatus.getJob());
        assertEquals(1, listContainers(dockerClient, testTag).size());
        assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
        // Stop the agent
        agent1.stopAsync().awaitTerminated();
        awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
        // Start the agent again
        final AgentMain agent2 = startDefaultAgent(testHost());
        awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
        // Wait for a while and make sure that the same container is still running
        Thread.sleep(5000);
        final HostStatus hostStatus = client.hostStatus(testHost()).get();
        final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
        // Fail with a readable message instead of a NullPointerException below.
        assertTrue("no task status reported for job " + jobId + " after agent restart", taskStatus != null);
        if (firstTaskStatus.getState() == PULLING_IMAGE) {
            final State state = taskStatus.getState();
            assertTrue(state == RUNNING || state == PULLING_IMAGE);
        } else {
            assertEquals(RUNNING, taskStatus.getState());
        }
        assertEquals(firstTaskStatus.getContainerId(), taskStatus.getContainerId());
        assertEquals(1, listContainers(dockerClient, testTag).size());
        assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
        // Stop the agent
        agent2.stopAsync().awaitTerminated();
        awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
        // Kill the container
        dockerClient.killContainer(firstTaskStatus.getContainerId());
        assertEquals(0, listContainers(dockerClient, testTag).size());
        // Start the agent again
        final AgentMain agent3 = startDefaultAgent(testHost());
        awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
        // Wait for the job to be restarted in a new container
        final TaskStatus secondTaskStatus = awaitRunningInNewContainer(client, jobId, firstTaskStatus);
        assertEquals(1, listContainers(dockerClient, testTag).size());
        assertTrue(dockerClient.inspectContainer(secondTaskStatus.getContainerId()).state().running());
        // Stop the agent
        agent3.stopAsync().awaitTerminated();
        awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
        // Kill and destroy the container
        dockerClient.killContainer(secondTaskStatus.getContainerId());
        removeContainer(dockerClient, secondTaskStatus.getContainerId());
        // Start the agent again
        final AgentMain agent4 = startDefaultAgent(testHost());
        awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
        // Wait for the task to be restarted in a new container
        final TaskStatus thirdTaskStatus = awaitRunningInNewContainer(client, jobId, secondTaskStatus);
        assertEquals(1, listContainers(dockerClient, testTag).size());
        assertTrue(dockerClient.inspectContainer(thirdTaskStatus.getContainerId()).state().running());
        // Stop the agent
        agent4.stopAsync().awaitTerminated();
        awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
        // Stop the job
        final SetGoalResponse stopped = client.setGoal(Deployment.of(jobId, STOP), testHost()).get();
        assertEquals(SetGoalResponse.Status.OK, stopped.getStatus());
        // Start the agent again
        final AgentMain agent5 = startDefaultAgent(testHost());
        awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
        // Verify that the task is stopped
        awaitJobState(client, testHost(), jobId, STOPPED, LONG_WAIT_SECONDS, SECONDS);
        assertEquals(0, listContainers(dockerClient, testTag).size());
        // Stop the agent
        agent5.stopAsync().awaitTerminated();
        awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
        // Start the job
        final SetGoalResponse started = client.setGoal(Deployment.of(jobId, START), testHost()).get();
        assertEquals(SetGoalResponse.Status.OK, started.getStatus());
        // Start the agent again
        final AgentMain agent6 = startDefaultAgent(testHost());
        awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
        // Verify that the task is started
        awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
        assertEquals(1, listContainers(dockerClient, testTag).size());
        // Stop the agent
        agent6.stopAsync().awaitTerminated();
        awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
        // Undeploy the job
        final JobUndeployResponse undeployed = client.undeploy(jobId, testHost()).get();
        assertEquals(JobUndeployResponse.Status.OK, undeployed.getStatus());
        // Start the agent again
        startDefaultAgent(testHost());
        awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
        // Wait for the task to get removed
        awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
        assertEquals(0, listContainers(dockerClient, testTag).size());
    }
}

/**
 * Polls the status of {@code testHost()} until the task for {@code jobId} is RUNNING with a
 * non-null container id that differs from the container id of {@code previous}.
 *
 * @param client   Helios client used to query the host status
 * @param jobId    id of the job whose task status is inspected
 * @param previous task status holding the container id that must no longer be in use
 * @return the first task status that satisfies the condition above
 * @throws Exception whatever Polling.await or the host status lookup throws
 */
private TaskStatus awaitRunningInNewContainer(final HeliosClient client, final JobId jobId, final TaskStatus previous) throws Exception {
    return Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

        @Override
        public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            final boolean runningInNewContainer = taskStatus != null
                && taskStatus.getContainerId() != null
                && taskStatus.getState() == RUNNING
                && !taskStatus.getContainerId().equals(previous.getContainerId());
            // null signals "not there yet" to the poller.
            return runningInNewContainer ? taskStatus : null;
        }
    });
}
Also used : SetGoalResponse(com.spotify.helios.common.protocol.SetGoalResponse) DockerClient(com.spotify.docker.client.DockerClient) Deployment(com.spotify.helios.common.descriptors.Deployment) HeliosClient(com.spotify.helios.client.HeliosClient) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) JobDeployResponse(com.spotify.helios.common.protocol.JobDeployResponse) CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) AgentMain(com.spotify.helios.agent.AgentMain) State(com.spotify.helios.common.descriptors.TaskStatus.State) JobUndeployResponse(com.spotify.helios.common.protocol.JobUndeployResponse) HostStatus(com.spotify.helios.common.descriptors.HostStatus) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Example 8 with HostStatus

Use of com.spotify.helios.common.descriptors.HostStatus in the project helios by spotify.

In the class CliHostListTest, the method testStatusFilter:

/**
 * Checks the {@code --status} filter of the {@code hosts} CLI command: the JSON printed for
 * status UP must equal the client's host statuses for {@code hostname1}, and the JSON printed for
 * status DOWN must equal the client's host statuses for {@code hostname2}.
 */
@Test
public void testStatusFilter() throws Exception {
    final TypeReference<Map<String, HostStatus>> statusMapType = new TypeReference<Map<String, HostStatus>>() {
    };

    final String upJson = cli("hosts", "-f", "--json", "--status", "UP");
    final Map<String, HostStatus> upStatuses = Json.readUnchecked(upJson, statusMapType);
    final HeliosClient client = defaultClient();
    final Map<String, HostStatus> expectedUpStatuses = client.hostStatuses(ImmutableList.of(hostname1)).get();
    // Actual (CLI output) goes first, expected inside the matcher, so that a failure message
    // labels the two values correctly.
    assertThat(upStatuses, equalTo(expectedUpStatuses));

    final String downJson = cli("hosts", "-f", "--json", "--status", "DOWN");
    final Map<String, HostStatus> downStatuses = Json.readUnchecked(downJson, statusMapType);
    final Map<String, HostStatus> expectedDownStatuses = client.hostStatuses(ImmutableList.of(hostname2)).get();
    assertThat(downStatuses, equalTo(expectedDownStatuses));
}
Also used : HostStatus(com.spotify.helios.common.descriptors.HostStatus) TypeReference(com.fasterxml.jackson.core.type.TypeReference) HeliosClient(com.spotify.helios.client.HeliosClient) Map(java.util.Map) Test(org.junit.Test)

Example 9 with HostStatus

Use of com.spotify.helios.common.descriptors.HostStatus in the project helios by spotify.

In the class SystemTestBase, the method awaitHostStatusWithLabels:

/**
 * Polls the status of {@code host} until it has the given {@code status} and as many labels as
 * {@code labels}, then asserts that its labels are exactly {@code labels}.
 *
 * @param client Helios client used to query the host status
 * @param host   name of the host to query
 * @param status status the host must have
 * @param labels labels the host is expected to carry
 * @return the host status that satisfied the polling condition
 * @throws Exception whatever Polling.await throws
 */
protected HostStatus awaitHostStatusWithLabels(final HeliosClient client, final String host, final HostStatus.Status status, final Map<String, String> labels) throws Exception {
    final HostStatus matched = Polling.await(LONG_WAIT_SECONDS, SECONDS, () -> {
        final HostStatus current = getOrNull(client.hostStatus(host));
        if (current == null) {
            return null;
        }
        if (current.getStatus() != status) {
            return null;
        }
        // labels are stored in ZK after the host has come up, so the status alone is not enough:
        // keep polling until the number of labels matches as well
        final boolean labelCountMatches = current.getLabels().size() == labels.size();
        return labelCountMatches ? current : null;
    });
    final String reason = "host " + host + " has status=" + status + " with labels=" + matched.getLabels();
    assertThat(reason, matched.getLabels(), is(labels));
    return matched;
}
Also used : HostStatus(com.spotify.helios.common.descriptors.HostStatus)

Example 10 with HostStatus

Use of com.spotify.helios.common.descriptors.HostStatus in the project helios by spotify.

In the class DeploymentGroupTest, the method testUpdateFailedHostsChangedDeploymentGroupHosts:

// Ensures that a deployment group whose rolling update (triggered by a change of hosts) has
// FAILED starts a new rolling update when its hosts change once more.
@Test
public void testUpdateFailedHostsChangedDeploymentGroupHosts() throws Exception {
    final ZooKeeperClient zkSpy = spy(this.client);
    final ZooKeeperMasterModel model = spy(newMasterModel(zkSpy));

    // Stub the job lookup so that a real deployment group can be added.
    final Job job = Job.newBuilder()
        .setCommand(ImmutableList.of("COMMAND"))
        .setImage("IMAGE")
        .setName("JOB_NAME")
        .setVersion("VERSION")
        .build();
    doReturn(job).when(model).getJob(job.getId());

    // Register a real deployment group whose last rolling update was caused by HOSTS_CHANGED.
    final DeploymentGroup dg = DeploymentGroup.newBuilder()
        .setName(GROUP_NAME)
        .setHostSelectors(ImmutableList.of(HostSelector.parse("role=melmac")))
        .setJobId(job.getId())
        .setRolloutOptions(RolloutOptions.getDefault())
        .setRollingUpdateReason(HOSTS_CHANGED)
        .build();
    model.addDeploymentGroup(dg);
    final String groupName = dg.getName();

    // The group currently consists of a single host ...
    final byte[] currentHosts = Json.asBytes(ImmutableList.of("host1"));
    zkSpy.setData(Paths.statusDeploymentGroupHosts(groupName), currentHosts);
    // ... and its status is FAILED.
    final byte[] failedStatus = DeploymentGroupStatus.newBuilder().setState(FAILED).build().toJsonBytes();
    zkSpy.setData(Paths.statusDeploymentGroup(groupName), failedStatus);

    // Make the replacement host look UP to the master model.
    final HostStatus upStatus = mock(HostStatus.class);
    doReturn(HostStatus.Status.UP).when(upStatus).getStatus();
    doReturn(upStatus).when(model).getHostStatus("host2");

    // Replace host1 with host2.
    model.updateDeploymentGroupHosts(groupName, ImmutableList.of("host2"));

    // The same DG config must have been written again. The write itself is a no-op, but its
    // presence in the last captured transaction means a rolling update was triggered.
    final ZooKeeperOperation expectedWrite = set(Paths.configDeploymentGroup(groupName), dg);
    verify(zkSpy, times(2)).transaction(opCaptor.capture());
    assertThat(opCaptor.getValue(), hasItem(expectedWrite));
}
Also used : ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) DefaultZooKeeperClient(com.spotify.helios.servicescommon.coordination.DefaultZooKeeperClient) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) HostStatus(com.spotify.helios.common.descriptors.HostStatus) Job(com.spotify.helios.common.descriptors.Job) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) Test(org.junit.Test)

Aggregations

HostStatus (com.spotify.helios.common.descriptors.HostStatus): 28; Test (org.junit.Test): 17; JobId (com.spotify.helios.common.descriptors.JobId): 13; TaskStatus (com.spotify.helios.common.descriptors.TaskStatus): 10; HeliosClient (com.spotify.helios.client.HeliosClient): 9; Map (java.util.Map): 8; Job (com.spotify.helios.common.descriptors.Job): 6; Deployment (com.spotify.helios.common.descriptors.Deployment): 5; Matchers.containsString (org.hamcrest.Matchers.containsString): 5; DockerClient (com.spotify.docker.client.DockerClient): 4; JobUndeployResponse (com.spotify.helios.common.protocol.JobUndeployResponse): 4; ExecutionException (java.util.concurrent.ExecutionException): 4; AgentMain (com.spotify.helios.agent.AgentMain): 3; DeploymentGroup (com.spotify.helios.common.descriptors.DeploymentGroup): 3; CreateJobResponse (com.spotify.helios.common.protocol.CreateJobResponse): 3; JobDeployResponse (com.spotify.helios.common.protocol.JobDeployResponse): 3; TimeoutException (java.util.concurrent.TimeoutException): 3; ExceptionMetered (com.codahale.metrics.annotation.ExceptionMetered): 2; Timed (com.codahale.metrics.annotation.Timed): 2; TypeReference (com.fasterxml.jackson.core.type.TypeReference): 2