Search in sources :

Example 16 with TaskStatus

use of com.spotify.helios.common.descriptors.TaskStatus in project helios by spotify.

the class ZooKeeperMasterModel method getJobHistory.

/**
   * Given a jobId and host, returns the N most recent events in its history on that host in the
   * cluster.
   */
@Override
public List<TaskStatusEvent> getJobHistory(final JobId jobId, final String host) throws JobDoesNotExistException {
    final Job descriptor = getJob(jobId);
    if (descriptor == null) {
        throw new JobDoesNotExistException(jobId);
    }
    final ZooKeeperClient client = provider.get("getJobHistory");
    final List<String> hosts;
    try {
        hosts = (!isNullOrEmpty(host)) ? singletonList(host) : client.getChildren(Paths.historyJobHosts(jobId));
    } catch (NoNodeException e) {
        return emptyList();
    } catch (KeeperException e) {
        throw Throwables.propagate(e);
    }
    final List<TaskStatusEvent> jsEvents = Lists.newArrayList();
    for (final String h : hosts) {
        final List<String> events;
        try {
            events = client.getChildren(Paths.historyJobHostEvents(jobId, h));
        } catch (NoNodeException e) {
            continue;
        } catch (KeeperException e) {
            throw Throwables.propagate(e);
        }
        for (final String event : events) {
            try {
                final byte[] data = client.getData(Paths.historyJobHostEventsTimestamp(jobId, h, Long.valueOf(event)));
                final TaskStatus status = Json.read(data, TaskStatus.class);
                jsEvents.add(new TaskStatusEvent(status, Long.valueOf(event), h));
            } catch (NoNodeException e) {
            // ignore, it went away before we read it
            } catch (KeeperException | IOException e) {
                throw Throwables.propagate(e);
            }
        }
    }
    return Ordering.from(EVENT_COMPARATOR).sortedCopy(jsEvents);
}
Also used : TaskStatusEvent(com.spotify.helios.common.descriptors.TaskStatusEvent) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) IOException(java.io.IOException) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) Job(com.spotify.helios.common.descriptors.Job) KeeperException(org.apache.zookeeper.KeeperException)

Example 17 with TaskStatus

use of com.spotify.helios.common.descriptors.TaskStatus in project helios by spotify.

the class OldJobReaperTest method events.

private List<TaskStatusEvent> events(final List<Long> timestamps) {
    final ImmutableList.Builder<TaskStatusEvent> builder = ImmutableList.builder();
    // First sort by timestamps ascending
    final List<Long> copy = Lists.newArrayList(timestamps);
    Collections.sort(copy);
    for (final Long timestamp : timestamps) {
        final TaskStatus taskStatus = TaskStatus.newBuilder().setJob(DUMMY_JOB).setGoal(Goal.START).setState(State.RUNNING).build();
        builder.add(new TaskStatusEvent(taskStatus, timestamp, ""));
    }
    return builder.build();
}
Also used : TaskStatusEvent(com.spotify.helios.common.descriptors.TaskStatusEvent) ImmutableList(com.google.common.collect.ImmutableList) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus)

Example 18 with TaskStatus

use of com.spotify.helios.common.descriptors.TaskStatus in project helios by spotify.

the class ContainerHostNameTest method testValidHostname.

@Test
public void testValidHostname() throws Exception {
    startDefaultMaster();
    startDefaultAgent(testHost());
    awaitHostStatus(testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    try (final DockerClient dockerClient = getNewDockerClient()) {
        final List<String> command = asList("hostname", "-f");
        // Create job
        final JobId jobId = createJob(Job.newBuilder().setName(testJobName).setVersion(testJobVersion).setImage(BUSYBOX).setHostname(testHost()).setCommand(command).build());
        // deploy
        deployJob(jobId, testHost());
        final TaskStatus taskStatus = awaitTaskState(jobId, testHost(), EXITED);
        final String log;
        try (final LogStream logs = dockerClient.logs(taskStatus.getContainerId(), stdout(), stderr())) {
            log = logs.readFully();
        }
        assertThat(log, containsString(testHost()));
    }
}
Also used : DockerClient(com.spotify.docker.client.DockerClient) Matchers.containsString(org.hamcrest.Matchers.containsString) LogStream(com.spotify.docker.client.LogStream) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Example 19 with TaskStatus

use of com.spotify.helios.common.descriptors.TaskStatus in project helios by spotify.

the class GracePeriodTest method setup.

@Before
public void setup() throws Exception {
    final ContainerState runningState = Mockito.mock(ContainerState.class);
    when(runningState.running()).thenReturn(true);
    when(runningResponse.state()).thenReturn(runningState);
    when(runningResponse.networkSettings()).thenReturn(NetworkSettings.builder().ports(Collections.emptyMap()).build());
    final ContainerState stoppedState = Mockito.mock(ContainerState.class);
    when(stoppedState.running()).thenReturn(false);
    when(stoppedResponse.state()).thenReturn(stoppedState);
    when(retryPolicy.delay(any(ThrottleState.class))).thenReturn(10L);
    when(registrar.register(any(ServiceRegistration.class))).thenReturn(new NopServiceRegistrationHandle());
    final TaskConfig config = TaskConfig.builder().namespace(NAMESPACE).host("AGENT_NAME").job(JOB).envVars(ENV).defaultRegistrationDomain("domain").build();
    final TaskStatus.Builder taskStatus = TaskStatus.newBuilder().setJob(JOB).setEnv(ENV).setPorts(PORTS);
    final StatusUpdater statusUpdater = new DefaultStatusUpdater(model, taskStatus);
    final TaskMonitor monitor = new TaskMonitor(JOB.getId(), FlapController.create(), statusUpdater);
    final TaskRunnerFactory runnerFactory = TaskRunnerFactory.builder().registrar(registrar).config(config).dockerClient(docker).listener(monitor).build();
    sut = Supervisor.newBuilder().setJob(JOB).setStatusUpdater(statusUpdater).setDockerClient(docker).setRestartPolicy(retryPolicy).setRunnerFactory(runnerFactory).setMetrics(new NoopSupervisorMetrics()).setMonitor(monitor).setSleeper(sleeper).build();
    final ConcurrentMap<JobId, TaskStatus> statusMap = Maps.newConcurrentMap();
    doAnswer(new Answer<Object>() {

        @Override
        public Object answer(final InvocationOnMock invocationOnMock) {
            final Object[] arguments = invocationOnMock.getArguments();
            final JobId jobId = (JobId) arguments[0];
            final TaskStatus status = (TaskStatus) arguments[1];
            statusMap.put(jobId, status);
            return null;
        }
    }).when(model).setTaskStatus(eq(JOB.getId()), taskStatusCaptor.capture());
    when(model.getTaskStatus(eq(JOB.getId()))).thenAnswer(new Answer<Object>() {

        @Override
        public Object answer(final InvocationOnMock invocationOnMock) throws Throwable {
            final JobId jobId = (JobId) invocationOnMock.getArguments()[0];
            return statusMap.get(jobId);
        }
    });
}
Also used : ThrottleState(com.spotify.helios.common.descriptors.ThrottleState) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) ContainerState(com.spotify.docker.client.messages.ContainerState) InvocationOnMock(org.mockito.invocation.InvocationOnMock) NoopSupervisorMetrics(com.spotify.helios.servicescommon.statistics.NoopSupervisorMetrics) JobId(com.spotify.helios.common.descriptors.JobId) ServiceRegistration(com.spotify.helios.serviceregistration.ServiceRegistration) NopServiceRegistrationHandle(com.spotify.helios.serviceregistration.NopServiceRegistrationHandle) Before(org.junit.Before)

Example 20 with TaskStatus

use of com.spotify.helios.common.descriptors.TaskStatus in project helios by spotify.

the class AgentRestartTest method test.

@Test
public void test() throws Exception {
    startDefaultMaster();
    final DockerClient dockerClient = getNewDockerClient();
    final HeliosClient client = defaultClient();
    final AgentMain agent1 = startDefaultAgent(testHost());
    // Create a job
    final Job job = Job.newBuilder().setName(testJobName).setVersion(testJobVersion).setImage(BUSYBOX).setCommand(IDLE_COMMAND).setCreatingUser(TEST_USER).build();
    final JobId jobId = job.getId();
    final CreateJobResponse created = client.createJob(job).get();
    assertEquals(CreateJobResponse.Status.OK, created.getStatus());
    // Wait for agent to come up
    awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Deploy the job on the agent
    final Deployment deployment = Deployment.of(jobId, START);
    final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
    assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());
    // Wait for the job to run
    final TaskStatus firstTaskStatus = awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertJobEquals(job, firstTaskStatus.getJob());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent1.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Start the agent again
    final AgentMain agent2 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for a while and make sure that the same container is still running
    Thread.sleep(5000);
    final HostStatus hostStatus = client.hostStatus(testHost()).get();
    final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
    if (firstTaskStatus.getState() == PULLING_IMAGE) {
        final State state = taskStatus.getState();
        assertTrue(state == RUNNING || state == PULLING_IMAGE);
    } else {
        assertEquals(RUNNING, taskStatus.getState());
    }
    assertEquals(firstTaskStatus.getContainerId(), taskStatus.getContainerId());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent2.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Kill the container
    dockerClient.killContainer(firstTaskStatus.getContainerId());
    assertEquals(0, listContainers(dockerClient, testTag).size());
    // Start the agent again
    final AgentMain agent3 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the job to be restarted in a new container
    final TaskStatus secondTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

        @Override
        public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(firstTaskStatus.getContainerId())) ? taskStatus : null;
        }
    });
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(secondTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent3.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Kill and destroy the container
    dockerClient.killContainer(secondTaskStatus.getContainerId());
    removeContainer(dockerClient, secondTaskStatus.getContainerId());
    // Start the agent again
    final AgentMain agent4 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the task to be restarted in a new container
    final TaskStatus thirdTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

        @Override
        public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(secondTaskStatus.getContainerId())) ? taskStatus : null;
        }
    });
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(thirdTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent4.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Stop the job
    final SetGoalResponse stopped = client.setGoal(Deployment.of(jobId, STOP), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, stopped.getStatus());
    // Start the agent again
    final AgentMain agent5 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Verify that the task is stopped
    awaitJobState(client, testHost(), jobId, STOPPED, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
    // Stop the agent
    agent5.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Start the job
    final SetGoalResponse started = client.setGoal(Deployment.of(jobId, START), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, started.getStatus());
    // Start the agent again
    final AgentMain agent6 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Verify that the task is started
    awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(1, listContainers(dockerClient, testTag).size());
    // Stop the agent
    agent6.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Undeploy the job
    final JobUndeployResponse undeployed = client.undeploy(jobId, testHost()).get();
    assertEquals(JobUndeployResponse.Status.OK, undeployed.getStatus());
    // Start the agent again
    startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the task to get removed
    awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
}
Also used : SetGoalResponse(com.spotify.helios.common.protocol.SetGoalResponse) DockerClient(com.spotify.docker.client.DockerClient) Deployment(com.spotify.helios.common.descriptors.Deployment) HeliosClient(com.spotify.helios.client.HeliosClient) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) JobDeployResponse(com.spotify.helios.common.protocol.JobDeployResponse) CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) AgentMain(com.spotify.helios.agent.AgentMain) State(com.spotify.helios.common.descriptors.TaskStatus.State) JobUndeployResponse(com.spotify.helios.common.protocol.JobUndeployResponse) HostStatus(com.spotify.helios.common.descriptors.HostStatus) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Aggregations

TaskStatus (com.spotify.helios.common.descriptors.TaskStatus)53 JobId (com.spotify.helios.common.descriptors.JobId)40 Test (org.junit.Test)27 DockerClient (com.spotify.docker.client.DockerClient)18 Deployment (com.spotify.helios.common.descriptors.Deployment)15 Job (com.spotify.helios.common.descriptors.Job)14 HeliosClient (com.spotify.helios.client.HeliosClient)13 LogStream (com.spotify.docker.client.LogStream)10 HostStatus (com.spotify.helios.common.descriptors.HostStatus)10 JobStatus (com.spotify.helios.common.descriptors.JobStatus)8 JobDeployResponse (com.spotify.helios.common.protocol.JobDeployResponse)8 Matchers.containsString (org.hamcrest.Matchers.containsString)8 CreateJobResponse (com.spotify.helios.common.protocol.CreateJobResponse)7 PortMapping (com.spotify.helios.common.descriptors.PortMapping)6 Map (java.util.Map)6 ImmutableMap (com.google.common.collect.ImmutableMap)4 TaskStatusEvent (com.spotify.helios.common.descriptors.TaskStatusEvent)4 JobUndeployResponse (com.spotify.helios.common.protocol.JobUndeployResponse)4 IOException (java.io.IOException)4 ExecutionException (java.util.concurrent.ExecutionException)4