use of com.spotify.helios.common.descriptors.TaskStatus in project helios by spotify.
the class ZooKeeperMasterModel method getJobHistory.
/**
 * Returns the recorded task status events for a job, sorted with {@code EVENT_COMPARATOR}.
 *
 * <p>When {@code host} is null or empty, the events of every host listed under the job's
 * history node are collected; otherwise only the given host is consulted.
 *
 * @param jobId the job whose history is requested
 * @param host the host to restrict the history to, or null/empty for all hosts
 * @return a sorted copy of the events that could be read; empty if the job has no history node
 * @throws JobDoesNotExistException if {@code getJob(jobId)} yields null
 */
@Override
public List<TaskStatusEvent> getJobHistory(final JobId jobId, final String host) throws JobDoesNotExistException {
  if (getJob(jobId) == null) {
    throw new JobDoesNotExistException(jobId);
  }

  final ZooKeeperClient zk = provider.get("getJobHistory");

  // Resolve which hosts to inspect: the explicit one, or all hosts that have history.
  final List<String> historyHosts;
  try {
    if (isNullOrEmpty(host)) {
      historyHosts = zk.getChildren(Paths.historyJobHosts(jobId));
    } else {
      historyHosts = singletonList(host);
    }
  } catch (NoNodeException e) {
    // No history node for this job at all.
    return emptyList();
  } catch (KeeperException e) {
    throw Throwables.propagate(e);
  }

  final List<TaskStatusEvent> collected = Lists.newArrayList();
  for (final String historyHost : historyHosts) {
    final List<String> eventNodes;
    try {
      eventNodes = zk.getChildren(Paths.historyJobHostEvents(jobId, historyHost));
    } catch (NoNodeException e) {
      // This host has no events node; move on to the next host.
      continue;
    } catch (KeeperException e) {
      throw Throwables.propagate(e);
    }

    for (final String eventNode : eventNodes) {
      // The child node name is the event timestamp.
      final Long timestamp = Long.valueOf(eventNode);
      try {
        final byte[] payload =
            zk.getData(Paths.historyJobHostEventsTimestamp(jobId, historyHost, timestamp));
        final TaskStatus taskStatus = Json.read(payload, TaskStatus.class);
        collected.add(new TaskStatusEvent(taskStatus, timestamp, historyHost));
      } catch (NoNodeException e) {
        // ignore, it went away before we read it
      } catch (KeeperException | IOException e) {
        throw Throwables.propagate(e);
      }
    }
  }

  final List<TaskStatusEvent> sorted = Ordering.from(EVENT_COMPARATOR).sortedCopy(collected);
  return sorted;
}
use of com.spotify.helios.common.descriptors.TaskStatus in project helios by spotify.
the class OldJobReaperTest method events.
/**
 * Builds one {@link TaskStatusEvent} per timestamp, ordered by timestamp ascending.
 *
 * <p>Every event carries the same status (job {@code DUMMY_JOB}, goal {@code START}, state
 * {@code RUNNING}) and an empty host string. The caller's list is not modified.
 *
 * @param timestamps the event timestamps, in any order
 * @return an immutable list of events sorted by ascending timestamp
 */
private List<TaskStatusEvent> events(final List<Long> timestamps) {
  final ImmutableList.Builder<TaskStatusEvent> builder = ImmutableList.builder();
  // First sort by timestamps ascending (on a copy, so the argument is left untouched)
  final List<Long> copy = Lists.newArrayList(timestamps);
  Collections.sort(copy);
  // Iterate over the sorted copy; iterating over the raw argument would discard the sort.
  for (final Long timestamp : copy) {
    final TaskStatus taskStatus = TaskStatus.newBuilder()
        .setJob(DUMMY_JOB)
        .setGoal(Goal.START)
        .setState(State.RUNNING)
        .build();
    builder.add(new TaskStatusEvent(taskStatus, timestamp, ""));
  }
  return builder.build();
}
use of com.spotify.helios.common.descriptors.TaskStatus in project helios by spotify.
the class ContainerHostNameTest method testValidHostname.
/**
 * Deploys a job that runs {@code hostname -f} with its hostname set to {@code testHost()} and
 * checks that the container's output contains that host name.
 */
@Test
public void testValidHostname() throws Exception {
  startDefaultMaster();
  startDefaultAgent(testHost());
  awaitHostStatus(testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

  try (final DockerClient docker = getNewDockerClient()) {
    // Describe and register the job.
    final Job job = Job.newBuilder()
        .setName(testJobName)
        .setVersion(testJobVersion)
        .setImage(BUSYBOX)
        .setHostname(testHost())
        .setCommand(asList("hostname", "-f"))
        .build();
    final JobId id = createJob(job);

    // Deploy it and wait until the container has exited.
    deployJob(id, testHost());
    final TaskStatus exited = awaitTaskState(id, testHost(), EXITED);

    // Collect everything the container wrote to stdout/stderr.
    final String output;
    try (final LogStream stream = docker.logs(exited.getContainerId(), stdout(), stderr())) {
      output = stream.readFully();
    }

    assertThat(output, containsString(testHost()));
  }
}
use of com.spotify.helios.common.descriptors.TaskStatus in project helios by spotify.
the class GracePeriodTest method setup.
/**
 * Wires up the mocks and builds the {@code Supervisor} under test.
 *
 * <p>Stubs a "running" and a "stopped" container response, a fixed retry delay and a no-op
 * service registration, then backs the model's task status setter/getter with an in-memory map
 * so that a status written through the model can be read back.
 */
@Before
public void setup() throws Exception {
  // Container inspection response reporting a running container with no ports.
  final ContainerState running = Mockito.mock(ContainerState.class);
  when(running.running()).thenReturn(true);
  when(runningResponse.state()).thenReturn(running);
  when(runningResponse.networkSettings())
      .thenReturn(NetworkSettings.builder().ports(Collections.emptyMap()).build());

  // Container inspection response reporting a stopped container.
  final ContainerState stopped = Mockito.mock(ContainerState.class);
  when(stopped.running()).thenReturn(false);
  when(stoppedResponse.state()).thenReturn(stopped);

  when(retryPolicy.delay(any(ThrottleState.class))).thenReturn(10L);
  when(registrar.register(any(ServiceRegistration.class)))
      .thenReturn(new NopServiceRegistrationHandle());

  final TaskConfig taskConfig = TaskConfig.builder()
      .namespace(NAMESPACE)
      .host("AGENT_NAME")
      .job(JOB)
      .envVars(ENV)
      .defaultRegistrationDomain("domain")
      .build();

  final TaskStatus.Builder statusBuilder = TaskStatus.newBuilder()
      .setJob(JOB)
      .setEnv(ENV)
      .setPorts(PORTS);
  final StatusUpdater updater = new DefaultStatusUpdater(model, statusBuilder);
  final TaskMonitor taskMonitor = new TaskMonitor(JOB.getId(), FlapController.create(), updater);

  final TaskRunnerFactory factory = TaskRunnerFactory.builder()
      .registrar(registrar)
      .config(taskConfig)
      .dockerClient(docker)
      .listener(taskMonitor)
      .build();

  sut = Supervisor.newBuilder()
      .setJob(JOB)
      .setStatusUpdater(updater)
      .setDockerClient(docker)
      .setRestartPolicy(retryPolicy)
      .setRunnerFactory(factory)
      .setMetrics(new NoopSupervisorMetrics())
      .setMonitor(taskMonitor)
      .setSleeper(sleeper)
      .build();

  // In-memory store standing in for the model's task status persistence.
  final ConcurrentMap<JobId, TaskStatus> statuses = Maps.newConcurrentMap();

  // Writes through the model land in the map.
  doAnswer(new Answer<Object>() {
    @Override
    public Object answer(final InvocationOnMock invocation) {
      final Object[] args = invocation.getArguments();
      statuses.put((JobId) args[0], (TaskStatus) args[1]);
      return null;
    }
  }).when(model).setTaskStatus(eq(JOB.getId()), taskStatusCaptor.capture());

  // Reads through the model are served from the map.
  when(model.getTaskStatus(eq(JOB.getId()))).thenAnswer(new Answer<Object>() {
    @Override
    public Object answer(final InvocationOnMock invocation) throws Throwable {
      final JobId requested = (JobId) invocation.getArguments()[0];
      return statuses.get(requested);
    }
  });
}
use of com.spotify.helios.common.descriptors.TaskStatus in project helios by spotify.
the class AgentRestartTest method test.
/**
 * Exercises agent restarts against a deployed job and checks the container state after each one.
 *
 * <p>Phases, each separated by stopping and restarting the agent:
 * <ol>
 * <li>plain restart: the same container keeps running;</li>
 * <li>container killed while the agent is down: the job is restarted in a new container;</li>
 * <li>container killed and removed while the agent is down: again a new container;</li>
 * <li>goal set to STOP while the agent is down: the task ends up stopped;</li>
 * <li>goal set to START while the agent is down: the task ends up running;</li>
 * <li>job undeployed while the agent is down: the task and its container go away.</li>
 * </ol>
 *
 * <p>The Docker client is held in a try-with-resources block so it is closed even when an
 * assertion fails part-way through.
 */
@Test
public void test() throws Exception {
  startDefaultMaster();
  try (final DockerClient dockerClient = getNewDockerClient()) {
    final HeliosClient client = defaultClient();
    final AgentMain agent1 = startDefaultAgent(testHost());
    // Create a job
    final Job job = Job.newBuilder().setName(testJobName).setVersion(testJobVersion).setImage(BUSYBOX).setCommand(IDLE_COMMAND).setCreatingUser(TEST_USER).build();
    final JobId jobId = job.getId();
    final CreateJobResponse created = client.createJob(job).get();
    assertEquals(CreateJobResponse.Status.OK, created.getStatus());
    // Wait for agent to come up
    awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Deploy the job on the agent
    final Deployment deployment = Deployment.of(jobId, START);
    final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
    assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());
    // Wait for the job to run
    final TaskStatus firstTaskStatus = awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertJobEquals(job, firstTaskStatus.getJob());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent1.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Start the agent again
    final AgentMain agent2 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for a while and make sure that the same container is still running
    Thread.sleep(5000);
    final HostStatus hostStatus = client.hostStatus(testHost()).get();
    final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
    // NOTE(review): firstTaskStatus was obtained by awaiting RUNNING above, so this
    // PULLING_IMAGE branch looks unreachable -- confirm against awaitJobState before removing.
    if (firstTaskStatus.getState() == PULLING_IMAGE) {
      final State state = taskStatus.getState();
      assertTrue(state == RUNNING || state == PULLING_IMAGE);
    } else {
      assertEquals(RUNNING, taskStatus.getState());
    }
    assertEquals(firstTaskStatus.getContainerId(), taskStatus.getContainerId());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent2.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Kill the container
    dockerClient.killContainer(firstTaskStatus.getContainerId());
    assertEquals(0, listContainers(dockerClient, testTag).size());
    // Start the agent again
    final AgentMain agent3 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the job to be restarted in a new container
    final TaskStatus secondTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {
      @Override
      public TaskStatus call() throws Exception {
        final HostStatus hostStatus = client.hostStatus(testHost()).get();
        final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
        // Returning null means "not there yet"; only a RUNNING task in a different container counts.
        return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(firstTaskStatus.getContainerId())) ? taskStatus : null;
      }
    });
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(secondTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent3.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Kill and destroy the container
    dockerClient.killContainer(secondTaskStatus.getContainerId());
    removeContainer(dockerClient, secondTaskStatus.getContainerId());
    // Start the agent again
    final AgentMain agent4 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the task to be restarted in a new container
    final TaskStatus thirdTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {
      @Override
      public TaskStatus call() throws Exception {
        final HostStatus hostStatus = client.hostStatus(testHost()).get();
        final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
        // Returning null means "not there yet"; only a RUNNING task in a different container counts.
        return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(secondTaskStatus.getContainerId())) ? taskStatus : null;
      }
    });
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(thirdTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent4.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Stop the job
    final SetGoalResponse stopped = client.setGoal(Deployment.of(jobId, STOP), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, stopped.getStatus());
    // Start the agent again
    final AgentMain agent5 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Verify that the task is stopped
    awaitJobState(client, testHost(), jobId, STOPPED, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
    // Stop the agent
    agent5.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Start the job
    final SetGoalResponse started = client.setGoal(Deployment.of(jobId, START), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, started.getStatus());
    // Start the agent again
    final AgentMain agent6 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Verify that the task is started
    awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(1, listContainers(dockerClient, testTag).size());
    // Stop the agent
    agent6.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Undeploy the job
    final JobUndeployResponse undeployed = client.undeploy(jobId, testHost()).get();
    assertEquals(JobUndeployResponse.Status.OK, undeployed.getStatus());
    // Start the agent again
    startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the task to get removed
    awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
  }
}
Aggregations