use of com.spotify.helios.common.descriptors.TaskStatusEvent in project helios by spotify.
the class OldJobReaperTest method events.
/**
 * Builds one {@link TaskStatusEvent} per supplied timestamp, ordered by timestamp ascending.
 *
 * <p>Every event wraps a status built with job {@code DUMMY_JOB}, goal {@code Goal.START} and
 * state {@code State.RUNNING}; the event's third constructor argument is the empty string.
 *
 * @param timestamps the event timestamps, in any order; this list is not modified
 * @return an immutable list of events whose timestamps are in ascending order
 */
private List<TaskStatusEvent> events(final List<Long> timestamps) {
  final ImmutableList.Builder<TaskStatusEvent> builder = ImmutableList.builder();

  // Sort a copy so that the caller's list is left untouched.
  final List<Long> sorted = Lists.newArrayList(timestamps);
  Collections.sort(sorted);

  // Iterate the SORTED copy. The previous version looped over the unsorted input, which made
  // the sort above a no-op and returned events in caller order.
  for (final Long timestamp : sorted) {
    final TaskStatus taskStatus = TaskStatus.newBuilder()
        .setJob(DUMMY_JOB)
        .setGoal(Goal.START)
        .setState(State.RUNNING)
        .build();
    builder.add(new TaskStatusEvent(taskStatus, timestamp, ""));
  }
  return builder.build();
}
use of com.spotify.helios.common.descriptors.TaskStatusEvent in project helios by spotify.
the class TaskHistoryWriterTest method testWriteWithZooKeeperDownAndInterveningCrash.
/**
 * Saves a history item while zk is stopped, tears the writer down and recreates it to simulate
 * a crash, starts zk again, and then expects exactly one history item carrying JOB_ID.
 */
@Test
public void testWriteWithZooKeeperDownAndInterveningCrash() throws Exception {
// Stop zk first so that the save below happens while it is down.
zk.stop();
writer.saveHistoryItem(TASK_STATUS, TIMESTAMP);
// simulate a crash by recreating the writer
writer.stopAsync().awaitTerminated();
// NOTE(review): presumably makeWriter replaces and starts the writer under test - confirm.
makeWriter(client);
zk.start();
// getOnlyElement fails unless exactly one history item is returned.
final TaskStatusEvent historyItem = Iterables.getOnlyElement(awaitHistoryItems());
assertEquals(JOB_ID, historyItem.getStatus().getJob().getId());
}
use of com.spotify.helios.common.descriptors.TaskStatusEvent in project helios by spotify.
the class TaskHistoryWriter method run.
/**
 * Writes queued status events to ZooKeeper one at a time until {@code getNext()} returns null.
 *
 * <p>For each event the history node is created and populated with the status JSON; afterwards
 * the job/host event list is trimmed if it holds more than
 * {@code MAX_NUMBER_STATUS_EVENTS_TO_RETAIN} children. A {@code NodeExistsException} is treated
 * as success and processing continues with the next event. On a {@code ConnectionLossException}
 * or any other {@code KeeperException} the event is put back and this invocation ends.
 */
@Override
public void run() {
  for (TaskStatusEvent item = getNext(); item != null; item = getNext()) {
    final JobId jobId = item.getStatus().getJob().getId();
    final String eventPath =
        Paths.historyJobHostEventsTimestamp(jobId, hostname, item.getTimestamp());

    try {
      log.debug("writing queued item to zookeeper {} {}", jobId, item.getTimestamp());
      client.ensurePath(eventPath, true);
      client.createAndSetData(eventPath, item.getStatus().toJsonBytes());

      // Trim the stored history once it grows past the retention limit.
      final List<String> storedEvents =
          client.getChildren(Paths.historyJobHostEvents(jobId, hostname));
      if (storedEvents.size() > MAX_NUMBER_STATUS_EVENTS_TO_RETAIN) {
        trimStatusEvents(storedEvents, jobId);
      }
    } catch (NodeExistsException e) {
      // The node we tried to create is already present, so the write effectively succeeded
      // (two generals problem); nothing more to do for this item.
      log.debug("item we wanted in is already there");
    } catch (ConnectionLossException e) {
      log.warn("Connection lost while putting item into zookeeper, will retry");
      putBack(item);
      return;
    } catch (KeeperException e) {
      log.error("Error putting item into zookeeper, will retry", e);
      putBack(item);
      return;
    }
  }
}
use of com.spotify.helios.common.descriptors.TaskStatusEvent in project helios by spotify.
the class HealthCheckTest method testContainerDiesDuringHealthcheck.
/**
 * Deploys a job with a TCP health check, kills its container once the task reaches
 * HEALTHCHECKING, and then expects a FAILED event to show up in the job history before the
 * task reaches HEALTHCHECKING again and registration is verified.
 */
@Test
public void testContainerDiesDuringHealthcheck() throws Exception {
  startDefaultMaster();
  final HeliosClient client = defaultClient();
  startDefaultAgent(testHost(), "--service-registry=" + registryAddress);
  awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

  final HealthCheck healthCheck = TcpHealthCheck.of("health");
  final Job job = pokeJob(healthCheck);
  final JobId jobId = createJob(job);
  deployJob(jobId, testHost());
  awaitTaskState(jobId, testHost(), HEALTHCHECKING);

  // kill the underlying container
  final JobStatus jobStatus = getOrNull(client.jobStatus(jobId));
  final TaskStatus taskStatus = jobStatus.getTaskStatuses().get(testHost());
  getNewDockerClient().killContainer(taskStatus.getContainerId());

  // ensure the job is marked as failed
  final int timeout = WAIT_TIMEOUT_SECONDS;
  Polling.await(timeout, SECONDS, new Callable<Object>() {
    @Override
    public Object call() throws Exception {
      final TaskStatusEvents jobHistory = getOrNull(client.jobHistory(jobId));
      if (jobHistory == null) {
        // No history could be fetched on this attempt. Return the same "not ready" null as
        // the fall-through below instead of failing with a NullPointerException.
        return null;
      }
      for (final TaskStatusEvent event : jobHistory.getEvents()) {
        if (event.getStatus().getState() == FAILED) {
          return true;
        }
      }
      return null;
    }
  });

  // wait for the job to come back up and start healthchecking again
  awaitTaskState(jobId, testHost(), HEALTHCHECKING);
  pokeAndVerifyRegistration(client, jobId, timeout);
}
use of com.spotify.helios.common.descriptors.TaskStatusEvent in project helios by spotify.
the class JobHistoryTest method testJobHistory.
/**
 * Deploys and then undeploys a job, waits until the job history holds enough events, and
 * checks the recorded state sequence: any number of leading PULLING_IMAGE events (none with a
 * container id), followed by CREATING, STARTING, RUNNING, STOPPING and finally EXITED or
 * STOPPED.
 */
@Test
public void testJobHistory() throws Exception {
  startDefaultMaster();
  final HeliosClient client = defaultClient();
  startDefaultAgent(testHost());
  awaitHostStatus(testHost(), Status.UP, LONG_WAIT_SECONDS, SECONDS);

  final JobId jobId = createJob(testJobName, testJobVersion, BUSYBOX, IDLE_COMMAND);
  deployJob(jobId, testHost());
  awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
  undeployJob(jobId, testHost());
  awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);

  final TaskStatusEvents history = Polling.await(
      WAIT_TIMEOUT_SECONDS, SECONDS, new Callable<TaskStatusEvents>() {
        @Override
        public TaskStatusEvents call() throws Exception {
          final TaskStatusEvents snapshot = client.jobHistory(jobId).get();
          final int total = snapshot.getEvents().size();

          // A pull that tempfails can leave several PULLING_IMAGE events at the front of the
          // history, so count how many leading events are in that state.
          int leadingPulls = 0;
          while (leadingPulls < total
                 && snapshot.getEvents().get(leadingPulls).getStatus().getState()
                    == State.PULLING_IMAGE) {
            leadingPulls++;
          }

          if (leadingPulls == total) {
            // Either no events at all, or nothing but PULLING_IMAGE so far: keep polling.
            return null;
          }

          // Five more events are required, starting at the first non-PULLING_IMAGE one.
          return total >= leadingPulls + 5 ? snapshot : null;
        }
      });

  final ListIterator<TaskStatusEvent> cursor = history.getEvents().listIterator();

  // Consume the leading PULLING_IMAGE events, none of which may carry a container id.
  TaskStatusEvent current = cursor.next();
  while (current.getStatus().getState() == State.PULLING_IMAGE) {
    assertThat(current, not(hasContainerId()));
    current = cursor.next();
  }
  // Step back so that the first non-PULLING_IMAGE event is returned by the next() call below.
  cursor.previous();

  assertThat(cursor.next(), allOf(hasState(State.CREATING), not(hasContainerId())));
  assertThat(cursor.next(), allOf(hasState(State.STARTING), hasContainerId()));
  assertThat(cursor.next(), hasState(State.RUNNING));
  assertThat(cursor.next(), hasState(State.STOPPING));
  assertThat(cursor.next(), hasState(State.EXITED, State.STOPPED));
}
Aggregations