use of com.spotify.helios.common.descriptors.HostStatus in project helios by spotify.
the class AgentReportingTest method verifyAgentReportsDockerVersion.
@Test
public void verifyAgentReportsDockerVersion() throws Exception {
startDefaultMaster();
startDefaultAgent(testHost());
final HeliosClient client = defaultClient();
final DockerVersion dockerVersion = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<DockerVersion>() {
@Override
public DockerVersion call() throws Exception {
final HostStatus status = client.hostStatus(testHost()).get();
return status == null ? null : status.getHostInfo() == null ? null : status.getHostInfo().getDockerVersion();
}
});
try (final DockerClient dockerClient = getNewDockerClient()) {
final String expectedDockerVersion = dockerClient.version().version();
assertThat(dockerVersion.getVersion(), is(expectedDockerVersion));
}
}
use of com.spotify.helios.common.descriptors.HostStatus in project helios by spotify.
the class AgentRestartTest method test.
@Test
public void test() throws Exception {
startDefaultMaster();
final DockerClient dockerClient = getNewDockerClient();
final HeliosClient client = defaultClient();
final AgentMain agent1 = startDefaultAgent(testHost());
// Create a job
final Job job = Job.newBuilder().setName(testJobName).setVersion(testJobVersion).setImage(BUSYBOX).setCommand(IDLE_COMMAND).setCreatingUser(TEST_USER).build();
final JobId jobId = job.getId();
final CreateJobResponse created = client.createJob(job).get();
assertEquals(CreateJobResponse.Status.OK, created.getStatus());
// Wait for agent to come up
awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Deploy the job on the agent
final Deployment deployment = Deployment.of(jobId, START);
final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());
// Wait for the job to run
final TaskStatus firstTaskStatus = awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
assertJobEquals(job, firstTaskStatus.getJob());
assertEquals(1, listContainers(dockerClient, testTag).size());
assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
// Stop the agent
agent1.stopAsync().awaitTerminated();
awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
// Start the agent again
final AgentMain agent2 = startDefaultAgent(testHost());
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Wait for a while and make sure that the same container is still running
Thread.sleep(5000);
final HostStatus hostStatus = client.hostStatus(testHost()).get();
final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
if (firstTaskStatus.getState() == PULLING_IMAGE) {
final State state = taskStatus.getState();
assertTrue(state == RUNNING || state == PULLING_IMAGE);
} else {
assertEquals(RUNNING, taskStatus.getState());
}
assertEquals(firstTaskStatus.getContainerId(), taskStatus.getContainerId());
assertEquals(1, listContainers(dockerClient, testTag).size());
assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
// Stop the agent
agent2.stopAsync().awaitTerminated();
awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
// Kill the container
dockerClient.killContainer(firstTaskStatus.getContainerId());
assertEquals(0, listContainers(dockerClient, testTag).size());
// Start the agent again
final AgentMain agent3 = startDefaultAgent(testHost());
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Wait for the job to be restarted in a new container
final TaskStatus secondTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {
@Override
public TaskStatus call() throws Exception {
final HostStatus hostStatus = client.hostStatus(testHost()).get();
final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(firstTaskStatus.getContainerId())) ? taskStatus : null;
}
});
assertEquals(1, listContainers(dockerClient, testTag).size());
assertTrue(dockerClient.inspectContainer(secondTaskStatus.getContainerId()).state().running());
// Stop the agent
agent3.stopAsync().awaitTerminated();
awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
// Kill and destroy the container
dockerClient.killContainer(secondTaskStatus.getContainerId());
removeContainer(dockerClient, secondTaskStatus.getContainerId());
// Start the agent again
final AgentMain agent4 = startDefaultAgent(testHost());
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Wait for the task to be restarted in a new container
final TaskStatus thirdTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {
@Override
public TaskStatus call() throws Exception {
final HostStatus hostStatus = client.hostStatus(testHost()).get();
final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(secondTaskStatus.getContainerId())) ? taskStatus : null;
}
});
assertEquals(1, listContainers(dockerClient, testTag).size());
assertTrue(dockerClient.inspectContainer(thirdTaskStatus.getContainerId()).state().running());
// Stop the agent
agent4.stopAsync().awaitTerminated();
awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
// Stop the job
final SetGoalResponse stopped = client.setGoal(Deployment.of(jobId, STOP), testHost()).get();
assertEquals(SetGoalResponse.Status.OK, stopped.getStatus());
// Start the agent again
final AgentMain agent5 = startDefaultAgent(testHost());
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Verify that the task is stopped
awaitJobState(client, testHost(), jobId, STOPPED, LONG_WAIT_SECONDS, SECONDS);
assertEquals(0, listContainers(dockerClient, testTag).size());
// Stop the agent
agent5.stopAsync().awaitTerminated();
awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
// Start the job
final SetGoalResponse started = client.setGoal(Deployment.of(jobId, START), testHost()).get();
assertEquals(SetGoalResponse.Status.OK, started.getStatus());
// Start the agent again
final AgentMain agent6 = startDefaultAgent(testHost());
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Verify that the task is started
awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
assertEquals(1, listContainers(dockerClient, testTag).size());
// Stop the agent
agent6.stopAsync().awaitTerminated();
awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
// Undeploy the job
final JobUndeployResponse undeployed = client.undeploy(jobId, testHost()).get();
assertEquals(JobUndeployResponse.Status.OK, undeployed.getStatus());
// Start the agent again
startDefaultAgent(testHost());
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Wait for the task to get removed
awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
assertEquals(0, listContainers(dockerClient, testTag).size());
}
use of com.spotify.helios.common.descriptors.HostStatus in project helios by spotify.
the class CliHostListTest method testStatusFilter.
@Test
public void testStatusFilter() throws Exception {
String jsonOutput = cli("hosts", "-f", "--json", "--status", "UP");
Map<String, HostStatus> statuses = Json.readUnchecked(jsonOutput, new TypeReference<Map<String, HostStatus>>() {
});
final HeliosClient client = defaultClient();
Map<String, HostStatus> expectedStatuses = client.hostStatuses(ImmutableList.of(hostname1)).get();
assertThat(expectedStatuses, equalTo(statuses));
jsonOutput = cli("hosts", "-f", "--json", "--status", "DOWN");
statuses = Json.readUnchecked(jsonOutput, new TypeReference<Map<String, HostStatus>>() {
});
expectedStatuses = client.hostStatuses(ImmutableList.of(hostname2)).get();
assertThat(expectedStatuses, equalTo(statuses));
}
use of com.spotify.helios.common.descriptors.HostStatus in project helios by spotify.
the class SystemTestBase method awaitHostStatusWithLabels.
protected HostStatus awaitHostStatusWithLabels(final HeliosClient client, final String host, final HostStatus.Status status, final Map<String, String> labels) throws Exception {
final HostStatus hostStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, () -> {
final HostStatus candidate = getOrNull(client.hostStatus(host));
if (candidate == null || candidate.getStatus() != status || // labels are stored in ZK after the host has come up
candidate.getLabels().size() != labels.size()) {
return null;
}
return candidate;
});
assertThat("host " + host + " has status=" + status + " with labels=" + hostStatus.getLabels(), hostStatus.getLabels(), is(labels));
return hostStatus;
}
use of com.spotify.helios.common.descriptors.HostStatus in project helios by spotify.
the class DeploymentGroupTest method testUpdateFailedHostsChangedDeploymentGroupHosts.
// A test that ensures deployment groups that failed during a rolling update triggered by
// changing hosts will perform a new rolling update if the hosts change again.
@Test
public void testUpdateFailedHostsChangedDeploymentGroupHosts() throws Exception {
final ZooKeeperClient client = spy(this.client);
final ZooKeeperMasterModel masterModel = spy(newMasterModel(client));
// Return a job so we can add a real deployment group.
final Job job = Job.newBuilder().setCommand(ImmutableList.of("COMMAND")).setImage("IMAGE").setName("JOB_NAME").setVersion("VERSION").build();
doReturn(job).when(masterModel).getJob(job.getId());
// Add a real deployment group.
final DeploymentGroup dg = DeploymentGroup.newBuilder().setName(GROUP_NAME).setHostSelectors(ImmutableList.of(HostSelector.parse("role=melmac"))).setJobId(job.getId()).setRolloutOptions(RolloutOptions.getDefault()).setRollingUpdateReason(HOSTS_CHANGED).build();
masterModel.addDeploymentGroup(dg);
// Give the deployment group a host.
client.setData(Paths.statusDeploymentGroupHosts(dg.getName()), Json.asBytes(ImmutableList.of("host1")));
// And a status...
client.setData(Paths.statusDeploymentGroup(dg.getName()), DeploymentGroupStatus.newBuilder().setState(FAILED).build().toJsonBytes());
// Pretend our new host is UP.
final HostStatus statusUp = mock(HostStatus.class);
doReturn(HostStatus.Status.UP).when(statusUp).getStatus();
doReturn(statusUp).when(masterModel).getHostStatus("host2");
// Switch out our host!
masterModel.updateDeploymentGroupHosts(dg.getName(), ImmutableList.of("host2"));
// Ensure we write the same DG status again.
// This is a no-op, but it means we triggered a rolling update.
final ZooKeeperOperation setDeploymentGroup = set(Paths.configDeploymentGroup(dg.getName()), dg);
verify(client, times(2)).transaction(opCaptor.capture());
assertThat(opCaptor.getValue(), hasItem(setDeploymentGroup));
}
Aggregations