use of com.spotify.helios.client.HeliosClient in project helios by spotify.
the class DeregisterTest method testRegistrationResolutionTtlNotExpired.
/**
 * Checks that a host does not report UP again when a fresh agent reuses its hostname while
 * being started with a registration TTL of 9999: polling for UP must time out.
 *
 * <p>Only the final poll is allowed to time out. A timeout in one of the earlier setup waits
 * propagates and fails the test instead of being mistaken for the expected outcome.
 *
 * @throws Exception if a setup step fails or times out
 */
@Test
public void testRegistrationResolutionTtlNotExpired() throws Exception {
  startDefaultMaster();
  final String host = testHost();
  final AgentMain agent = startDefaultAgent(host);
  final HeliosClient client = defaultClient();

  // Wait for agent to come up
  awaitHostRegistered(client, host, LONG_WAIT_SECONDS, SECONDS);
  // Wait for agent to be UP and report HostInfo
  awaitHostStatusWithHostInfo(client, host, UP, LONG_WAIT_SECONDS, SECONDS);

  // Kill off agent
  agent.stopAsync().awaitTerminated();
  awaitHostStatus(client, host, DOWN, LONG_WAIT_SECONDS, SECONDS);

  // Start a new agent with the same hostname but have it generate a different ID
  resetAgentStateDir();

  // Starting the new agent may throw IllegalStateException. That is tolerated here; the
  // outcome of the test is decided by whether polling for UP below times out.
  try {
    startDefaultAgent(host, "--zk-registration-ttl", "9999");
  } catch (IllegalStateException ignored) {
    // ignored
  }

  try {
    awaitHostStatus(client, host, UP, 10, SECONDS);
  } catch (TimeoutException expected) {
    // The host never reported UP within the polling window, which is what this test wants.
    return;
  }
  throw new AssertionError(
      "Host " + host + " reported UP although polling for UP was expected to time out");
}
use of com.spotify.helios.client.HeliosClient in project helios by spotify.
the class DeregisterTest method testJobsArePreservedWhenReregistering.
/**
 * Deploys a job to a host, replaces the host's agent with a new one started after the agent
 * state directory was reset, and checks that the job is still reported as RUNNING.
 *
 * @throws Exception if any step fails or times out
 */
@Test
public void testJobsArePreservedWhenReregistering() throws Exception {
  startDefaultMaster();
  final String hostname = testHost();
  final AgentMain originalAgent = startDefaultAgent(hostname, "--labels", "num=1");
  final HeliosClient helios = defaultClient();
  awaitHostStatus(helios, hostname, UP, LONG_WAIT_SECONDS, SECONDS);

  // Get a job running on the original agent
  final JobId deployedJob = createJob(testJobName, testJobVersion, BUSYBOX, IDLE_COMMAND);
  deployJob(deployedJob, hostname);
  awaitJobState(helios, hostname, deployedJob, RUNNING, LONG_WAIT_SECONDS, SECONDS);

  // Shut the original agent down and wait until the host is seen as DOWN
  originalAgent.stopAsync().awaitTerminated();
  awaitHostStatus(helios, hostname, DOWN, LONG_WAIT_SECONDS, SECONDS);

  // Bring up a replacement agent under the same hostname; resetting the state dir makes it
  // generate a different ID
  resetAgentStateDir();
  startDefaultAgent(hostname, "--zk-registration-ttl", "0", "--labels", "num=2");

  // The replacement must be registered and must report the labels it was started with
  awaitHostRegistered(helios, hostname, LONG_WAIT_SECONDS, SECONDS);
  final ImmutableMap<String, String> replacementLabels = ImmutableMap.of("num", "2");
  awaitHostStatusWithLabels(helios, hostname, UP, replacementLabels);

  // The job deployed before the agent was replaced must still be there
  awaitJobState(helios, hostname, deployedJob, RUNNING, WAIT_TIMEOUT_SECONDS, SECONDS);
}
use of com.spotify.helios.client.HeliosClient in project helios by spotify.
the class DeregisterTest method testDeregisterJobDeployedWithoutStatus.
/**
 * Verifies that a host can be deregistered when there are jobs deployed to it for which
 * there is no corresponding status information, for example a job that was deployed to the
 * host after it went down.
 *
 * <p>No agent is started here; the host is registered directly through a ZooKeeper client.
 *
 * @throws Exception if any client call fails
 */
@Test
public void testDeregisterJobDeployedWithoutStatus() throws Exception {
  startDefaultMaster();
  final String hostname = testHost();
  final HeliosClient helios = defaultClient();

  // Register the host by hand, passing a random UUID string
  final DefaultZooKeeperClient zooKeeper =
      new DefaultZooKeeperClient(zk().curatorWithSuperAuth());
  final String hostIdPath = Paths.configHostId(hostname);
  final String randomId = UUID.randomUUID().toString();
  ZooKeeperRegistrarUtil.registerHost(zooKeeper, hostIdPath, hostname, randomId);

  // Define and create the job
  final Job idleJob = Job.newBuilder()
      .setName(testJobName)
      .setVersion(testJobVersion)
      .setImage(BUSYBOX)
      .setCommand(IDLE_COMMAND)
      .setPorts(ImmutableMap.of(
          "foo", PortMapping.of(4711),
          "bar", PortMapping.of(4712, ports.localPort("bar"))))
      .build();
  final JobId idleJobId = idleJob.getId();
  assertEquals(CreateJobResponse.Status.OK, helios.createJob(idleJob).get().getStatus());

  // Deploy the job to the hand-registered host
  final Deployment startDeployment = Deployment.of(idleJobId, START);
  assertEquals(JobDeployResponse.Status.OK,
      helios.deploy(startDeployment, hostname).get().getStatus());

  // Deregistering the host must succeed
  assertEquals(HostDeregisterResponse.Status.OK,
      helios.deregisterHost(hostname).get().getStatus());

  // Afterwards the job itself must be removable
  assertEquals(JobDeleteResponse.Status.OK, helios.deleteJob(idleJobId).get().getStatus());
}
use of com.spotify.helios.client.HeliosClient in project helios by spotify.
the class FlappingTest method test.
/**
 * Deploys a job running {@code nc -p 4711 -l}, repeatedly pokes its "poke" port until the task
 * is reported as FLAPPING, and then checks that the throttle state returns to NO once the
 * poking stops.
 *
 * @throws Exception if any step fails or times out
 */
@Test
public void test() throws Exception {
  // CircleCI boxes are too slow -- the job doesn't stop or restart fast enough to ever flap
  assumeFalse(isCircleCi());

  startDefaultMaster();
  final String host = testHost();
  startDefaultAgent(host);
  final HeliosClient client = defaultClient();
  awaitHostStatus(client, host, UP, LONG_WAIT_SECONDS, SECONDS);

  final Job flapper = Job.newBuilder()
      .setName(testJobName)
      .setVersion(testJobVersion)
      .setImage(BUSYBOX)
      .setCommand(asList("nc", "-p", "4711", "-l"))
      .addPort("poke", PortMapping.of(4711))
      .build();
  final JobId jobId = createJob(flapper);
  deployJob(jobId, host);
  awaitTaskState(jobId, host, RUNNING);

  // Poke the container to make it exit until it's classified as flapping
  Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<Object>() {
    @Override
    public Object call() throws Exception {
      final JobStatus jobStatus = getOrNull(client.jobStatus(jobId));
      if (jobStatus == null) {
        // No job status this round; returning null makes the poller try again instead of
        // aborting the whole test with a NullPointerException.
        return null;
      }
      final TaskStatus taskStatus = jobStatus.getTaskStatuses().get(host);
      if (taskStatus == null) {
        // The status map has no entry for this host right now; try again.
        return null;
      }
      if (taskStatus.getThrottled() == FLAPPING) {
        return true;
      }
      final PortMapping port = taskStatus.getPorts().get("poke");
      assert port.getExternalPort() != null;
      poke(port.getExternalPort());
      // Not flapping yet: keep polling (and poking)
      return null;
    }
  });

  // Verify that the job recovers after we stop poking
  awaitJobThrottle(client, host, jobId, NO, LONG_WAIT_SECONDS, SECONDS);
}
use of com.spotify.helios.client.HeliosClient in project helios by spotify.
the class HealthCheckTest method testContainerDiesDuringHealthcheck.
/**
 * Kills a job's container while the task is in the HEALTHCHECKING state, waits for a FAILED
 * event to show up in the job history, then waits for the task to reach HEALTHCHECKING again
 * and runs {@code pokeAndVerifyRegistration} against it.
 *
 * @throws Exception if any step fails or times out
 */
@Test
public void testContainerDiesDuringHealthcheck() throws Exception {
  startDefaultMaster();
  final HeliosClient client = defaultClient();
  startDefaultAgent(testHost(), "--service-registry=" + registryAddress);
  awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

  final HealthCheck healthCheck = TcpHealthCheck.of("health");
  final Job job = pokeJob(healthCheck);
  final JobId jobId = createJob(job);
  deployJob(jobId, testHost());
  awaitTaskState(jobId, testHost(), HEALTHCHECKING);

  // kill the underlying container
  final JobStatus jobStatus = getOrNull(client.jobStatus(jobId));
  final TaskStatus taskStatus = jobStatus.getTaskStatuses().get(testHost());
  getNewDockerClient().killContainer(taskStatus.getContainerId());

  // ensure the job is marked as failed
  final int timeout = WAIT_TIMEOUT_SECONDS;
  Polling.await(timeout, SECONDS, new Callable<Object>() {
    @Override
    public Object call() throws Exception {
      final TaskStatusEvents jobHistory = getOrNull(client.jobHistory(jobId));
      if (jobHistory == null) {
        // No history this round; returning null makes the poller try again instead of
        // aborting the whole test with a NullPointerException.
        return null;
      }
      for (final TaskStatusEvent event : jobHistory.getEvents()) {
        if (event.getStatus().getState() == FAILED) {
          return true;
        }
      }
      // No FAILED event recorded yet: keep polling
      return null;
    }
  });

  // wait for the job to come back up and start healthchecking again
  awaitTaskState(jobId, testHost(), HEALTHCHECKING);
  pokeAndVerifyRegistration(client, jobId, timeout);
}
Aggregations