use of com.spotify.helios.client.HeliosClient in project helios by spotify.
the class AgentRestartTest method test.
/**
 * Exercises agent restarts against a single deployed job.
 *
 * <p>The agent is repeatedly stopped and started on the same host while, in between, the job's
 * container is left running, killed, killed and removed, and while the job's goal is switched to
 * STOP, back to START, and finally the job is undeployed. After each restart the reported task
 * state and the number of containers listed for {@code testTag} are asserted.
 */
@Test
public void test() throws Exception {
startDefaultMaster();
final DockerClient dockerClient = getNewDockerClient();
final HeliosClient client = defaultClient();
final AgentMain agent1 = startDefaultAgent(testHost());
// Create a job
final Job job = Job.newBuilder().setName(testJobName).setVersion(testJobVersion).setImage(BUSYBOX).setCommand(IDLE_COMMAND).setCreatingUser(TEST_USER).build();
final JobId jobId = job.getId();
final CreateJobResponse created = client.createJob(job).get();
assertEquals(CreateJobResponse.Status.OK, created.getStatus());
// Wait for the agent to be registered and reported as UP
awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Deploy the job on the agent
final Deployment deployment = Deployment.of(jobId, START);
final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());
// Wait for the job to run, then check that exactly one container exists and that it is running
final TaskStatus firstTaskStatus = awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
assertJobEquals(job, firstTaskStatus.getJob());
assertEquals(1, listContainers(dockerClient, testTag).size());
assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
// Restart 1: stop the agent while the container keeps running
agent1.stopAsync().awaitTerminated();
awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
// Start the agent again
final AgentMain agent2 = startDefaultAgent(testHost());
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Wait for a while and make sure that the same container is still running
Thread.sleep(5000);
final HostStatus hostStatus = client.hostStatus(testHost()).get();
final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
// NOTE(review): firstTaskStatus was obtained by awaiting RUNNING above, so the PULLING_IMAGE
// branch looks unreachable - confirm against awaitJobState before relying on it.
if (firstTaskStatus.getState() == PULLING_IMAGE) {
final State state = taskStatus.getState();
assertTrue(state == RUNNING || state == PULLING_IMAGE);
} else {
assertEquals(RUNNING, taskStatus.getState());
}
// The container id must be unchanged: the restarted agent is expected to reuse the container
assertEquals(firstTaskStatus.getContainerId(), taskStatus.getContainerId());
assertEquals(1, listContainers(dockerClient, testTag).size());
assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
// Restart 2: stop the agent and kill the container while the agent is down
agent2.stopAsync().awaitTerminated();
awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
// Kill the container
dockerClient.killContainer(firstTaskStatus.getContainerId());
assertEquals(0, listContainers(dockerClient, testTag).size());
// Start the agent again
final AgentMain agent3 = startDefaultAgent(testHost());
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Wait for the job to be restarted in a new container: poll until the task is RUNNING with a
// container id different from the first one (returning null makes the poll try again)
final TaskStatus secondTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {
@Override
public TaskStatus call() throws Exception {
final HostStatus hostStatus = client.hostStatus(testHost()).get();
final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(firstTaskStatus.getContainerId())) ? taskStatus : null;
}
});
assertEquals(1, listContainers(dockerClient, testTag).size());
assertTrue(dockerClient.inspectContainer(secondTaskStatus.getContainerId()).state().running());
// Restart 3: stop the agent, then kill and remove the container while the agent is down
agent3.stopAsync().awaitTerminated();
awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
// Kill and destroy the container
dockerClient.killContainer(secondTaskStatus.getContainerId());
removeContainer(dockerClient, secondTaskStatus.getContainerId());
// Start the agent again
final AgentMain agent4 = startDefaultAgent(testHost());
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Wait for the task to be restarted in a new container: poll until the task is RUNNING with a
// container id different from the second one
final TaskStatus thirdTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {
@Override
public TaskStatus call() throws Exception {
final HostStatus hostStatus = client.hostStatus(testHost()).get();
final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(secondTaskStatus.getContainerId())) ? taskStatus : null;
}
});
assertEquals(1, listContainers(dockerClient, testTag).size());
assertTrue(dockerClient.inspectContainer(thirdTaskStatus.getContainerId()).state().running());
// Restart 4: stop the agent and set the job's goal to STOP while the agent is down
agent4.stopAsync().awaitTerminated();
awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
// Stop the job
final SetGoalResponse stopped = client.setGoal(Deployment.of(jobId, STOP), testHost()).get();
assertEquals(SetGoalResponse.Status.OK, stopped.getStatus());
// Start the agent again
final AgentMain agent5 = startDefaultAgent(testHost());
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Verify that the task is stopped
awaitJobState(client, testHost(), jobId, STOPPED, LONG_WAIT_SECONDS, SECONDS);
assertEquals(0, listContainers(dockerClient, testTag).size());
// Restart 5: stop the agent and set the job's goal back to START while the agent is down
agent5.stopAsync().awaitTerminated();
awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
// Start the job
final SetGoalResponse started = client.setGoal(Deployment.of(jobId, START), testHost()).get();
assertEquals(SetGoalResponse.Status.OK, started.getStatus());
// Start the agent again
final AgentMain agent6 = startDefaultAgent(testHost());
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Verify that the task is started
awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
assertEquals(1, listContainers(dockerClient, testTag).size());
// Restart 6: stop the agent and undeploy the job while the agent is down
agent6.stopAsync().awaitTerminated();
awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
// Undeploy the job
final JobUndeployResponse undeployed = client.undeploy(jobId, testHost()).get();
assertEquals(JobUndeployResponse.Status.OK, undeployed.getStatus());
// Start the agent again
// NOTE(review): this last agent is not stopped here; presumably the test base class tears it
// down - confirm.
startDefaultAgent(testHost());
awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
// Wait for the task to get removed
awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
assertEquals(0, listContainers(dockerClient, testTag).size());
}
use of com.spotify.helios.client.HeliosClient in project helios by spotify.
the class MultiTargetControlCommand method run.
/**
 * Creates one {@link HeliosClient} per target, delegates to the multi-client {@code run}
 * overload, and closes every client before returning.
 *
 * @param options  parsed command line options, passed on to client creation and to the
 *                 delegated {@code run}
 * @param targets  the targets to create clients for
 * @param out      stream handed to the delegated {@code run}
 * @param err      stream used for client-creation errors and for the timeout message
 * @param username user name passed to client creation
 * @param json     flag handed to the delegated {@code run}
 * @param stdin    reader handed to the delegated {@code run}
 * @return 1 if a client could not be created for some target or a request timed out; otherwise
 *         the value returned by the delegated {@code run}
 * @throws Exception whatever the delegated {@code run} throws; the cause of an
 *                   {@link ExecutionException} is rethrown unless it is a
 *                   {@link TimeoutException}
 */
@Override
public int run(final Namespace options, final List<Target> targets, final PrintStream out, final PrintStream err, final String username, final boolean json, final BufferedReader stdin) throws Exception {
  final Builder<TargetAndClient> clientBuilder = ImmutableList.<TargetAndClient>builder();
  for (final Target target : targets) {
    final HeliosClient client = Utils.getClient(target, err, username, options);
    if (client == null) {
      // Bail out, but do not leak the clients that were already created for earlier targets.
      for (final TargetAndClient created : clientBuilder.build()) {
        created.getClient().close();
      }
      return 1;
    }
    clientBuilder.add(new TargetAndClient(target, client));
  }
  final List<TargetAndClient> clients = clientBuilder.build();
  final int result;
  try {
    result = run(options, clients, out, json, stdin);
  } catch (ExecutionException e) {
    final Throwable cause = e.getCause();
    // A timeout is reported as a short message and exit code 1; anything else is rethrown.
    if (cause instanceof TimeoutException) {
      err.println("Request timed out to master");
    } else {
      throw Throwables.propagate(cause);
    }
    return 1;
  } finally {
    // Always release the clients, whether the command succeeded, timed out or threw.
    for (final TargetAndClient cc : clients) {
      cc.getClient().close();
    }
  }
  return result;
}
use of com.spotify.helios.client.HeliosClient in project helios by spotify.
the class HeliosDeploymentResource method before.
/**
 * Ensure that the HeliosDeployment is up.
 *
 * <p>First polls (for up to 30 seconds) until a TCP connection to the deployment's address can
 * be opened, then polls (for up to 30 seconds) until the master lists at least one host whose
 * status is UP.
 *
 * @throws Throwable if {@code super.before()} fails or either polling step fails
 */
@Override
public void before() throws Throwable {
  super.before();
  log.info("verifying connectivity to {}", deployment.address());
  // wait for the helios master to be available
  Polling.awaitUnchecked(30, TimeUnit.SECONDS, "Could not connect to HeliosDeployment at " + deployment.address() + " after %d %s", new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      final HostAndPort hap = deployment.address();
      final SocketAddress address = new InetSocketAddress(hap.getHostText(), hap.getPort());
      log.debug("attempting to connect to {}", address);
      // try-with-resources: the probe socket is only used to test reachability, so it must be
      // closed on every attempt, whether the connect succeeds or not.
      try (Socket s = new Socket()) {
        s.connect(address, 100);
        log.info("successfully connected to address {} for {}", address, deployment);
        return true;
      } catch (SocketTimeoutException | ConnectException e) {
        log.debug("could not yet connect to HeliosDeployment: {}", e.toString());
        // null tells the polling loop to try again
        return null;
      }
    }
  });
  // Ensure that at least one agent is available and UP in this HeliosDeployment.
  // This prevents continuing with the test when starting up helios-solo before the agent is
  // registered.
  final HeliosClient client = client();
  Polling.awaitUnchecked(30, TimeUnit.SECONDS, "No agents were available at HeliosDeployment at " + deployment.address() + " after %d %s", new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      final ListenableFuture<List<String>> future = client.listHosts();
      final List<String> hosts;
      try {
        // use a short timeout to allow this request to be retried a few times by the
        // Polling.await loop
        hosts = future.get(1, TimeUnit.SECONDS);
      } catch (TimeoutException | InterruptedException e) {
        log.debug("timed out waiting for listHosts request to finish, will retry");
        return null;
      }
      if (hosts.isEmpty()) {
        log.debug("0 agents in {}, will retry", deployment);
        return null;
      }
      // Check that at least one host is UP (is maintaining a reasonably reliable
      // connection to ZK) in addition to registering.
      final ListenableFuture<Map<String, HostStatus>> statusFuture = client.hostStatuses(hosts);
      final Map<String, HostStatus> hostStatuses;
      try {
        hostStatuses = statusFuture.get(1, TimeUnit.SECONDS);
      } catch (TimeoutException | InterruptedException e) {
        log.debug("timed out waiting for hostStatuses to finish, will retry");
        return null;
      }
      for (final HostStatus hostStatus : hostStatuses.values()) {
        if (hostStatus != null && hostStatus.getStatus() == HostStatus.Status.UP) {
          log.info("Ensured that at least one agent is UP in this HeliosDeployment, " + "continuing with test!");
          return true;
        }
      }
      // hosts are registered but none is UP yet; retry
      return null;
    }
  });
}
use of com.spotify.helios.client.HeliosClient in project helios by spotify.
the class ZooKeeperClusterIdTest method testMaster.
/**
 * Verifies that the master serves requests while the ZooKeeper cluster ID node exists and
 * starts failing with a response mentioning "500" once that node has been deleted.
 */
@Test
public void testMaster() throws Exception {
  startDefaultMaster("--zk-cluster-id=" + zkClusterId);
  final HeliosClient client = defaultClient();
  // This should succeed since the cluster ID was created by SystemTestBase
  client.jobs().get();
  // Delete the cluster ID
  zk().curatorWithSuperAuth().delete().forPath(Paths.configId(zkClusterId));
  // Call jobs again, and this time it should throw an exception because the cluster ID is gone
  try {
    client.jobs().get();
    // Without this the test would pass silently when the request unexpectedly succeeds.
    // AssertionError is not an ExecutionException, so it is not swallowed by the catch below.
    throw new AssertionError("Expected jobs() to fail after the cluster ID was deleted");
  } catch (ExecutionException e) {
    assertThat(e.getMessage(), containsString("500"));
  }
}
use of com.spotify.helios.client.HeliosClient in project helios by spotify.
the class ReapingTest method test.
/**
 * Checks that an agent kills containers found in its own namespace: one container that is
 * started before the agent comes up and one that is started while the agent is running.
 */
@Test
public void test() throws Exception {
  startDefaultMaster();

  // Build a random agent id and derive the namespace and two intruder names from it
  final int randomBits = new SecureRandom().nextInt();
  final String agentId = "test-" + toHexString(randomBits);
  final String agentNamespace = "helios-" + agentId;
  final String firstIntruder = intruder(agentNamespace);
  final String secondIntruder = intruder(agentNamespace);

  // The first intruder is launched before the agent exists
  startContainer(firstIntruder);

  // Bring up the agent with the generated id and wait until it is registered and UP
  final HeliosClient helios = defaultClient();
  startDefaultAgent(testHost(), "--id=" + agentId);
  awaitHostRegistered(helios, testHost(), LONG_WAIT_SECONDS, SECONDS);
  awaitHostStatus(helios, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

  // Exit code of a killed container depends on the setup:
  // with LXC it is 0; docker 1.5 reports 137 and earlier versions report -1.
  final boolean lxcDriver = docker.info().executionDriver().startsWith("lxc-");
  final List<Integer> killExitCodes;
  if (lxcDriver) {
    killExitCodes = Collections.singletonList(0);
  } else {
    killExitCodes = asList(-1, 137);
  }

  // The agent is expected to kill the first intruder
  final ContainerExit firstExit = docker.waitContainer(firstIntruder);
  assertThat(firstExit.statusCode(), isIn(killExitCodes));

  // Launch the second intruder while the agent is running; it must be killed too
  startContainer(secondIntruder);
  final ContainerExit secondExit = docker.waitContainer(secondIntruder);
  assertThat(secondExit.statusCode(), isIn(killExitCodes));
}
Aggregations