Search in sources :

Example 21 with HeliosClient

use of com.spotify.helios.client.HeliosClient in project helios by spotify.

the class AgentRestartTest method test.

/**
 * End-to-end scenario checking how a deployed job behaves across repeated agent restarts.
 *
 * <p>The agent is stopped and started several times. While it is down, the test in turn: does
 * nothing, kills the container, kills and removes the container, sets the goal to STOP, sets
 * the goal back to START, and finally undeploys the job. After each restart the test waits for
 * the expected task state and checks the number of containers listed for {@code testTag}.
 *
 * @throws Exception if any client, docker or polling call fails
 */
@Test
public void test() throws Exception {
    startDefaultMaster();
    final DockerClient dockerClient = getNewDockerClient();
    final HeliosClient client = defaultClient();
    final AgentMain agent1 = startDefaultAgent(testHost());
    // Create a job
    final Job job = Job.newBuilder().setName(testJobName).setVersion(testJobVersion).setImage(BUSYBOX).setCommand(IDLE_COMMAND).setCreatingUser(TEST_USER).build();
    final JobId jobId = job.getId();
    final CreateJobResponse created = client.createJob(job).get();
    assertEquals(CreateJobResponse.Status.OK, created.getStatus());
    // Wait for agent to come up
    awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Deploy the job on the agent
    final Deployment deployment = Deployment.of(jobId, START);
    final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
    assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());
    // Wait for the job to run
    final TaskStatus firstTaskStatus = awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertJobEquals(job, firstTaskStatus.getJob());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent1.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Start the agent again
    final AgentMain agent2 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for a while and make sure that the same container is still running
    Thread.sleep(5000);
    final HostStatus hostStatus = client.hostStatus(testHost()).get();
    final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
    // NOTE(review): firstTaskStatus was obtained by awaiting RUNNING above, so the PULLING_IMAGE
    // branch looks unreachable - confirm against awaitJobState before relying on it.
    if (firstTaskStatus.getState() == PULLING_IMAGE) {
        final State state = taskStatus.getState();
        assertTrue(state == RUNNING || state == PULLING_IMAGE);
    } else {
        assertEquals(RUNNING, taskStatus.getState());
    }
    // The restarted agent must report the same container id rather than a new one
    assertEquals(firstTaskStatus.getContainerId(), taskStatus.getContainerId());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent2.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Kill the container
    dockerClient.killContainer(firstTaskStatus.getContainerId());
    assertEquals(0, listContainers(dockerClient, testTag).size());
    // Start the agent again
    final AgentMain agent3 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the job to be restarted in a new container
    final TaskStatus secondTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

        @Override
        public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            // Yield the status only once it is RUNNING in a container other than the first one;
            // null is returned otherwise.
            return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(firstTaskStatus.getContainerId())) ? taskStatus : null;
        }
    });
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(secondTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent3.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Kill and destroy the container
    dockerClient.killContainer(secondTaskStatus.getContainerId());
    removeContainer(dockerClient, secondTaskStatus.getContainerId());
    // Start the agent again
    final AgentMain agent4 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the task to be restarted in a new container
    final TaskStatus thirdTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

        @Override
        public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            // Same condition as the previous poll, but compared against the second container id.
            return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(secondTaskStatus.getContainerId())) ? taskStatus : null;
        }
    });
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(thirdTaskStatus.getContainerId()).state().running());
    // Stop the agent
    agent4.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Stop the job
    final SetGoalResponse stopped = client.setGoal(Deployment.of(jobId, STOP), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, stopped.getStatus());
    // Start the agent again
    final AgentMain agent5 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Verify that the task is stopped
    awaitJobState(client, testHost(), jobId, STOPPED, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
    // Stop the agent
    agent5.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Start the job
    final SetGoalResponse started = client.setGoal(Deployment.of(jobId, START), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, started.getStatus());
    // Start the agent again
    final AgentMain agent6 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Verify that the task is started
    awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(1, listContainers(dockerClient, testTag).size());
    // Stop the agent
    agent6.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Undeploy the job
    final JobUndeployResponse undeployed = client.undeploy(jobId, testHost()).get();
    assertEquals(JobUndeployResponse.Status.OK, undeployed.getStatus());
    // Start the agent again
    startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the task to get removed
    awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
}
Also used : SetGoalResponse(com.spotify.helios.common.protocol.SetGoalResponse) DockerClient(com.spotify.docker.client.DockerClient) Deployment(com.spotify.helios.common.descriptors.Deployment) HeliosClient(com.spotify.helios.client.HeliosClient) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) JobDeployResponse(com.spotify.helios.common.protocol.JobDeployResponse) CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) AgentMain(com.spotify.helios.agent.AgentMain) State(com.spotify.helios.common.descriptors.TaskStatus.State) JobUndeployResponse(com.spotify.helios.common.protocol.JobUndeployResponse) HostStatus(com.spotify.helios.common.descriptors.HostStatus) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Example 22 with HeliosClient

use of com.spotify.helios.client.HeliosClient in project helios by spotify.

the class MultiTargetControlCommand method run.

/**
 * Creates one {@link HeliosClient} per target, delegates to the multi-client {@code run}
 * overload, and closes every client that was created before returning.
 *
 * @param options  parsed command-line options, forwarded to client creation and the delegate
 * @param targets  targets to create clients for
 * @param out      stream forwarded to the delegate
 * @param err      stream used for client-creation errors and the timeout message
 * @param username user name forwarded to client creation
 * @param json     flag forwarded to the delegate
 * @param stdin    reader forwarded to the delegate
 * @return {@code 1} if a client could not be created for some target or the delegate failed
 *         with a timeout; otherwise the delegate's return value
 * @throws Exception whatever client creation, the delegate or closing a client throws; a
 *         non-timeout cause of an {@link ExecutionException} is propagated
 */
@Override
public int run(final Namespace options, final List<Target> targets, final PrintStream out, final PrintStream err, final String username, final boolean json, final BufferedReader stdin) throws Exception {
    final Builder<TargetAndClient> clientBuilder = ImmutableList.<TargetAndClient>builder();
    try {
        for (final Target target : targets) {
            final HeliosClient client = Utils.getClient(target, err, username, options);
            if (client == null) {
                // Clients already created for earlier targets are closed by the finally block
                // below instead of being leaked on this early exit.
                return 1;
            }
            clientBuilder.add(new TargetAndClient(target, client));
        }
        final List<TargetAndClient> clients = clientBuilder.build();
        try {
            return run(options, clients, out, json, stdin);
        } catch (ExecutionException e) {
            final Throwable cause = e.getCause();
            // A timeout is reported as a short message on err; any other cause is rethrown.
            if (cause instanceof TimeoutException) {
                err.println("Request timed out to master");
            } else {
                throw Throwables.propagate(cause);
            }
            return 1;
        }
    } finally {
        // Build from the builder rather than a local list so that clients added before an
        // early return or an exception during creation are closed as well.
        for (final TargetAndClient cc : clientBuilder.build()) {
            cc.getClient().close();
        }
    }
}
Also used : Target(com.spotify.helios.cli.Target) HeliosClient(com.spotify.helios.client.HeliosClient) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException)

Example 23 with HeliosClient

use of com.spotify.helios.client.HeliosClient in project helios by spotify.

the class HeliosDeploymentResource method before.

/**
 * Ensure that the HeliosDeployment is up.
 *
 * <p>First polls (for up to 30 seconds) until a TCP connection to {@code deployment.address()}
 * succeeds, then polls (again for up to 30 seconds) until at least one host reported by the
 * client has status {@code UP}.
 *
 * @throws Throwable if {@code super.before()} or either polling step fails
 */
@Override
public void before() throws Throwable {
    super.before();
    log.info("verifying connectivity to {}", deployment.address());
    // wait for the helios master to be available
    Polling.awaitUnchecked(30, TimeUnit.SECONDS, "Could not connect to HeliosDeployment at " + deployment.address() + " after %d %s", new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            final HostAndPort hap = deployment.address();
            final SocketAddress address = new InetSocketAddress(hap.getHostText(), hap.getPort());
            log.debug("attempting to connect to {}", address);
            // try-with-resources closes the probe socket on every attempt, successful or not,
            // so repeated polling does not leak file descriptors.
            try (Socket s = new Socket()) {
                s.connect(address, 100);
                log.info("successfully connected to address {} for {}", address, deployment);
                return true;
            } catch (SocketTimeoutException | ConnectException e) {
                log.debug("could not yet connect to HeliosDeployment: {}", e.toString());
                // null tells the polling loop to try again
                return null;
            }
        }
    });
    // Ensure that at least one agent is available and UP in this HeliosDeployment.
    // This prevents continuing with the test when starting up helios-solo before the agent is
    // registered.
    final HeliosClient client = client();
    Polling.awaitUnchecked(30, TimeUnit.SECONDS, "No agents were available at HeliosDeployment at " + deployment.address() + " after %d %s", new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            final ListenableFuture<List<String>> future = client.listHosts();
            final List<String> hosts;
            try {
                // use a short timeout to allow this request to be retried a few times by the
                // Polling.await loop
                hosts = future.get(1, TimeUnit.SECONDS);
            } catch (TimeoutException | InterruptedException e) {
                // NOTE(review): an InterruptedException is treated like a timeout here and the
                // interrupt flag is not restored - confirm this is intended.
                log.debug("timed out waiting for listHosts request to finish, will retry");
                return null;
            }
            if (hosts.isEmpty()) {
                log.debug("0 agents in {}, will retry", deployment);
                return null;
            }
            // Check that at least one host is UP (is maintaining a reasonably reliable
            // connection to ZK) in addition to registering.
            final ListenableFuture<Map<String, HostStatus>> statusFuture = client.hostStatuses(hosts);
            final Map<String, HostStatus> hostStatuses;
            try {
                hostStatuses = statusFuture.get(1, TimeUnit.SECONDS);
            } catch (TimeoutException | InterruptedException e) {
                log.debug("timed out waiting for hostStatuses to finish, will retry");
                return null;
            }
            for (final HostStatus hostStatus : hostStatuses.values()) {
                if (hostStatus != null && hostStatus.getStatus() == HostStatus.Status.UP) {
                    log.info("Ensured that at least one agent is UP in this HeliosDeployment, " + "continuing with test!");
                    return true;
                }
            }
            // No host is UP yet; keep polling.
            return null;
        }
    });
}
Also used : InetSocketAddress(java.net.InetSocketAddress) HeliosClient(com.spotify.helios.client.HeliosClient) TimeoutException(java.util.concurrent.TimeoutException) SocketTimeoutException(java.net.SocketTimeoutException) ConnectException(java.net.ConnectException) HostAndPort(com.google.common.net.HostAndPort) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) List(java.util.List) HostStatus(com.spotify.helios.common.descriptors.HostStatus) SocketAddress(java.net.SocketAddress) InetSocketAddress(java.net.InetSocketAddress) Map(java.util.Map) Socket(java.net.Socket)

Example 24 with HeliosClient

use of com.spotify.helios.client.HeliosClient in project helios by spotify.

the class ZooKeeperClusterIdTest method testMaster.

/**
 * Checks that the master serves requests while the ZooKeeper cluster ID node exists and that
 * requests fail with an error mentioning "500" once the node has been deleted.
 *
 * @throws Exception if starting the master, the first request or the ZooKeeper delete fails
 */
@Test
public void testMaster() throws Exception {
    startDefaultMaster("--zk-cluster-id=" + zkClusterId);
    final HeliosClient client = defaultClient();
    // This should succeed since the cluster ID was created by SystemTestBase
    client.jobs().get();
    // Delete the cluster ID
    zk().curatorWithSuperAuth().delete().forPath(Paths.configId(zkClusterId));
    // Call jobs again, and this time it should throw an exception because the cluster ID is gone
    try {
        client.jobs().get();
    } catch (ExecutionException e) {
        assertThat(e.getMessage(), containsString("500"));
        return;
    }
    // Reaching this point means the request succeeded; without this the test would pass
    // silently even though the expected failure never happened.
    throw new AssertionError("Expected jobs() to fail after the cluster ID was deleted");
}
Also used : HeliosClient(com.spotify.helios.client.HeliosClient) ExecutionException(java.util.concurrent.ExecutionException) Test(org.junit.Test)

Example 25 with HeliosClient

use of com.spotify.helios.client.HeliosClient in project helios by spotify.

the class ReapingTest method test.

/**
 * Starts containers named via {@code intruder(namespace)} for an agent started with a matching
 * {@code --id}, and asserts that each container exits with one of the exit codes expected for
 * a killed container.
 *
 * @throws Exception if any master, agent or docker call fails
 */
@Test
public void test() throws Exception {
    startDefaultMaster();

    // Random agent id; the namespace handed to intruder() is derived from it.
    final String agentId = "test-" + toHexString(new SecureRandom().nextInt());
    final String agentNamespace = "helios-" + agentId;
    final String firstIntruder = intruder(agentNamespace);
    final String secondIntruder = intruder(agentNamespace);

    // The first intruder is started before the agent exists.
    startContainer(firstIntruder);

    // Bring up the agent and wait until the master reports it as registered and UP.
    final HeliosClient heliosClient = defaultClient();
    startDefaultAgent(testHost(), "--id=" + agentId);
    awaitHostRegistered(heliosClient, testHost(), LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(heliosClient, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // With LXC, killing a container results in exit code 0.
    // In docker 1.5 killing a container results in exit code 137, in previous versions it's -1.
    final boolean lxcDriver = docker.info().executionDriver().startsWith("lxc-");
    final List<Integer> killedExitCodes;
    if (lxcDriver) {
        killedExitCodes = Collections.singletonList(0);
    } else {
        killedExitCodes = asList(-1, 137);
    }

    // Block until the first intruder exits, then check how it exited.
    final ContainerExit firstExit = docker.waitContainer(firstIntruder);
    assertThat(firstExit.statusCode(), isIn(killedExitCodes));

    // The second intruder is started while the agent is already running.
    startContainer(secondIntruder);

    // It is expected to exit the same way.
    final ContainerExit secondExit = docker.waitContainer(secondIntruder);
    assertThat(secondExit.statusCode(), isIn(killedExitCodes));
}
Also used : SecureRandom(java.security.SecureRandom) ContainerExit(com.spotify.docker.client.messages.ContainerExit) Integer.toHexString(java.lang.Integer.toHexString) HeliosClient(com.spotify.helios.client.HeliosClient) Test(org.junit.Test)

Aggregations

HeliosClient (com.spotify.helios.client.HeliosClient)57 Test (org.junit.Test)53 JobId (com.spotify.helios.common.descriptors.JobId)35 Job (com.spotify.helios.common.descriptors.Job)25 Deployment (com.spotify.helios.common.descriptors.Deployment)19 CreateJobResponse (com.spotify.helios.common.protocol.CreateJobResponse)14 TaskStatus (com.spotify.helios.common.descriptors.TaskStatus)13 Matchers.containsString (org.hamcrest.Matchers.containsString)11 DockerClient (com.spotify.docker.client.DockerClient)10 JobDeployResponse (com.spotify.helios.common.protocol.JobDeployResponse)10 HostStatus (com.spotify.helios.common.descriptors.HostStatus)9 AgentMain (com.spotify.helios.agent.AgentMain)8 JobStatus (com.spotify.helios.common.descriptors.JobStatus)6 PortMapping (com.spotify.helios.common.descriptors.PortMapping)5 Map (java.util.Map)5 ExecHealthCheck (com.spotify.helios.common.descriptors.ExecHealthCheck)4 HealthCheck (com.spotify.helios.common.descriptors.HealthCheck)4 HttpHealthCheck (com.spotify.helios.common.descriptors.HttpHealthCheck)4 ServiceEndpoint (com.spotify.helios.common.descriptors.ServiceEndpoint)4 TcpHealthCheck (com.spotify.helios.common.descriptors.TcpHealthCheck)4