Search in sources :

Example 11 with CreateJobResponse

use of com.spotify.helios.common.protocol.CreateJobResponse in project helios by spotify.

the class AgentRestartTest method test.

/**
 * Drives a single job through a series of agent restarts and checks, after each step, how many
 * containers carrying the test tag exist.
 *
 * Sequence visible in this excerpt: create the job, deploy it with goal START, wait for RUNNING,
 * then repeatedly wait for the host to go DOWN and come back UP while (a) leaving the container
 * alone, (b) expecting zero containers, (c) removing the container, (d) setting the goal to STOP,
 * (e) setting the goal back to START, and (f) undeploying the job.
 *
 * NOTE(review): this excerpt appears truncated. Closing braces are missing (the else branch, both
 * anonymous Callable bodies, the method itself), and several "Stop the agent" / "Kill the
 * container" / final "Start the agent again" comments are not followed by a matching call.
 * Confirm against the original AgentRestartTest source before relying on it.
 */
public void test() throws Exception {
    final DockerClient dockerClient = getNewDockerClient();
    final HeliosClient client = defaultClient();
    final AgentMain agent1 = startDefaultAgent(testHost());
    // Create a job
    final Job job = Job.newBuilder().setName(testJobName).setVersion(testJobVersion).setImage(BUSYBOX).setCommand(IDLE_COMMAND).setCreatingUser(TEST_USER).build();
    final JobId jobId = job.getId();
    final CreateJobResponse created = client.createJob(job).get();
    assertEquals(CreateJobResponse.Status.OK, created.getStatus());
    // Wait for agent to come up
    awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Deploy the job on the agent
    final Deployment deployment = Deployment.of(jobId, START);
    final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
    assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());
    // Wait for the job to run
    final TaskStatus firstTaskStatus = awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertJobEquals(job, firstTaskStatus.getJob());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    // Stop the agent
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Start the agent again
    final AgentMain agent2 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for a while and make sure that the same container is still running
    final HostStatus hostStatus = client.hostStatus(testHost()).get();
    final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
    // If the first observed state was PULLING_IMAGE, either RUNNING or PULLING_IMAGE is accepted
    // after the restart; otherwise the task must be RUNNING.
    if (firstTaskStatus.getState() == PULLING_IMAGE) {
        final State state = taskStatus.getState();
        assertTrue(state == RUNNING || state == PULLING_IMAGE);
    } else {
        assertEquals(RUNNING, taskStatus.getState());
    // The container id must be unchanged across the restart.
    assertEquals(firstTaskStatus.getContainerId(), taskStatus.getContainerId());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    // Stop the agent
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Kill the container
    assertEquals(0, listContainers(dockerClient, testTag).size());
    // Start the agent again
    final AgentMain agent3 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the job to be restarted in a new container
    final TaskStatus secondTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

        public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            // Yields the status only once it is RUNNING with a container id different from the
            // first one; null otherwise.
            return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(firstTaskStatus.getContainerId())) ? taskStatus : null;
    assertEquals(1, listContainers(dockerClient, testTag).size());
    // Stop the agent
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Kill and destroy the container
    removeContainer(dockerClient, secondTaskStatus.getContainerId());
    // Start the agent again
    final AgentMain agent4 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the task to be restarted in a new container
    final TaskStatus thirdTaskStatus = Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {

        public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            // Same check as above, but compared against the second container id.
            return (taskStatus != null && taskStatus.getContainerId() != null && taskStatus.getState() == RUNNING && !taskStatus.getContainerId().equals(secondTaskStatus.getContainerId())) ? taskStatus : null;
    assertEquals(1, listContainers(dockerClient, testTag).size());
    // Stop the agent
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Stop the job
    final SetGoalResponse stopped = client.setGoal(Deployment.of(jobId, STOP), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, stopped.getStatus());
    // Start the agent again
    final AgentMain agent5 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Verify that the task is stopped
    awaitJobState(client, testHost(), jobId, STOPPED, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
    // Stop the agent
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Start the job
    final SetGoalResponse started = client.setGoal(Deployment.of(jobId, START), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, started.getStatus());
    // Start the agent again
    final AgentMain agent6 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Verify that the task is started
    awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(1, listContainers(dockerClient, testTag).size());
    // Stop the agent
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);
    // Undeploy the job
    final JobUndeployResponse undeployed = client.undeploy(jobId, testHost()).get();
    assertEquals(JobUndeployResponse.Status.OK, undeployed.getStatus());
    // Start the agent again
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);
    // Wait for the task to get removed
    awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
Also used : SetGoalResponse(com.spotify.helios.common.protocol.SetGoalResponse) DockerClient(com.spotify.docker.client.DockerClient) Deployment(com.spotify.helios.common.descriptors.Deployment) HeliosClient(com.spotify.helios.client.HeliosClient) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) JobDeployResponse(com.spotify.helios.common.protocol.JobDeployResponse) CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) AgentMain(com.spotify.helios.agent.AgentMain) State(com.spotify.helios.common.descriptors.TaskStatus.State) JobUndeployResponse(com.spotify.helios.common.protocol.JobUndeployResponse) HostStatus(com.spotify.helios.common.descriptors.HostStatus) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId) Test(org.junit.Test)

Example 12 with CreateJobResponse

use of com.spotify.helios.common.protocol.CreateJobResponse in project helios by spotify.

the class JobCreateCommand method run.

/**
 * Assembles a Job from command-line options, optionally seeded from a job file or from an existing
 * template job, validates it with JOB_VALIDATOR, and submits it through client.createJob.
 *
 * NOTE(review): this excerpt appears heavily truncated. Closing braces, the bodies of many
 * if-blocks and switch cases, and parts of some expressions (for example the right-hand sides of
 * the file-based "job" and template "jobs" assignments) are missing. The notes below describe only
 * what is still visible; confirm against the original JobCreateCommand source.
 *
 * @param options parsed command-line arguments
 * @param client  client used to create the job
 * @param out     stream that receives the plain-text messages
 * @param json    when true the plain-text messages are skipped (the JSON output statements are not
 *                visible in this excerpt)
 * @param stdin   not referenced in the visible portion of the method
 * @return 0 when the create response status is OK; 1 when the template job is unknown or
 *         ambiguous, when validation reports errors, or when the create response is not OK
 * @throws IllegalArgumentException when options conflict, are missing, or cannot be parsed
 */
int run(final Namespace options, final HeliosClient client, final PrintStream out, final boolean json, final BufferedReader stdin) throws ExecutionException, InterruptedException, IOException {
    final boolean quiet = options.getBoolean(quietArg.getDest());
    final Job.Builder builder;
    final String id = options.getString(idArg.getDest());
    final String imageIdentifier = options.getString(imageArg.getDest());
    // Read job configuration from file
    // TODO (dano): look for e.g. Heliosfile in cwd by default?
    final String templateJobId = options.getString(templateArg.getDest());
    final File file = options.get(fileArg.getDest());
    // A job file and a template job are mutually exclusive sources for the builder.
    if (file != null && templateJobId != null) {
        throw new IllegalArgumentException("Please use only one of -t/--template and -f/--file");
    if (file != null) {
        if (!file.exists() || !file.isFile() || !file.canRead()) {
            throw new IllegalArgumentException("Cannot read file " + file);
        final byte[] bytes = Files.readAllBytes(file.toPath());
        final String config = new String(bytes, UTF_8);
        final Job job =, Job.class);
        builder = job.toBuilder();
    } else if (templateJobId != null) {
        final Map<JobId, Job> jobs =;
        // Exactly one job must match the template reference; zero or several matches return 1.
        if (jobs.size() == 0) {
            if (!json) {
                out.printf("Unknown job: %s%n", templateJobId);
            } else {
                final CreateJobResponse createJobResponse = new CreateJobResponse(CreateJobResponse.Status.UNKNOWN_JOB, null, null);
            return 1;
        } else if (jobs.size() > 1) {
            if (!json) {
                out.printf("Ambiguous job reference: %s%n", templateJobId);
            } else {
                final CreateJobResponse createJobResponse = new CreateJobResponse(CreateJobResponse.Status.AMBIGUOUS_JOB_REFERENCE, null, null);
            return 1;
        final Job template = Iterables.getOnlyElement(jobs.values());
        builder = template.toBuilder();
        if (id == null) {
            throw new IllegalArgumentException("Please specify new job name and version");
    } else {
        if (id == null || imageIdentifier == null) {
            throw new IllegalArgumentException("Please specify a file, or a template, or a job name, version and container image");
        builder = Job.newBuilder();
    if (id != null) {
        // The id is split on ':' into 1 to 3 parts; the per-case handling is not visible here.
        final String[] parts = id.split(":");
        switch(parts.length) {
            case 3:
            // fall through
            case 2:
            // fall through
            case 1:
                throw new IllegalArgumentException("Invalid Job id: " + id);
    if (imageIdentifier != null) {
    final String hostname = options.getString(hostnameArg.getDest());
    if (!isNullOrEmpty(hostname)) {
    final List<String> command = options.getList(argsArg.getDest());
    if (command != null && !command.isEmpty()) {
    final List<String> envList = options.getList(envArg.getDest());
    // TODO (mbrown): does this mean that env config is only added when there is a CLI flag too?
    if (!envList.isEmpty()) {
        final Map<String, String> env = Maps.newHashMap();
        // Add environmental variables from helios job configuration file
        // Add environmental variables passed in via CLI
        // Overwrite any redundant keys to make CLI args take precedence
        env.putAll(parseListOfPairs(envList, "environment variable"));
    final Map<String, String> metadata = Maps.newHashMap();
    final List<String> metadataList = options.getList(metadataArg.getDest());
    if (!metadataList.isEmpty()) {
        // TODO (mbrown): values from job conf file (which maybe involves dereferencing env vars?)
        metadata.putAll(parseListOfPairs(metadataList, "metadata"));
    // Parse port mappings
    final List<String> portSpecs = options.getList(portArg.getDest());
    final Map<String, PortMapping> explicitPorts = PortMappingParser.parsePortMappings(portSpecs);
    // Merge port mappings
    final Map<String, PortMapping> ports = Maps.newHashMap();
    // Parse service registrations
    final Map<ServiceEndpoint, ServicePorts> explicitRegistration = Maps.newHashMap();
    // Registration spec shape per the pattern below: <srv>[/<prot>][=<port>].
    final Pattern registrationPattern = compile("(?<srv>[a-zA-Z][_\\-\\w]+)(?:/(?<prot>\\w+))?(?:=(?<port>[_\\-\\w]+))?");
    final List<String> registrationSpecs = options.getList(registrationArg.getDest());
    for (final String spec : registrationSpecs) {
        final Matcher matcher = registrationPattern.matcher(spec);
        if (!matcher.matches()) {
            throw new IllegalArgumentException("Bad registration: " + spec);
        final String service ="srv");
        final String proto = fromNullable("prot")).or(HTTP);
        final String optionalPort ="port");
        final String port;
        if (ports.size() == 0) {
            throw new IllegalArgumentException("Need port mappings for service registration.");
        // Without an explicit port in the spec, the single mapped port is used implicitly.
        if (optionalPort == null) {
            if (ports.size() != 1) {
                throw new IllegalArgumentException("Need exactly one port mapping for implicit service registration");
            port = Iterables.getLast(ports.keySet());
        } else {
            port = optionalPort;
        explicitRegistration.put(ServiceEndpoint.of(service, proto), ServicePorts.of(port));
    final String registrationDomain = options.getString(registrationDomainArg.getDest());
    if (!isNullOrEmpty(registrationDomain)) {
    // Merge service registrations
    final Map<ServiceEndpoint, ServicePorts> registration = Maps.newHashMap();
    // Get grace period interval
    final Integer gracePeriod = options.getInt(gracePeriodArg.getDest());
    if (gracePeriod != null) {
    // Parse volumes
    final List<String> volumeSpecs = options.getList(volumeArg.getDest());
    for (final String spec : volumeSpecs) {
        // Split on the first ':' only, giving at most two parts.
        final String[] parts = spec.split(":", 2);
        switch(parts.length) {
            // Data volume
            case 1:
            // Bind mount
            case 2:
                final String path = parts[1];
                final String source = parts[0];
                builder.addVolume(path, source);
                throw new IllegalArgumentException("Invalid volume: " + spec);
    // Parse expires timestamp
    final String expires = options.getString(expiresArg.getDest());
    if (expires != null) {
        // Use DateTime to parse the ISO-8601 string
        builder.setExpires(new DateTime(expires).toDate());
    // Parse health check
    final String execString = options.getString(healthCheckExecArg.getDest());
    final List<String> execHealthCheck = (execString == null) ? null : Arrays.asList(execString.split(" "));
    final String httpHealthCheck = options.getString(healthCheckHttpArg.getDest());
    final String tcpHealthCheck = options.getString(healthCheckTcpArg.getDest());
    // NOTE(review): the statements that increment this counter are not visible in the excerpt.
    int numberOfHealthChecks = 0;
    for (final String c : asList(httpHealthCheck, tcpHealthCheck)) {
        if (!isNullOrEmpty(c)) {
    if (execHealthCheck != null && !execHealthCheck.isEmpty()) {
    if (numberOfHealthChecks > 1) {
        throw new IllegalArgumentException("Only one health check may be specified.");
    if (execHealthCheck != null && !execHealthCheck.isEmpty()) {
    } else if (!isNullOrEmpty(httpHealthCheck)) {
        // The HTTP health check spec must split on ':' into exactly two parts.
        final String[] parts = httpHealthCheck.split(":", 2);
        if (parts.length != 2) {
            throw new IllegalArgumentException("Invalid HTTP health check: " + httpHealthCheck);
        builder.setHealthCheck(HttpHealthCheck.of(parts[0], parts[1]));
    } else if (!isNullOrEmpty(tcpHealthCheck)) {
    final List<String> securityOpt = options.getList(securityOptArg.getDest());
    if (securityOpt != null && !securityOpt.isEmpty()) {
    final String networkMode = options.getString(networkModeArg.getDest());
    if (!isNullOrEmpty(networkMode)) {
    final String token = options.getString(tokenArg.getDest());
    if (!isNullOrEmpty(token)) {
    final List<String> addCaps = options.getList(addCapabilityArg.getDest());
    if (addCaps != null && !addCaps.isEmpty()) {
    final List<String> dropCaps = options.getList(dropCapabilityArg.getDest());
    if (dropCaps != null && !dropCaps.isEmpty()) {
    // We build without a hash here because we want the hash to be calculated server-side.
    // This allows different CLI versions to be cross-compatible with different master versions
    // that have either more or fewer job parameters.
    final Job job = builder.buildWithoutHash();
    final Collection<String> errors = JOB_VALIDATOR.validate(job);
    // Any validation error aborts with exit code 1 before the job is submitted.
    if (!errors.isEmpty()) {
        if (!json) {
            for (final String error : errors) {
        } else {
            final CreateJobResponse createJobResponse = new CreateJobResponse(CreateJobResponse.Status.INVALID_JOB_DEFINITION, ImmutableList.copyOf(errors), job.getId().toString());
        return 1;
    if (!quiet && !json) {
        out.println("Creating job: " + job.toJsonString());
    final CreateJobResponse status = client.createJob(job).get();
    if (status.getStatus() == CreateJobResponse.Status.OK) {
        if (!quiet && !json) {
        if (json) {
        } else {
        return 0;
    } else {
        if (!quiet && !json) {
            out.println("Failed: " + status);
        } else if (json) {
        return 1;
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) DateTime(org.joda.time.DateTime) ServiceEndpoint(com.spotify.helios.common.descriptors.ServiceEndpoint) CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) ServicePorts(com.spotify.helios.common.descriptors.ServicePorts) PortMapping(com.spotify.helios.common.descriptors.PortMapping) Job(com.spotify.helios.common.descriptors.Job) File(java.io.File) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) ServiceEndpoint(com.spotify.helios.common.descriptors.ServiceEndpoint)

Example 13 with CreateJobResponse

use of com.spotify.helios.common.protocol.CreateJobResponse in project helios by spotify.

the class TemporaryJob method deploy.

/**
 * Creates the job through the client, deploys it with goal START to every host in {@code hosts},
 * and then waits for it to come up on each host.
 *
 * A non-OK create or deploy response, or an InterruptedException / ExecutionException /
 * TimeoutException raised along the way, is reported through fail(...) with a formatted message.
 *
 * NOTE(review): this excerpt appears truncated. The finally blocks and the final per-host loop are
 * empty, closing braces are missing, and two logging calls have lost their receiver (one is fused
 * into the "Create job" comment, the other is left as a bare argument list). Confirm against the
 * original TemporaryJob source.
 */
void deploy() {
    final TemporaryJobReports.Step createJob = reportWriter.step("create job").tag("jobId", job.getId());
    try {
        // Create job"Creating job {}", job.getId().toShortString());
        final CreateJobResponse createResponse = get(client.createJob(job));
        if (createResponse.getStatus() != CreateJobResponse.Status.OK) {
            fail(format("Failed to create job %s - %s", job.getId(), createResponse.toString()));
    } catch (InterruptedException | ExecutionException | TimeoutException e) {
        fail(format("Failed to create job %s %s - %s", job.getId(), job.toString(), e));
    } finally {
    final TemporaryJobReports.Step deployJob = reportWriter.step("deploy job").tag("jobId", job.getId());
    try {
        // Deploy job
        final Deployment deployment = Deployment.of(job.getId(), Goal.START);
        for (final String host : hosts) {
            // HELIOS_HOST_ADDRESS is the IP address we should use to reach the host, instead of
            // the hostname. This is used when running a helios cluster inside a VM, and the containers
            // can be reached by IP address only, since DNS won't be able to resolve the host name of
            // the helios agent running in the VM.
            final HostStatus hostStatus = client.hostStatus(host).get();
            final String hostAddress = hostStatus.getEnvironment().get("HELIOS_HOST_ADDRESS");
            // Record the override only when the host's environment actually provides one.
            if (hostAddress != null) {
                hostToIp.put(host, hostAddress);
  "Deploying {} to {}", getJobDescription(job), host);
            final JobDeployResponse deployResponse = get(client.deploy(deployment, host));
            if (deployResponse.getStatus() != JobDeployResponse.Status.OK) {
                fail(format("Failed to deploy job %s %s - %s", job.getId(), job.toString(), deployResponse));
    } catch (InterruptedException | ExecutionException | TimeoutException e) {
        fail(format("Failed to deploy job %s %s - %s", job.getId(), job.toString(), e));
    } finally {
    try {
        // Wait for job to come up
        for (final String host : hosts) {
    } catch (TimeoutException e) {
        fail(format("Failed while probing job %s %s - %s", job.getId(), job.toString(), e));
Also used : CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) Deployment(com.spotify.helios.common.descriptors.Deployment) HostStatus(com.spotify.helios.common.descriptors.HostStatus) ExecutionException(java.util.concurrent.ExecutionException) JobDeployResponse(com.spotify.helios.common.protocol.JobDeployResponse) TimeoutException(java.util.concurrent.TimeoutException)

Example 14 with CreateJobResponse

use of com.spotify.helios.common.protocol.CreateJobResponse in project helios by spotify.

the class ZooKeeperRestoreTest method setup.

/**
 * Test fixture setup: creates a temporary backup directory, obtains the default client, and
 * creates {@code fooJob}, asserting that the create response status is OK.
 *
 * NOTE(review): the closing brace of the method is missing from this excerpt.
 */
public void setup() throws Exception {
    // Temporary directory assigned to backupDir; how it is used later is not visible here.
    backupDir = Files.createTempDirectory("helios-zk-updating-persistent-dir-test-backup-");
    client = defaultClient();
    final CreateJobResponse created = client.createJob(fooJob).get();
    assertEquals(CreateJobResponse.Status.OK, created.getStatus());
Also used : CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) Before(org.junit.Before)

Example 15 with CreateJobResponse

use of com.spotify.helios.common.protocol.CreateJobResponse in project helios by spotify.

the class HeliosIT method test.

/**
 * Runs the create, deploy, undeploy and remove commands for job "test:1" through the cli(...)
 * helper, asserting after each one that the parsed response status is OK.
 *
 * NOTE(review): the closing brace of the method is missing from this excerpt.
 */
public void test() throws Exception {
    // Create the job from the busybox image.
    final CreateJobResponse create = cli(CreateJobResponse.class, "create", "test:1", "spotify/busybox:latest");
    assertThat(create.getStatus(), equalTo(CreateJobResponse.Status.OK));
    // Deploy it to TEST_HOST.
    final JobDeployResponse deploy = cli(JobDeployResponse.class, "deploy", "test:1", TEST_HOST);
    assertThat(deploy.getStatus(), equalTo(JobDeployResponse.Status.OK));
    // Undeploy it ("--yes" and "-a" are passed through to the CLI unchanged).
    final JobUndeployResponse undeploy = cli(JobUndeployResponse.class, "undeploy", "--yes", "test:1", "-a");
    assertThat(undeploy.getStatus(), equalTo(JobUndeployResponse.Status.OK));
    // Remove the job definition.
    final JobDeleteResponse delete = cli(JobDeleteResponse.class, "remove", "--yes", "test:1");
    assertThat(delete.getStatus(), equalTo(JobDeleteResponse.Status.OK));
Also used : CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) JobUndeployResponse(com.spotify.helios.common.protocol.JobUndeployResponse) JobDeployResponse(com.spotify.helios.common.protocol.JobDeployResponse) JobDeleteResponse(com.spotify.helios.common.protocol.JobDeleteResponse) Test(org.junit.Test)


CreateJobResponse (com.spotify.helios.common.protocol.CreateJobResponse)31 Test (org.junit.Test)24 Job (com.spotify.helios.common.descriptors.Job)16 JobDeployResponse (com.spotify.helios.common.protocol.JobDeployResponse)16 HeliosClient (com.spotify.helios.client.HeliosClient)14 Deployment (com.spotify.helios.common.descriptors.Deployment)14 JobId (com.spotify.helios.common.descriptors.JobId)13 TaskStatus (com.spotify.helios.common.descriptors.TaskStatus)7 DockerClient (com.spotify.docker.client.DockerClient)6 AgentMain (com.spotify.helios.agent.AgentMain)4 JobUndeployResponse (com.spotify.helios.common.protocol.JobUndeployResponse)4 HostStatus (com.spotify.helios.common.descriptors.HostStatus)3 JobDeleteResponse (com.spotify.helios.common.protocol.JobDeleteResponse)3 ImmutableMap ( Container (com.spotify.docker.client.messages.Container)2 HostConfig (com.spotify.docker.client.messages.HostConfig)2 PortMapping (com.spotify.helios.common.descriptors.PortMapping)2 HostDeregisterResponse (com.spotify.helios.common.protocol.HostDeregisterResponse)2 Integer.toHexString (java.lang.Integer.toHexString)2 List (java.util.List)2