Search in sources :

Example 46 with Job

use of com.spotify.helios.common.descriptors.Job in project helios by spotify.

The deployJobRetry method of the ZooKeeperMasterModel class.

/**
 * Attempts to deploy the job referenced by {@code deployment} onto {@code host} in a single
 * ZooKeeper transaction, retrying (by recursion with {@code count + 1}) when the transaction
 * fails because a node it depends on disappeared.
 *
 * <p>Each attempt generates a fresh operation id and creates a matching "creation" node next to
 * the task node. If the transaction later reports that a node already exists and that creation
 * node is present, the deployment is treated as already completed by this attempt.
 *
 * @param client     ZooKeeper client used for reads and for the deployment transaction
 * @param host       name of the host to deploy to
 * @param deployment deployment description supplying the job id, goal and deployer details
 * @param count      number of attempts already made; the method gives up when this reaches 3
 * @param token      token checked against the job via {@code verifyToken}
 * @throws JobDoesNotExistException           if no job is found for the deployment's job id
 * @throws JobAlreadyDeployedException        if a task node for this job already exists on the host
 * @throws HostNotFoundException              if the host name is rejected when building the task path
 * @throws JobPortAllocationConflictException presumably raised by {@code checkForPortConflicts}
 *                                            when a static port is already taken - confirm there
 * @throws TokenVerificationException         presumably raised by {@code verifyToken} - confirm there
 * @throws HeliosRuntimeException             on the third failed attempt or on any other
 *                                            ZooKeeper failure
 */
private void deployJobRetry(final ZooKeeperClient client, final String host, final Deployment deployment, int count, final String token) throws JobDoesNotExistException, JobAlreadyDeployedException, HostNotFoundException, JobPortAllocationConflictException, TokenVerificationException {
    // Retry budget exhausted: stop recursing.
    if (count == 3) {
        throw new HeliosRuntimeException("3 failures (possibly concurrent modifications) while " + "deploying. Giving up.");
    }
    log.info("deploying {}: {} (retry={})", deployment, host, count);
    final JobId id = deployment.getJobId();
    final Job job = getJob(id);
    if (job == null) {
        throw new JobDoesNotExistException(id);
    }
    verifyToken(token, job);
    final UUID operationId = UUID.randomUUID();
    final String jobPath = Paths.configJob(id);
    // Building the task path doubles as host-name validation: an IllegalArgumentException
    // from the path builder is reported as an unknown host.
    final String taskPath;
    try {
        taskPath = Paths.configHostJob(host, id);
    } catch (IllegalArgumentException e) {
        throw new HostNotFoundException("Could not find Helios host '" + host + "'");
    }
    final String taskCreationPath = Paths.configHostJobCreation(host, id, operationId);
    final List<Integer> staticPorts = staticPorts(job);
    // One node per static port, each holding the job id as its payload.
    final Map<String, byte[]> portNodes = Maps.newHashMap();
    final byte[] idJson = id.toJsonBytes();
    for (final int port : staticPorts) {
        final String path = Paths.configHostPort(host, port);
        portNodes.put(path, idJson);
    }
    final Task task = new Task(job, deployment.getGoal(), deployment.getDeployerUser(), deployment.getDeployerMaster(), deployment.getDeploymentGroupName());
    final List<ZooKeeperOperation> operations = Lists.newArrayList(check(jobPath), create(portNodes), create(Paths.configJobHost(id, host)));
    // Attempt to read a task here.
    try {
        client.getNode(taskPath);
        // if we get here the node exists already
        throw new JobAlreadyDeployedException(host, id);
    } catch (NoNodeException e) {
        // No task yet: add the task node and this attempt's creation marker to the transaction.
        operations.add(create(taskPath, task));
        operations.add(create(taskCreationPath));
    } catch (KeeperException e) {
        throw new HeliosRuntimeException("reading existing task description failed", e);
    }
    // TODO (dano): Failure handling is racy wrt agent and job modifications.
    try {
        client.transaction(operations);
        log.info("deployed {}: {} (retry={})", deployment, host, count);
    } catch (NoNodeException e) {
        // Either the job, the host or the task went away
        assertJobExists(client, id);
        assertHostExists(client, host);
        // If the job and host still exists, we likely tried to redeploy a job that had an UNDEPLOY
        // goal and lost the race with the agent removing the task before we could set it. Retry.
        deployJobRetry(client, host, deployment, count + 1, token);
    } catch (NodeExistsException e) {
        // Check for conflict due to transaction retry
        try {
            if (client.exists(taskCreationPath) != null) {
                // Our creation operation node existed, we're done here
                return;
            }
        } catch (KeeperException ex) {
            throw new HeliosRuntimeException("checking job deployment failed", ex);
        }
        try {
            // Check if the job was already deployed
            if (client.stat(taskPath) != null) {
                throw new JobAlreadyDeployedException(host, id);
            }
        } catch (KeeperException ex) {
            // Wrap the failure of this check (ex), not the outer NodeExistsException (e).
            throw new HeliosRuntimeException("checking job deployment failed", ex);
        }
        // Check for static port collisions
        for (final int port : staticPorts) {
            checkForPortConflicts(client, host, port, id);
        }
        // Catch all for logic and ephemeral issues
        throw new HeliosRuntimeException("deploying job failed", e);
    } catch (KeeperException e) {
        throw new HeliosRuntimeException("deploying job failed", e);
    }
}
Also used : Task(com.spotify.helios.common.descriptors.Task) RolloutTask(com.spotify.helios.common.descriptors.RolloutTask) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) NodeExistsException(org.apache.zookeeper.KeeperException.NodeExistsException) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) Job(com.spotify.helios.common.descriptors.Job) UUID(java.util.UUID) JobId(com.spotify.helios.common.descriptors.JobId) KeeperException(org.apache.zookeeper.KeeperException)

Example 47 with Job

use of com.spotify.helios.common.descriptors.Job in project helios by spotify.

The undeployJob method of the ZooKeeperMasterModel class.

/**
   * Removes the deployment of {@code jobId} from {@code host} and returns the deployment that
   * was in place before removal.
   *
   * <p>The host must exist, the job must currently be deployed there, and {@code token} must
   * pass {@code verifyToken} for the job. All affected nodes are deleted in one transaction.
   */
@Override
public Deployment undeployJob(final String host, final JobId jobId, final String token) throws HostNotFoundException, JobNotDeployedException, TokenVerificationException {
    log.info("undeploying {}: {}", jobId, host);

    final ZooKeeperClient client = provider.get("undeployJob");
    assertHostExists(client, host);

    final Deployment existing = getDeployment(host, jobId);
    if (existing == null) {
        throw new JobNotDeployedException(host, jobId);
    }

    final Job job = getJob(client, jobId);
    verifyToken(token, job);

    final String hostJobPath = Paths.configHostJob(host, jobId);
    try {
        // The recursive listing is reversed before deletion; it covers the job node
        // together with its child creation node.
        final List<String> doomed = newArrayList(reverse(client.listRecursive(hostJobPath)));
        doomed.add(Paths.configJobHost(jobId, host));
        // Also drop the node of every static port of the job on this host.
        for (final int port : staticPorts(job)) {
            doomed.add(Paths.configHostPort(host, port));
        }
        client.transaction(delete(doomed));
        return existing;
    } catch (NoNodeException e) {
        // A node vanished underneath us: report it exactly like the "not deployed" case above.
        throw new JobNotDeployedException(host, jobId);
    } catch (KeeperException e) {
        throw new HeliosRuntimeException("Removing deployment failed", e);
    }
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) Deployment(com.spotify.helios.common.descriptors.Deployment) Job(com.spotify.helios.common.descriptors.Job) KeeperException(org.apache.zookeeper.KeeperException)

Example 48 with Job

use of com.spotify.helios.common.descriptors.Job in project helios by spotify.

The getUndeployOperations method of the ZooKeeperMasterModel class.

/**
 * Builds, without executing, the ZooKeeper operations that remove the deployment of
 * {@code jobId} from {@code host}.
 *
 * <p>Performs the same precondition checks as an actual undeploy: the host must exist, the job
 * must be deployed on it, and {@code token} must pass {@code verifyToken} for the job.
 *
 * @return a single-element immutable list holding the delete operation for all affected nodes
 */
private List<ZooKeeperOperation> getUndeployOperations(final ZooKeeperClient client, final String host, final JobId jobId, final String token) throws HostNotFoundException, JobNotDeployedException, TokenVerificationException {
    assertHostExists(client, host);

    final Deployment existing = getDeployment(host, jobId);
    if (existing == null) {
        throw new JobNotDeployedException(host, jobId);
    }

    final Job job = getJob(client, jobId);
    verifyToken(token, job);

    final String hostJobPath = Paths.configHostJob(host, jobId);
    try {
        // The recursive listing is reversed before deletion; it covers the job node
        // together with its child creation node.
        final List<String> doomed = newArrayList(reverse(client.listRecursive(hostJobPath)));
        doomed.add(Paths.configJobHost(jobId, host));
        // Also include the node of every static port of the job on this host.
        for (final int port : staticPorts(job)) {
            doomed.add(Paths.configHostPort(host, port));
        }
        return ImmutableList.of(delete(doomed));
    } catch (NoNodeException e) {
        // A node vanished underneath us: report it exactly like the "not deployed" case above.
        throw new JobNotDeployedException(host, jobId);
    } catch (KeeperException e) {
        throw new HeliosRuntimeException("calculating undeploy operations failed", e);
    }
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) Deployment(com.spotify.helios.common.descriptors.Deployment) Job(com.spotify.helios.common.descriptors.Job) KeeperException(org.apache.zookeeper.KeeperException)

Example 49 with Job

use of com.spotify.helios.common.descriptors.Job in project helios by spotify.

The testWhitelistedCapabilities_withWhitelist method of the JobValidatorTest class.

/**
 * A job requesting capabilities {@code cap1} and {@code cap2} yields exactly one validation
 * error when only {@code cap1} is whitelisted, and none when both are whitelisted.
 */
@Test
public void testWhitelistedCapabilities_withWhitelist() throws Exception {
    final Job job = Job.newBuilder()
            .setName("foo")
            .setVersion("1")
            .setImage("foobar")
            .setAddCapabilities(ImmutableSet.of("cap1", "cap2"))
            .build();

    // Whitelist covers only cap1: cap2 must be rejected with the exact message below.
    final JobValidator restrictive = new JobValidator(true, true, ImmutableSet.of("cap1"));
    final Set<String> restrictiveErrors = restrictive.validate(job);
    final String expectedMessage = "The following Linux capabilities aren't allowed by the Helios master: 'cap2'. " + "The allowed capabilities are: 'cap1'.";
    assertEquals(1, restrictiveErrors.size());
    assertThat(restrictiveErrors.iterator().next(), equalTo(expectedMessage));

    // Whitelist covers both requested capabilities: no errors expected.
    final JobValidator permissive = new JobValidator(true, true, ImmutableSet.of("cap1", "cap2"));
    final Set<String> permissiveErrors = permissive.validate(job);
    assertEquals(0, permissiveErrors.size());
}
Also used : Matchers.containsString(org.hamcrest.Matchers.containsString) Job(com.spotify.helios.common.descriptors.Job) Test(org.junit.Test)

Example 50 with Job

use of com.spotify.helios.common.descriptors.Job in project helios by spotify.

The testValidPortTagsPass method of the JobValidatorTest class.

/**
 * A job whose registration attaches tags to two ports that are both declared in its port
 * mapping passes validation with no errors.
 */
@Test
public void testValidPortTagsPass() {
    final Job.Builder builder = Job.newBuilder().setName("foo").setVersion("1").setImage("foobar").build().toBuilder();

    // Two declared ports ...
    final Map<String, PortMapping> ports = ImmutableMap.of(
            "add_ports1", PortMapping.of(1234),
            "add_ports2", PortMapping.of(2345));

    // ... and a registration that tags exactly those two ports.
    final ServicePorts servicePorts = new ServicePorts(ImmutableMap.of(
            "add_ports1", new ServicePortParameters(ImmutableList.of("tag1", "tag2")),
            "add_ports2", new ServicePortParameters(ImmutableList.of("tag3", "tag4"))));
    final Map<ServiceEndpoint, ServicePorts> registration = ImmutableMap.of(ServiceEndpoint.of("add_service", "add_proto"), servicePorts);

    builder.setPorts(ports).setRegistration(registration);
    assertThat(validator.validate(builder.build()), is(empty()));
}
Also used : ServicePorts(com.spotify.helios.common.descriptors.ServicePorts) ServicePortParameters(com.spotify.helios.common.descriptors.ServicePortParameters) Matchers.containsString(org.hamcrest.Matchers.containsString) PortMapping(com.spotify.helios.common.descriptors.PortMapping) Job(com.spotify.helios.common.descriptors.Job) ImmutableMap(com.google.common.collect.ImmutableMap) ServiceEndpoint(com.spotify.helios.common.descriptors.ServiceEndpoint) Test(org.junit.Test)

Aggregations

Job (com.spotify.helios.common.descriptors.Job)79 Test (org.junit.Test)57 JobId (com.spotify.helios.common.descriptors.JobId)38 HeliosClient (com.spotify.helios.client.HeliosClient)25 Deployment (com.spotify.helios.common.descriptors.Deployment)21 CreateJobResponse (com.spotify.helios.common.protocol.CreateJobResponse)16 TaskStatus (com.spotify.helios.common.descriptors.TaskStatus)15 JobStatus (com.spotify.helios.common.descriptors.JobStatus)12 JobDeployResponse (com.spotify.helios.common.protocol.JobDeployResponse)11 ZooKeeperClient (com.spotify.helios.servicescommon.coordination.ZooKeeperClient)10 KeeperException (org.apache.zookeeper.KeeperException)10 NoNodeException (org.apache.zookeeper.KeeperException.NoNodeException)10 PortMapping (com.spotify.helios.common.descriptors.PortMapping)9 HeliosRuntimeException (com.spotify.helios.common.HeliosRuntimeException)8 ServiceEndpoint (com.spotify.helios.common.descriptors.ServiceEndpoint)8 Matchers.containsString (org.hamcrest.Matchers.containsString)8 DockerClient (com.spotify.docker.client.DockerClient)7 ZooKeeperOperation (com.spotify.helios.servicescommon.coordination.ZooKeeperOperation)7 IOException (java.io.IOException)7 ServicePorts (com.spotify.helios.common.descriptors.ServicePorts)6