Search in sources :

Example 41 with Job

use of com.spotify.helios.common.descriptors.Job in project helios by spotify.

the class ZooKeeperMasterModel method getInitRollingUpdateOps.

private RollingUpdateOp getInitRollingUpdateOps(final DeploymentGroup deploymentGroup, final List<String> updateHosts, final List<String> undeployHosts, final ZooKeeperClient zooKeeperClient) throws KeeperException {
    final List<RolloutTask> rolloutTasks = new ArrayList<>();
    // give precedence to the updateHosts list so we don't end up in a state where we updated a host
    // and then removed the job from it (because of buggy logic in the calling method)
    final List<String> updateHostsCopy = new ArrayList<>(updateHosts);
    final List<String> undeployHostsCopy = new ArrayList<>(undeployHosts);
    undeployHostsCopy.removeAll(updateHostsCopy);
    // we only care about hosts that are UP
    final List<String> upHostsToUndeploy = undeployHostsCopy.stream().filter(host -> checkHostUp(zooKeeperClient, host)).collect(Collectors.toList());
    final List<String> upHostsToDeploy = updateHostsCopy.stream().filter(host -> checkHostUp(zooKeeperClient, host)).collect(Collectors.toList());
    rolloutTasks.addAll(RollingUndeployPlanner.of(deploymentGroup).plan(upHostsToUndeploy));
    rolloutTasks.addAll(RollingUpdatePlanner.of(deploymentGroup).plan(upHostsToDeploy));
    log.info("generated rolloutTasks for deployment-group name={} " + "updateHosts={} undeployHosts={}: {}", deploymentGroup.getName(), updateHosts, undeployHosts, rolloutTasks);
    final DeploymentGroupTasks tasks = DeploymentGroupTasks.newBuilder().setRolloutTasks(rolloutTasks).setTaskIndex(0).setDeploymentGroup(deploymentGroup).build();
    return new RollingUpdateOpFactory(tasks, DEPLOYMENT_GROUP_EVENT_FACTORY).start(deploymentGroup, zooKeeperClient);
}
Also used : Descriptor.parse(com.spotify.helios.common.descriptors.Descriptor.parse) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) LoggerFactory(org.slf4j.LoggerFactory) Stat(org.apache.zookeeper.data.Stat) ThrottleState(com.spotify.helios.common.descriptors.ThrottleState) FAILED(com.spotify.helios.common.descriptors.DeploymentGroupStatus.State.FAILED) ZooKeeperOperations.create(com.spotify.helios.servicescommon.coordination.ZooKeeperOperations.create) Collections.singletonList(java.util.Collections.singletonList) Optional.fromNullable(com.google.common.base.Optional.fromNullable) Json(com.spotify.helios.common.Json) RolloutOptions(com.spotify.helios.common.descriptors.RolloutOptions) RollingUndeployPlanner(com.spotify.helios.rollingupdate.RollingUndeployPlanner) ZooKeeperOperations.set(com.spotify.helios.servicescommon.coordination.ZooKeeperOperations.set) VersionedValue(com.spotify.helios.servicescommon.VersionedValue) Map(java.util.Map) Deployment(com.spotify.helios.common.descriptors.Deployment) TypeReference(com.fasterxml.jackson.core.type.TypeReference) JsonParseException(com.fasterxml.jackson.core.JsonParseException) HostInfo(com.spotify.helios.common.descriptors.HostInfo) Function(com.google.common.base.Function) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.emptyList(java.util.Collections.emptyList) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) EventSender(com.spotify.helios.servicescommon.EventSender) Set(java.util.Set) PortMapping(com.spotify.helios.common.descriptors.PortMapping) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) RollingUpdateOp(com.spotify.helios.rollingupdate.RollingUpdateOp) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) Objects(java.util.Objects) Nullable(org.jetbrains.annotations.Nullable) HOSTS_CHANGED(com.spotify.helios.common.descriptors.DeploymentGroup.RollingUpdateReason.HOSTS_CHANGED) TaskStatusEvent(com.spotify.helios.common.descriptors.TaskStatusEvent) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) NotEmptyException(org.apache.zookeeper.KeeperException.NotEmptyException) MANUAL(com.spotify.helios.common.descriptors.DeploymentGroup.RollingUpdateReason.MANUAL) UP(com.spotify.helios.common.descriptors.HostStatus.Status.UP) JsonMappingException(com.fasterxml.jackson.databind.JsonMappingException) RollingUpdateOpFactory(com.spotify.helios.rollingupdate.RollingUpdateOpFactory) Lists.reverse(com.google.common.collect.Lists.reverse) RollingUpdatePlanner(com.spotify.helios.rollingupdate.RollingUpdatePlanner) ZooKeeperOperations.delete(com.spotify.helios.servicescommon.coordination.ZooKeeperOperations.delete) Joiner(com.google.common.base.Joiner) JobId(com.spotify.helios.common.descriptors.JobId) RollingUpdateError(com.spotify.helios.rollingupdate.RollingUpdateError) Goal(com.spotify.helios.common.descriptors.Goal) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) Paths(com.spotify.helios.servicescommon.coordination.Paths) ZooKeeperRegistrarUtil(com.spotify.helios.servicescommon.ZooKeeperRegistrarUtil) ArrayList(java.util.ArrayList) ROLLING_OUT(com.spotify.helios.common.descriptors.DeploymentGroupStatus.State.ROLLING_OUT) Strings(com.google.common.base.Strings) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) DeploymentGroupTasks(com.spotify.helios.common.descriptors.DeploymentGroupTasks) HostStatus(com.spotify.helios.common.descriptors.HostStatus) DOWN(com.spotify.helios.common.descriptors.HostStatus.Status.DOWN) OpResult(org.apache.zookeeper.OpResult) Task(com.spotify.helios.common.descriptors.Task) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) BadVersionException(org.apache.zookeeper.KeeperException.BadVersionException) Collections.emptyMap(java.util.Collections.emptyMap) Logger(org.slf4j.Logger) Job(com.spotify.helios.common.descriptors.Job) KeeperException(org.apache.zookeeper.KeeperException) Preconditions.checkNotNull(com.google.common.base.Preconditions.checkNotNull) MoreObjects(com.google.common.base.MoreObjects) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) Maps(com.google.common.collect.Maps) DeploymentGroupEventFactory(com.spotify.helios.rollingupdate.DeploymentGroupEventFactory) RolloutTask(com.spotify.helios.common.descriptors.RolloutTask) Node(com.spotify.helios.servicescommon.coordination.Node) Ordering(com.google.common.collect.Ordering) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) JobStatus(com.spotify.helios.common.descriptors.JobStatus) ZooKeeperClientProvider(com.spotify.helios.servicescommon.coordination.ZooKeeperClientProvider) DeploymentGroupStatus(com.spotify.helios.common.descriptors.DeploymentGroupStatus) ZooKeeperOperations.check(com.spotify.helios.servicescommon.coordination.ZooKeeperOperations.check) Preconditions(com.google.common.base.Preconditions) AgentInfo(com.spotify.helios.common.descriptors.AgentInfo) Comparator(java.util.Comparator) NodeExistsException(org.apache.zookeeper.KeeperException.NodeExistsException) Collections(java.util.Collections) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) DeploymentGroupTasks(com.spotify.helios.common.descriptors.DeploymentGroupTasks) RolloutTask(com.spotify.helios.common.descriptors.RolloutTask) RollingUpdateOpFactory(com.spotify.helios.rollingupdate.RollingUpdateOpFactory)

Example 42 with Job

use of com.spotify.helios.common.descriptors.Job in project helios by spotify.

the class JobsResource method post.

/**
   * Create a job given the definition in {@code job}.
   *
   * @param job The job to create.
   * @param username The user creating the job.
   * @return The response.
   */
@POST
@Produces(APPLICATION_JSON)
@Timed
@ExceptionMetered
public CreateJobResponse post(@Valid final Job job, @RequestUser final String username) {
    final Job.Builder clone = job.toBuilder().setCreatingUser(username).setCreated(clock.now().getMillis()).setHash(job.getId().getHash());
    final Job actualJob = clone.build();
    final Collection<String> errors = jobValidator.validate(actualJob);
    final String jobIdString = actualJob.getId().toString();
    if (!errors.isEmpty()) {
        throw badRequest(new CreateJobResponse(INVALID_JOB_DEFINITION, ImmutableList.copyOf(errors), jobIdString));
    }
    try {
        model.addJob(actualJob);
    } catch (JobExistsException e) {
        throw badRequest(new CreateJobResponse(JOB_ALREADY_EXISTS, ImmutableList.<String>of(), jobIdString));
    }
    log.info("created job: {}", actualJob);
    return new CreateJobResponse(CreateJobResponse.Status.OK, ImmutableList.<String>of(), jobIdString);
}
Also used : CreateJobResponse(com.spotify.helios.common.protocol.CreateJobResponse) JobExistsException(com.spotify.helios.master.JobExistsException) Job(com.spotify.helios.common.descriptors.Job) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces) Timed(com.codahale.metrics.annotation.Timed) ExceptionMetered(com.codahale.metrics.annotation.ExceptionMetered)

Example 43 with Job

use of com.spotify.helios.common.descriptors.Job in project helios by spotify.

the class ExpiredJobReaper method runOneIteration.

@Override
public void runOneIteration() {
    for (final Entry<JobId, Job> entry : masterModel.getJobs().entrySet()) {
        final JobId jobId = entry.getKey();
        final Job job = entry.getValue();
        if (job.getExpires() == null) {
            //noinspection UnnecessaryContinue
            continue;
        } else if (job.getExpires().getTime() <= clock.now().getMillis()) {
            final JobStatus status = masterModel.getJobStatus(jobId);
            final List<String> hosts = ImmutableList.copyOf(status.getDeployments().keySet());
            for (final String host : hosts) {
                try {
                    masterModel.undeployJob(host, jobId, job.getToken());
                } catch (HostNotFoundException e) {
                    log.error("couldn't undeploy job {} from host {} when it hit deadline", jobId, host, e);
                } catch (JobNotDeployedException e) {
                    log.debug("job {} was already undeployed when it hit deadline", jobId, e);
                } catch (TokenVerificationException e) {
                    log.error("couldn't undeploy job {} from host {} because token verification failed", jobId, host, e);
                }
            }
            try {
                masterModel.removeJob(jobId, job.getToken());
            } catch (JobDoesNotExistException e) {
                log.debug("job {} was already removed when it hit deadline", jobId, e);
            } catch (JobStillDeployedException e) {
                log.debug("job {} still deployed on some host(s) after expiry reap", jobId, e);
            } catch (TokenVerificationException e) {
                log.error("couldn't remove job {} because token verification failed", jobId, e);
            }
        }
    }
}
Also used : JobStatus(com.spotify.helios.common.descriptors.JobStatus) JobDoesNotExistException(com.spotify.helios.master.JobDoesNotExistException) HostNotFoundException(com.spotify.helios.master.HostNotFoundException) TokenVerificationException(com.spotify.helios.master.TokenVerificationException) JobStillDeployedException(com.spotify.helios.master.JobStillDeployedException) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) JobNotDeployedException(com.spotify.helios.master.JobNotDeployedException) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId)

Example 44 with Job

use of com.spotify.helios.common.descriptors.Job in project helios by spotify.

the class JobHistoryReaper method processItem.

@Override
void processItem(final String jobId) {
    log.info("Deciding whether to reap job history for job {}", jobId);
    final JobId id = JobId.fromString(jobId);
    final Job job = masterModel.getJob(id);
    if (job == null) {
        try {
            client.deleteRecursive(Paths.historyJob(id));
            log.info("Reaped job history for job {}", jobId);
        } catch (NoNodeException ignored) {
        // Something deleted the history right before we got to it. Ignore and keep going.
        } catch (KeeperException e) {
            log.warn("error reaping job history for job {}", jobId, e);
        }
    }
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId) KeeperException(org.apache.zookeeper.KeeperException)

Example 45 with Job

use of com.spotify.helios.common.descriptors.Job in project helios by spotify.

the class ZooKeeperMasterModel method getJobs.

/**
   * Returns a {@link Map} of {@link JobId} to {@link Job} objects for all of the jobs known.
   */
@Override
public Map<JobId, Job> getJobs() {
    log.debug("getting jobs");
    final String folder = Paths.configJobs();
    final ZooKeeperClient client = provider.get("getJobs");
    try {
        final List<String> ids;
        try {
            ids = client.getChildren(folder);
        } catch (NoNodeException e) {
            return Maps.newHashMap();
        }
        final Map<JobId, Job> descriptors = Maps.newHashMap();
        for (final String id : ids) {
            final JobId jobId = JobId.fromString(id);
            final String path = Paths.configJob(jobId);
            try {
                final byte[] data = client.getData(path);
                final Job descriptor = parse(data, Job.class);
                descriptors.put(descriptor.getId(), descriptor);
            } catch (NoNodeException e) {
                // Ignore, the job was deleted before we had a chance to read it.
                log.debug("Ignoring deleted job {}", jobId);
            }
        }
        return descriptors;
    } catch (KeeperException | IOException e) {
        throw new HeliosRuntimeException("getting jobs failed", e);
    }
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) IOException(java.io.IOException) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId) KeeperException(org.apache.zookeeper.KeeperException)

Aggregations

Job (com.spotify.helios.common.descriptors.Job)79 Test (org.junit.Test)57 JobId (com.spotify.helios.common.descriptors.JobId)38 HeliosClient (com.spotify.helios.client.HeliosClient)25 Deployment (com.spotify.helios.common.descriptors.Deployment)21 CreateJobResponse (com.spotify.helios.common.protocol.CreateJobResponse)16 TaskStatus (com.spotify.helios.common.descriptors.TaskStatus)15 JobStatus (com.spotify.helios.common.descriptors.JobStatus)12 JobDeployResponse (com.spotify.helios.common.protocol.JobDeployResponse)11 ZooKeeperClient (com.spotify.helios.servicescommon.coordination.ZooKeeperClient)10 KeeperException (org.apache.zookeeper.KeeperException)10 NoNodeException (org.apache.zookeeper.KeeperException.NoNodeException)10 PortMapping (com.spotify.helios.common.descriptors.PortMapping)9 HeliosRuntimeException (com.spotify.helios.common.HeliosRuntimeException)8 ServiceEndpoint (com.spotify.helios.common.descriptors.ServiceEndpoint)8 Matchers.containsString (org.hamcrest.Matchers.containsString)8 DockerClient (com.spotify.docker.client.DockerClient)7 ZooKeeperOperation (com.spotify.helios.servicescommon.coordination.ZooKeeperOperation)7 IOException (java.io.IOException)7 ServicePorts (com.spotify.helios.common.descriptors.ServicePorts)6