Search in sources :

Example 31 with JobId

use of com.spotify.helios.common.descriptors.JobId in project helios by spotify.

the class OldJobReaper method processItem.

/**
 * Decides whether {@code job} should be reaped and, if so, removes it.
 *
 * <p>A job with at least one deployment is never reaped. An undeployed job is reaped when its
 * last history event is older than the retention time or, if it has no history at all, when it
 * has no creation date or was created longer ago than the retention time. Exceptions raised
 * while deciding or while removing the job are logged at warn level instead of being propagated.
 *
 * @param job the job to examine
 */
@Override
void processItem(final Job job) {
    final JobId jobId = job.getId();
    try {
        final JobStatus status = masterModel.getJobStatus(jobId);
        final Map<String, Deployment> activeDeployments = status.getDeployments();
        final List<TaskStatusEvent> history = masterModel.getJobHistory(jobId);
        final boolean reap;
        if (!activeDeployments.isEmpty()) {
            // A deployed job is kept no matter what its history or creation date says.
            reap = false;
        } else if (!history.isEmpty()) {
            // The final entry of the history is treated as the most recent event.
            final TaskStatusEvent latest = history.get(history.size() - 1);
            final String latestDate = DATE_FORMATTER.print(latest.getTimestamp());
            // Milliseconds elapsed since the job last produced an event.
            final long idleMillis = clock.now().getMillis() - latest.getTimestamp();
            reap = idleMillis > retentionMillis;
            if (reap) {
                log.info("Marked job '{}' for reaping (not deployed, has history whose last event " + "on {} was before the retention time of {} days)", jobId, latestDate, retentionDays);
            } else {
                log.info("NOT reaping job '{}' (not deployed, has history whose last event " + "on {} was after the retention time of {} days)", jobId, latestDate, retentionDays);
            }
        } else {
            // Never deployed and never ran: fall back to the creation date, if there is one.
            final Long created = job.getCreated();
            if (created == null) {
                log.info("Marked job '{}' for reaping (not deployed, no history, no creation date)", jobId);
                reap = true;
            } else {
                reap = (clock.now().getMillis() - created) > retentionMillis;
                if (reap) {
                    log.info("Marked job '{}' for reaping (not deployed, no history, creation date " + "of {} before retention time of {} days)", jobId, DATE_FORMATTER.print(created), retentionDays);
                } else {
                    log.info("NOT reaping job '{}' (not deployed, no history, creation date of {} after " + "retention time of {} days)", jobId, DATE_FORMATTER.print(created), retentionDays);
                }
            }
        }
        if (!reap) {
            return;
        }
        try {
            log.info("reaping old job '{}'", job.getId());
            masterModel.removeJob(jobId, job.getToken());
        } catch (Exception e) {
            // A failed removal is only logged; it does not reach the outer handler.
            log.warn("Failed to reap old job '{}'", jobId, e);
        }
    } catch (Exception e) {
        log.warn("Failed to determine if job '{}' should be reaped", jobId, e);
    }
}
Also used : JobStatus(com.spotify.helios.common.descriptors.JobStatus) TaskStatusEvent(com.spotify.helios.common.descriptors.TaskStatusEvent) Deployment(com.spotify.helios.common.descriptors.Deployment) JobId(com.spotify.helios.common.descriptors.JobId)

Example 32 with JobId

use of com.spotify.helios.common.descriptors.JobId in project helios by spotify.

the class DeploymentGroupResource method getDeploymentGroupStatus.

/**
 * Builds the status response for the deployment group called {@code name}.
 *
 * <p>Only hosts whose status is UP contribute an entry. Each entry carries the first job on that
 * host whose deployment belongs to this group, together with that job's task state when one is
 * recorded; both are {@code null} when no such job is found.
 *
 * @param name the deployment group name taken from the request path
 * @return a 200 response wrapping a {@link DeploymentGroupStatusResponse}, or a 404 response when
 *         a {@link DeploymentGroupDoesNotExistException} is thrown
 */
@GET
@Path("/{name}/status")
@Produces(APPLICATION_JSON)
@Timed
@ExceptionMetered
public Response getDeploymentGroupStatus(@PathParam("name") @Valid final String name) {
    try {
        final DeploymentGroup group = model.getDeploymentGroup(name);
        final DeploymentGroupStatus groupStatus = model.getDeploymentGroupStatus(name);
        final List<String> groupHosts = model.getDeploymentGroupHosts(name);
        final List<DeploymentGroupStatusResponse.HostStatus> hostResults = Lists.newArrayList();
        for (final String host : groupHosts) {
            final HostStatus hostStatus = model.getHostStatus(host);
            // Hosts that are unknown or not UP are left out of the response entirely.
            if (hostStatus == null || !hostStatus.getStatus().equals(HostStatus.Status.UP)) {
                continue;
            }
            JobId groupJobId = null;
            TaskStatus.State groupJobState = null;
            for (final Map.Entry<JobId, Deployment> deployment : hostStatus.getJobs().entrySet()) {
                if (!name.equals(deployment.getValue().getDeploymentGroupName())) {
                    continue;
                }
                // Only the first job deployed by this group is reported for the host.
                groupJobId = deployment.getKey();
                final TaskStatus task = hostStatus.getStatuses().get(groupJobId);
                if (task != null) {
                    groupJobState = task.getState();
                }
                break;
            }
            hostResults.add(new DeploymentGroupStatusResponse.HostStatus(host, groupJobId, groupJobState));
        }
        final DeploymentGroupStatusResponse.Status status;
        final String error;
        if (groupStatus == null) {
            // No recorded group status: report the group as idle with an empty error.
            status = DeploymentGroupStatusResponse.Status.IDLE;
            error = "";
        } else {
            if (groupStatus.getState() == DeploymentGroupStatus.State.FAILED) {
                status = DeploymentGroupStatusResponse.Status.FAILED;
            } else if (groupStatus.getState() == DeploymentGroupStatus.State.ROLLING_OUT) {
                status = DeploymentGroupStatusResponse.Status.ROLLING_OUT;
            } else {
                status = DeploymentGroupStatusResponse.Status.ACTIVE;
            }
            error = groupStatus.getError();
        }
        final DeploymentGroupStatusResponse body =
            new DeploymentGroupStatusResponse(group, status, error, hostResults, groupStatus);
        return Response.ok(body).build();
    } catch (final DeploymentGroupDoesNotExistException e) {
        return Response.status(Response.Status.NOT_FOUND).build();
    }
}
Also used : Deployment(com.spotify.helios.common.descriptors.Deployment) DeploymentGroupStatus(com.spotify.helios.common.descriptors.DeploymentGroupStatus) DeploymentGroupDoesNotExistException(com.spotify.helios.master.DeploymentGroupDoesNotExistException) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) DeploymentGroupStatusResponse(com.spotify.helios.common.protocol.DeploymentGroupStatusResponse) HostStatus(com.spotify.helios.common.descriptors.HostStatus) Map(java.util.Map) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) JobId(com.spotify.helios.common.descriptors.JobId) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) Timed(com.codahale.metrics.annotation.Timed) GET(javax.ws.rs.GET) ExceptionMetered(com.codahale.metrics.annotation.ExceptionMetered)

Example 33 with JobId

use of com.spotify.helios.common.descriptors.JobId in project helios by spotify.

the class TaskHistoryWriter method putBack.

/**
 * Pushes {@code event} back onto the deque kept for its job and bumps the pending-event count.
 * When that deque already holds {@code MAX_QUEUE_SIZE} or more events the event is dropped.
 *
 * @param event the event to return to its job's deque
 */
private void putBack(TaskStatusEvent event) {
    final JobId jobId = event.getStatus().getJob().getId();
    final Deque<TaskStatusEvent> pending = getDeque(jobId);
    synchronized (pending) {
        // Only re-queue while there is room; a full deque means the event is simply tossed.
        if (pending.size() < MAX_QUEUE_SIZE) {
            pending.push(event);
            count.incrementAndGet();
        }
    }
}
Also used : TaskStatusEvent(com.spotify.helios.common.descriptors.TaskStatusEvent) JobId(com.spotify.helios.common.descriptors.JobId)

Example 34 with JobId

use of com.spotify.helios.common.descriptors.JobId in project helios by spotify.

the class TaskHistoryWriter method getNext.

/**
 * Removes and returns the event chosen by {@code findEldestEvent()}, retrying until the chosen
 * event is still at the head of its job's deque at the moment the deque lock is held.
 *
 * <p>On success the event is polled from the deque, {@code count} is decremented, and the job's
 * entry is removed from {@code items} if its deque is now empty.
 *
 * @return the removed event, or {@code null} when {@code findEldestEvent()} finds nothing
 */
private TaskStatusEvent getNext() {
    while (true) {
        final TaskStatusEvent current = findEldestEvent();
        // Didn't find anything that needed processing?
        if (current == null) {
            return null;
        }
        final JobId id = current.getStatus().getJob().getId();
        final Deque<TaskStatusEvent> deque = items.get(id);
        if (deque == null) {
            // shouldn't happen because we should be the only one pulling items off, but....
            continue;
        }
        synchronized (deque) {
            // NOTE(review): peek() returns null for an empty deque, which would throw a
            // NullPointerException here — confirm an empty deque can never be observed.
            if (!deque.peek().equals(current)) {
                // item got rolled off, try again
                continue;
            }
            // Pull it off the queue and be paranoid.
            final TaskStatusEvent newCurrent = deque.poll();
            count.decrementAndGet();
            checkState(current.equals(newCurrent), "current should equal newCurrent");
            // Safe because this is the *only* place we hold these two locks at the same time.
            synchronized (items) {
                // Extra paranoia: curDeque should always == deque
                final Deque<TaskStatusEvent> curDeque = items.get(id);
                // Drop the job's entry once its deque has been drained.
                if (curDeque != null && curDeque.isEmpty()) {
                    items.remove(id);
                }
            }
            return current;
        }
    }
}
Also used : TaskStatusEvent(com.spotify.helios.common.descriptors.TaskStatusEvent) JobId(com.spotify.helios.common.descriptors.JobId)

Example 35 with JobId

use of com.spotify.helios.common.descriptors.JobId in project helios by spotify.

the class ZooKeeperMasterModel method getHostStatus.

/**
 * Assembles the current status of the host named by {@code host}.
 *
 * @param host the name of the host to look up
 * @return the assembled {@link HostStatus}, or {@code null} (after logging a warning) when the
 *         host is not registered
 */
@Override
public HostStatus getHostStatus(final String host) {
    final ZooKeeperClient zk = provider.get("getHostStatus");
    final boolean registered = ZooKeeperRegistrarUtil.isHostRegistered(zk, host);
    if (!registered) {
        log.warn("Host {} isn't registered in ZooKeeper.", host);
        return null;
    }

    // Gather every piece of host state before any of it is handed to the builder.
    final boolean hostIsUp = checkHostUp(zk, host);
    final HostInfo info = getHostInfo(zk, host);
    final AgentInfo agent = getAgentInfo(zk, host);
    final Map<JobId, Deployment> jobs = getTasks(zk, host);
    final Map<JobId, TaskStatus> rawStatuses = getTaskStatuses(zk, host);
    final Map<String, String> env = getEnvironment(zk, host);
    final Map<String, String> hostLabels = getLabels(zk, host);

    // A missing status map is replaced by the shared empty one.
    final Map<JobId, TaskStatus> taskStatuses = fromNullable(rawStatuses).or(EMPTY_STATUSES);

    return HostStatus.newBuilder()
        .setJobs(jobs)
        .setStatuses(taskStatuses)
        .setHostInfo(info)
        .setAgentInfo(agent)
        .setStatus(hostIsUp ? UP : DOWN)
        .setEnvironment(env)
        .setLabels(hostLabels)
        .build();
}
Also used : ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) Deployment(com.spotify.helios.common.descriptors.Deployment) AgentInfo(com.spotify.helios.common.descriptors.AgentInfo) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) HostInfo(com.spotify.helios.common.descriptors.HostInfo) JobId(com.spotify.helios.common.descriptors.JobId)

Aggregations

JobId (com.spotify.helios.common.descriptors.JobId)115 Test (org.junit.Test)68 TaskStatus (com.spotify.helios.common.descriptors.TaskStatus)41 Job (com.spotify.helios.common.descriptors.Job)37 HeliosClient (com.spotify.helios.client.HeliosClient)35 Deployment (com.spotify.helios.common.descriptors.Deployment)29 Matchers.containsString (org.hamcrest.Matchers.containsString)25 DockerClient (com.spotify.docker.client.DockerClient)19 JobStatus (com.spotify.helios.common.descriptors.JobStatus)19 JobDeployResponse (com.spotify.helios.common.protocol.JobDeployResponse)16 CreateJobResponse (com.spotify.helios.common.protocol.CreateJobResponse)13 IOException (java.io.IOException)12 HostStatus (com.spotify.helios.common.descriptors.HostStatus)11 Map (java.util.Map)11 LogStream (com.spotify.docker.client.LogStream)10 HeliosRuntimeException (com.spotify.helios.common.HeliosRuntimeException)10 KeeperException (org.apache.zookeeper.KeeperException)9 TaskStatusEvent (com.spotify.helios.common.descriptors.TaskStatusEvent)8 AgentMain (com.spotify.helios.agent.AgentMain)7 PortMapping (com.spotify.helios.common.descriptors.PortMapping)7