use of com.spotify.helios.common.descriptors.JobId in project helios by spotify.
the class ZooKeeperMasterModel method getTaskStatuses.
/**
 * Collects the status of every job listed for {@code host}.
 *
 * <p>A job is left out of the result when its status is {@code null} or when looking it up
 * fails with a {@link HeliosRuntimeException}; one unreadable status therefore does not fail
 * the whole call.
 *
 * @param client ZooKeeper client handed to the job listing and status lookups
 * @param host   name of the host whose task statuses are wanted
 * @return a new mutable map from job id to task status; never {@code null}
 */
private Map<JobId, TaskStatus> getTaskStatuses(final ZooKeeperClient client, final String host) {
  final Map<JobId, TaskStatus> statuses = Maps.newHashMap();
  for (final JobId jobId : listHostJobs(client, host)) {
    final TaskStatus status;
    try {
      status = getTaskStatus(client, host, jobId);
    } catch (HeliosRuntimeException e) {
      // Skip this task status so we can return other available information instead of failing
      // the entire thing, but keep the cause in the log so a read failure can be told apart
      // from a status that simply has not been written.
      log.debug("Failed to read task {} status for host {}", jobId, host, e);
      continue;
    }
    if (status == null) {
      log.debug("Task {} status missing for host {}", jobId, host);
      continue;
    }
    statuses.put(jobId, status);
  }
  return statuses;
}
use of com.spotify.helios.common.descriptors.JobId in project helios by spotify.
the class TaskHistoryWriter method add.
/**
 * Queues {@code item} in the deque belonging to its job and then writes {@code items} to the
 * backing store.
 *
 * <p>Two size limits are enforced before the event is stored: a total limit
 * ({@code MAX_TOTAL_SIZE}, checked against {@code count}) and a per-deque limit
 * ({@code MAX_QUEUE_SIZE}). Failures while writing to the backing store are logged and not
 * rethrown.
 *
 * @param item the task status event to record
 * @throws InterruptedException presumably propagated from {@code getNext()} - confirm against
 *     that method
 */
private void add(TaskStatusEvent item) throws InterruptedException {
  // Total cap: keep calling getNext() until the shared counter is below the limit.
  // NOTE(review): this relies on getNext() lowering `count` - confirm.
  while (count.get() >= MAX_TOTAL_SIZE) {
    getNext();
  }

  final Deque<TaskStatusEvent> jobEvents = getDeque(item.getStatus().getJob().getId());
  synchronized (jobEvents) {
    // Per-deque cap: drop entries from the head until there is room for one more.
    while (jobEvents.size() >= MAX_QUEUE_SIZE) {
      jobEvents.removeFirst();
      count.decrementAndGet();
    }
    jobEvents.addLast(item);
    count.incrementAndGet();
  }

  try {
    backingStore.set(items);
  } catch (ClosedByInterruptException e) {
    log.debug("Writing task status event to backing store was interrupted");
  } catch (IOException e) {
    // Persistence is best effort: report the failure and carry on.
    log.warn("Failed to write task status event to backing store", e);
  }
}
use of com.spotify.helios.common.descriptors.JobId in project helios by spotify.
the class ZooKeeperRegistrarUtil method deregisterHost.
/**
 * Deregisters {@code host} by deleting its nodes from ZooKeeper.
 *
 * <p>The method first verifies that the host's config node exists, then queues delete
 * operations for: every job deployed to the host (the host-side job subtree, the job-side host
 * node when present, and the job's history for this host), the host's jobs node, the host
 * status subtree, the host's port allocations, the host id node when present, and finally the
 * host config root. All queued operations are submitted through a single
 * {@code client.transaction} call.
 *
 * @param client ZooKeeper client used for the reads and for the final transaction
 * @param host   name of the host to deregister
 * @throws HostNotFoundException   if the host config node does not exist, or if a
 *                                 {@code NoNodeException} is raised along the way
 * @throws HostStillInUseException declared for callers; nothing in this method body throws it
 *                                 directly
 * @throws HeliosRuntimeException  if any other {@code KeeperException} occurs
 */
public static void deregisterHost(final ZooKeeperClient client, final String host)
    throws HostNotFoundException, HostStillInUseException {
  log.info("deregistering host: {}", host);

  // TODO (dano): handle retry failures
  try {
    final List<ZooKeeperOperation> deletions = Lists.newArrayList();

    final String hostConfigPath = Paths.configHost(host);
    if (client.exists(hostConfigPath) == null) {
      throw new HostNotFoundException("host [" + host + "] does not exist");
    }

    // Jobs deployed to this host.
    final String hostJobsPath = Paths.configHostJobs(host);
    for (final String jobName : safeGetChildren(client, hostJobsPath)) {
      final JobId jobId = JobId.fromString(jobName);

      // Host-side job subtree; the listing is reversed before the deletes are queued.
      final List<String> hostJobNodes =
          safeListRecursive(client, Paths.configHostJob(host, jobId));
      for (final String path : reverse(hostJobNodes)) {
        deletions.add(delete(path));
      }

      // Job-side back reference to this host, if there is one.
      final String jobHostPath = Paths.configJobHost(jobId, host);
      if (client.exists(jobHostPath) != null) {
        deletions.add(delete(jobHostPath));
      }

      // History recorded for this job on this host.
      final List<String> historyNodes =
          safeListRecursive(client, Paths.historyJobHost(jobId, host));
      for (final String path : reverse(historyNodes)) {
        deletions.add(delete(path));
      }
    }
    deletions.add(delete(hostJobsPath));

    // Host status subtree.
    final List<String> statusNodes = safeListRecursive(client, Paths.statusHost(host));
    for (final String path : reverse(statusNodes)) {
      deletions.add(delete(path));
    }

    // Port allocations: child names are parsed as port numbers.
    final String hostPortsPath = Paths.configHostPorts(host);
    for (final String portName : safeGetChildren(client, hostPortsPath)) {
      deletions.add(delete(Paths.configHostPort(host, Integer.valueOf(portName))));
    }
    deletions.add(delete(hostPortsPath));

    // Host id node, if present.
    final String hostIdPath = Paths.configHostId(host);
    if (client.exists(hostIdPath) != null) {
      deletions.add(delete(hostIdPath));
    }

    // The host config root goes last.
    deletions.add(delete(hostConfigPath));

    client.transaction(deletions);
  } catch (NoNodeException e) {
    throw new HostNotFoundException(host);
  } catch (KeeperException e) {
    throw new HeliosRuntimeException(e);
  }
}
use of com.spotify.helios.common.descriptors.JobId in project helios by spotify.
the class ZooKeeperMasterModel method rollingUpdateAwaitRunning.
/**
 * Decides the next rolling-update step while waiting for the deployment group's job to be
 * running on {@code host}.
 *
 * <ul>
 *   <li>No task status yet: error if the deployment is gone or the rollout has timed out,
 *       otherwise yield.</li>
 *   <li>Task status present but not {@code RUNNING}: timed-out error if the rollout has timed
 *       out, otherwise yield. A status whose state is {@code null} is treated as not
 *       running.</li>
 *   <li>Task is {@code RUNNING}: error if the deployment is missing or belongs to a different
 *       deployment group, otherwise advance to the next task.</li>
 * </ul>
 *
 * @param client          ZooKeeper client used for the status and timeout lookups
 * @param opFactory       factory producing the resulting {@link RollingUpdateOp}
 * @param deploymentGroup deployment group being rolled out
 * @param host            host the job is expected to run on
 * @return the operation produced by {@code opFactory} for the detected situation
 */
private RollingUpdateOp rollingUpdateAwaitRunning(final ZooKeeperClient client,
                                                  final RollingUpdateOpFactory opFactory,
                                                  final DeploymentGroup deploymentGroup,
                                                  final String host) {
  // Resolve the job id once and use it for every lookup below.
  final JobId jobId = deploymentGroup.getJobId();
  final TaskStatus taskStatus = getTaskStatus(client, host, jobId);

  if (taskStatus == null) {
    // Handle cases where agent has not written job status to zookeeper.

    // If job is not listed under /config/hosts node, it may have been deployed successfully and
    // then manually undeployed. The job will not get redeployed, so treat this as a failure.
    final Deployment deployment = getDeployment(host, jobId);
    if (deployment == null) {
      return opFactory.error(
          "Job unexpectedly undeployed. Perhaps it was manually undeployed?", host,
          RollingUpdateError.JOB_UNEXPECTEDLY_UNDEPLOYED);
    }

    // Check if we've exceeded the timeout for the rollout operation.
    if (isRolloutTimedOut(client, deploymentGroup)) {
      return opFactory.error(
          "timed out while retrieving job status", host,
          RollingUpdateError.TIMED_OUT_RETRIEVING_JOB_STATUS);
    }

    // We haven't detected any errors, so assume the agent will write the status soon.
    return opFactory.yield();
  } else if (!TaskStatus.State.RUNNING.equals(taskStatus.getState())) {
    // Constant-first comparison: a status without a state counts as "not running" rather
    // than raising a NullPointerException.
    if (isRolloutTimedOut(client, deploymentGroup)) {
      // We exceeded the configured deploy timeout, and this job is still not running
      return rollingUpdateTimedoutError(opFactory, host, jobId, taskStatus);
    }

    return opFactory.yield();
  } else {
    // the job is running on the host. last thing we have to ensure is that it was
    // deployed by this deployment group. otherwise some weird conflict has occurred and we
    // won't be able to undeploy the job on the next update.
    final Deployment deployment = getDeployment(host, jobId);
    if (deployment == null) {
      return opFactory.error(
          "deployment for this job not found in zookeeper. "
          + "Perhaps it was manually undeployed?", host,
          RollingUpdateError.JOB_UNEXPECTEDLY_UNDEPLOYED);
    } else if (!Objects.equals(deployment.getDeploymentGroupName(),
                               deploymentGroup.getName())) {
      return opFactory.error(
          "job was already deployed, either manually or by a different deployment group", host,
          RollingUpdateError.JOB_ALREADY_DEPLOYED);
    }

    return opFactory.nextTask();
  }
}
use of com.spotify.helios.common.descriptors.JobId in project helios by spotify.
the class ZooKeeperMasterModel method getDeployOperations.
/**
 * Builds the list of ZooKeeper operations that deploy {@code deployment} to {@code host}.
 *
 * <p>The host must exist, the job must exist, and {@code token} must pass
 * {@code verifyToken} for the job. The resulting operations check the job config node, create
 * one node per static port of the job, create the job-to-host node, and create the task node
 * plus a task creation node tagged with a freshly generated operation id.
 *
 * @param client     ZooKeeper client used for the existence and conflict checks
 * @param host       host to deploy to
 * @param deployment deployment describing the job, goal and deployer details
 * @param token      token checked against the job via {@code verifyToken}
 * @return an immutable list of the operations to execute
 * @throws JobDoesNotExistException          if no job is found for the deployment's job id
 * @throws JobAlreadyDeployedException       if the task node for this host and job already
 *                                           exists
 * @throws TokenVerificationException        declared; presumably raised by {@code verifyToken}
 * @throws HostNotFoundException             declared; presumably raised by
 *                                           {@code assertHostExists}
 * @throws JobPortAllocationConflictException declared; presumably raised by
 *                                           {@code checkForPortConflicts}
 */
private List<ZooKeeperOperation> getDeployOperations(final ZooKeeperClient client,
                                                     final String host,
                                                     final Deployment deployment,
                                                     final String token)
    throws JobDoesNotExistException, JobAlreadyDeployedException, TokenVerificationException,
           HostNotFoundException, JobPortAllocationConflictException {
  assertHostExists(client, host);

  final JobId id = deployment.getJobId();
  final Job job = getJob(id);
  if (job == null) {
    throw new JobDoesNotExistException(id);
  }
  verifyToken(token, job);

  final UUID operationId = UUID.randomUUID();
  final String jobPath = Paths.configJob(id);
  final String taskPath = Paths.configHostJob(host, id);
  final String taskCreationPath = Paths.configHostJobCreation(host, id, operationId);

  // Every static port node stores the id of the job that claims it.
  final List<Integer> staticPorts = staticPorts(job);
  final byte[] idJson = id.toJsonBytes();
  final Map<String, byte[]> portNodes = Maps.newHashMap();
  for (final int port : staticPorts) {
    portNodes.put(Paths.configHostPort(host, port), idJson);
  }

  final Task task = new Task(job,
                             deployment.getGoal(),
                             deployment.getDeployerUser(),
                             deployment.getDeployerMaster(),
                             deployment.getDeploymentGroupName());

  final List<ZooKeeperOperation> operations = Lists.newArrayList(
      check(jobPath),
      create(portNodes),
      create(Paths.configJobHost(id, host)));

  // Probe for an existing task node before looking at ports.
  boolean taskNodeExists;
  try {
    client.getNode(taskPath);
    taskNodeExists = true;
  } catch (NoNodeException e) {
    taskNodeExists = false;
  } catch (KeeperException e) {
    throw new HeliosRuntimeException("reading existing task description failed", e);
  }

  if (taskNodeExists) {
    throw new JobAlreadyDeployedException(host, id);
  }

  // Port conflicts are checked only after the task node is known to be absent, so that an
  // already-deployed job is reported as JobAlreadyDeployedException rather than as a port
  // conflict.
  for (final int port : staticPorts) {
    checkForPortConflicts(client, host, port, id);
  }
  operations.add(create(taskPath, task));
  operations.add(create(taskCreationPath));

  return ImmutableList.copyOf(operations);
}
Aggregations