Use of com.spotify.helios.common.descriptors.Goal in project helios by Spotify.
The class HeliosSoloDeployment, method undeployLeftoverJobs.
/**
 * Undeploy jobs left over by {@link TemporaryJobs}. TemporaryJobs should clean these up,
 * but sometimes a few are left behind for whatever reason.
 *
 * <p>Every job listed in the status of every host returned by the client is examined. A job
 * whose goal is not {@link Goal#UNDEPLOY} is explicitly undeployed from that host. In all
 * cases {@code awaitJobUndeployed} is then called for the job, passing
 * {@code jobUndeployWaitSeconds} with {@link TimeUnit#SECONDS}.
 *
 * <p>This is a best-effort cleanup: the first exception aborts the remaining work, is logged
 * as a warning and is not rethrown. If that exception is an {@link InterruptedException}, the
 * current thread's interrupt status is restored so that callers can still observe it.
 */
@VisibleForTesting
protected void undeployLeftoverJobs() {
  try {
    // See if there are jobs running on any helios agent. If we are using TemporaryJobs,
    // that class should've undeployed them at this point.
    // Any jobs still running at this point have only been partially cleaned up.
    // We look for jobs via hostStatus() because the job may have been deleted from the master,
    // but the agent may still not have had enough time to undeploy the job from itself.
    final List<String> hosts = heliosClient.listHosts().get();
    for (final String host : hosts) {
      final HostStatus hostStatus = heliosClient.hostStatus(host).get();
      final Map<JobId, TaskStatus> statuses = hostStatus.getStatuses();
      for (final Map.Entry<JobId, TaskStatus> status : statuses.entrySet()) {
        final JobId jobId = status.getKey();
        final Goal goal = status.getValue().getGoal();
        if (goal != Goal.UNDEPLOY) {
          log.info("Job {} is still set to {} on host {}. Undeploying it now.",
              jobId, goal, host);
          final JobUndeployResponse undeployResponse = heliosClient.undeploy(jobId, host).get();
          log.info("Undeploy response for job {} is {}.", jobId, undeployResponse.getStatus());
          if (undeployResponse.getStatus() != JobUndeployResponse.Status.OK) {
            log.warn("Undeploy response for job {} was not OK. This could mean that something "
                + "beat the helios-solo master in telling the helios-solo agent to "
                + "undeploy.", jobId);
          }
        }
        // Wait even when the goal was already UNDEPLOY: the job is still listed in the host's
        // statuses, so it has not been removed from the agent yet.
        log.info("Waiting for job {} to actually be undeployed...", jobId);
        awaitJobUndeployed(heliosClient, host, jobId, jobUndeployWaitSeconds, TimeUnit.SECONDS);
        log.info("Job {} successfully undeployed.", jobId);
      }
    }
  } catch (Exception e) {
    if (e instanceof InterruptedException) {
      // Catching InterruptedException clears the interrupt flag; put it back so code further
      // up the stack can still react to the interruption instead of silently losing it.
      Thread.currentThread().interrupt();
    }
    log.warn("Exception occurred when trying to clean up leftover jobs.", e);
  }
}
Aggregations