Search in sources :

Example 1 with KillJobResponse

use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobResponse in project mantis by Netflix.

The method onJobKillRequest of the class JobClustersManagerActor.

/**
 * Routes a kill-job request to the actor of the job cluster that owns the job.
 *
 * <p>The cluster is looked up by the cluster name of the requested job id. When it is found,
 * a {@code JobClusterProto.KillJobRequest} carrying the original sender is sent to that
 * cluster's actor. When it is not found, the sender is answered directly with a
 * {@code CLIENT_ERROR_NOT_FOUND} {@code KillJobResponse}.
 *
 * @param request the kill request; supplies the job id, reason and user
 */
@Override
public void onJobKillRequest(final KillJobRequest request) {
    // Parameterized logging, consistent with the other logger calls in this method.
    logger.info("Killing job {}", request);
    // Capture the sender up front so both branches address the same actor.
    final ActorRef sender = getSender();
    final JobId jobIdToKill = request.getJobId();
    final Optional<JobClusterInfo> jobClusterInfo = jobClusterInfoManager.getJobClusterInfo(jobIdToKill.getCluster());
    if (jobClusterInfo.isPresent()) {
        jobClusterInfo.get().jobClusterActor.tell(new JobClusterProto.KillJobRequest(jobIdToKill, request.getReason(), JobCompletedReason.Killed, request.getUser(), sender), getSelf());
    } else {
        logger.info("Job cluster {} not found", jobIdToKill.getCluster());
        sender.tell(new KillJobResponse(request.requestId, CLIENT_ERROR_NOT_FOUND, JobState.Noop, "Job cluster " + jobIdToKill.getCluster() + " doesn't exist", jobIdToKill, request.getUser()), getSelf());
    }
}
Also used : KillJobResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobResponse) JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) ActorRef(akka.actor.ActorRef) JobId(io.mantisrx.server.master.domain.JobId)

Example 2 with KillJobResponse

use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobResponse in project mantis by Netflix.

The method onKillJobResponse of the class JobClusterActor.

/**
 * Sent by job actor when the job shutdown is initiated.
 *
 * <p>For a successful response whose job is still tracked as non-terminal, this stops watching
 * the job actor, relays the response to the requestor, marks the job completed and, unless the
 * cluster is disabled, requests SLA enforcement. In every other case the outcome is only
 * relayed to the requestor (a {@code CLIENT_ERROR} "Job not found" when the job is unknown).
 * Nothing is relayed when the requestor is {@code null} or is this actor itself.
 *
 * @param resp Kill job response message
 */
@Override
public void onKillJobResponse(JobClusterProto.KillJobResponse resp) {
    if (logger.isTraceEnabled()) {
        logger.trace("Enter onKillJobResponse {}", resp);
    }
    // check requestor is not self to avoid an infinite loop
    final boolean relayToRequestor = resp.requestor != null && !getSelf().equals(resp.requestor);
    if (resp.responseCode == SUCCESS) {
        Optional<JobInfo> jInfo = jobManager.getJobInfoForNonTerminalJob(resp.jobId);
        if (jInfo.isPresent()) {
            JobInfo jobInfo = jInfo.get();
            // stop watching actor
            getContext().unwatch(jobInfo.jobActor);
            numJobShutdowns.increment();
            logger.info("Marking job {} as terminated", jobInfo.jobId);
            if (relayToRequestor) {
                resp.requestor.tell(new KillJobResponse(resp.requestId, resp.responseCode, resp.state, resp.message, resp.jobId, resp.user), getSelf());
            }
            Optional<CompletedJob> completedJob = jobManager.markCompleted(resp.jobId, resp.jobMetadata, resp.state);
            if (completedJob.isPresent()) {
                logger.info("In cleanupAfterJobKill for Job {} in state {} and metadata {} ", resp.jobId, resp.state, resp.jobMetadata);
                // enforce SLA
                if (!jobClusterMetadata.isDisabled()) {
                    requestSlaEnforcementAfterKill(resp, completedJob.get());
                }
            } else {
                logger.warn("Unable to mark job {} completed. ", resp.jobId);
            }
        } else if (relayToRequestor) {
            // should not get here
            resp.requestor.tell(new KillJobResponse(resp.requestId, CLIENT_ERROR, JobState.Noop, "Job not found", resp.jobId, resp.user), getSelf());
        }
    } else if (relayToRequestor) {
        // kill job was not successful relay to caller
        resp.requestor.tell(new KillJobResponse(resp.requestId, resp.responseCode, resp.state, resp.message, resp.jobId, resp.user), getSelf());
    }
    if (logger.isTraceEnabled()) {
        logger.trace("Exit onKillJobResponse {}", resp);
    }
}

/**
 * Sends this actor an {@code EnforceSLARequest} based on the job that was just completed.
 *
 * <p>Does nothing beyond logging when the cluster SLA has min and max both equal to 0. The job
 * definition is taken from the metadata carried by the response, falling back to the archived
 * job in the job store. Any exception raised while loading it is logged and not propagated.
 *
 * @param resp kill job response that may carry the metadata of the terminated job
 * @param completedJob the job that was just marked completed
 */
private void requestSlaEnforcementAfterKill(JobClusterProto.KillJobResponse resp, CompletedJob completedJob) {
    SLA sla = this.jobClusterMetadata.getJobClusterDefinition().getSLA();
    if (sla.getMin() == 0 && sla.getMax() == 0) {
        logger.info("No SLA specified nothing to enforce {}", sla);
        return;
    }
    try {
        // first check if response has job meta for last job
        Optional<IMantisJobMetadata> cJob = resp.jobMetadata;
        if (cJob == null || !cJob.isPresent()) {
            // else check archived jobs
            cJob = jobStore.getArchivedJob(completedJob.getJobId());
        }
        if (cJob != null && cJob.isPresent()) {
            getSelf().tell(new JobClusterProto.EnforceSLARequest(Instant.now(), of(cJob.get().getJobDefinition())), ActorRef.noSender());
        } else {
            logger.warn("Could not load last terminated job to use for triggering enforce SLA");
        }
    } catch (Exception e) {
        // should not get here; the trailing throwable argument keeps the stack trace in the log
        logger.warn("Exception {} loading completed Job {} to enforce SLA due", e.getMessage(), completedJob.getJobId(), e);
    }
}
Also used : KillJobResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobResponse) JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) CompletedJob(io.mantisrx.server.master.domain.JobClusterDefinitionImpl.CompletedJob) SLA(io.mantisrx.server.master.domain.SLA) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) TriggerNotFoundException(com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException) SchedulerException(com.netflix.fenzo.triggers.exceptions.SchedulerException) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException)

Aggregations

KillJobResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobResponse)2 JobClusterProto (io.mantisrx.master.jobcluster.proto.JobClusterProto)2 ActorRef (akka.actor.ActorRef)1 SchedulerException (com.netflix.fenzo.triggers.exceptions.SchedulerException)1 TriggerNotFoundException (com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException)1 IMantisJobMetadata (io.mantisrx.master.jobcluster.job.IMantisJobMetadata)1 CompletedJob (io.mantisrx.server.master.domain.JobClusterDefinitionImpl.CompletedJob)1 JobId (io.mantisrx.server.master.domain.JobId)1 SLA (io.mantisrx.server.master.domain.SLA)1 JobClusterAlreadyExistsException (io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException)1