Search in sources :

Example 11 with SLA

use of io.mantisrx.server.master.domain.SLA in project mantis by Netflix.

the class SLAEnforcerTest method slaMaxTest.

/**
 * Checks {@code enforceSLAMax} for an SLA with min 0 and max 2.
 *
 * <p>The input holds four jobs of cluster "cname": ids 1 and 3 are Accepted, ids 2 and 4 are
 * Launched. The enforcer is expected to return exactly one job to delete, "cname-1".
 */
@Test
public void slaMaxTest() {
    final long submittedAt = Instant.now().getMillis();
    final int minJobs = 0;
    final int maxJobs = 2;
    SLAEnforcer enforcer = new SLAEnforcer(new SLA(minJobs, maxJobs, null, null));

    List<JobInfo> candidates = Lists.newArrayList(
            new JobInfo(new JobId("cname", 1), null, submittedAt, null, JobState.Accepted, null),
            new JobInfo(new JobId("cname", 2), null, submittedAt, null, JobState.Launched, null),
            new JobInfo(new JobId("cname", 3), null, submittedAt, null, JobState.Accepted, null),
            new JobInfo(new JobId("cname", 4), null, submittedAt, null, JobState.Launched, null));

    // Two launched and two accepted jobs: the max of 2 is satisfied once job 2 is counted,
    // so only job 1 is left over for deletion.
    List<JobId> toDelete = enforcer.enforceSLAMax(candidates);

    assertEquals(1, toDelete.size());
    assertEquals("cname-1", toDelete.get(0).getId());
}
Also used : JobInfo(io.mantisrx.master.jobcluster.JobClusterActor.JobInfo) Instant(org.joda.time.Instant) SLA(io.mantisrx.server.master.domain.SLA) JobId(io.mantisrx.server.master.domain.JobId) Test(org.junit.Test)

Example 12 with SLA

use of io.mantisrx.server.master.domain.SLA in project mantis by Netflix.

the class SLAEnforcerTest method slaMaxTest4.

/**
 * Checks {@code enforceSLAMax} for an SLA with min 0 and max 2 when the input list is not
 * ordered by job id.
 *
 * <p>The input holds seven jobs of cluster "cname": ids 4, 6 and 7 are Launched, ids 1, 2, 3
 * and 5 are Accepted. The enforcer is expected to return jobs 1 through 5 for deletion.
 */
@Test
public void slaMaxTest4() {
    final long submittedAt = Instant.now().getMillis();
    final int minJobs = 0;
    final int maxJobs = 2;
    SLAEnforcer enforcer = new SLAEnforcer(new SLA(minJobs, maxJobs, null, null));

    // Deliberately shuffled: the list order does not follow the job ids.
    List<JobInfo> candidates = Lists.newArrayList(
            new JobInfo(new JobId("cname", 4), null, submittedAt, null, JobState.Launched, null),
            new JobInfo(new JobId("cname", 1), null, submittedAt, null, JobState.Accepted, null),
            new JobInfo(new JobId("cname", 2), null, submittedAt, null, JobState.Accepted, null),
            new JobInfo(new JobId("cname", 6), null, submittedAt, null, JobState.Launched, null),
            new JobInfo(new JobId("cname", 3), null, submittedAt, null, JobState.Accepted, null),
            new JobInfo(new JobId("cname", 5), null, submittedAt, null, JobState.Accepted, null),
            new JobInfo(new JobId("cname", 7), null, submittedAt, null, JobState.Launched, null));

    // Three launched and four accepted jobs: jobs 1, 2, 3, 4 and 5 must be terminated.
    List<JobId> toDelete = enforcer.enforceSLAMax(candidates);

    assertEquals(5, toDelete.size());
    for (int expectedId = 1; expectedId <= 5; expectedId++) {
        assertTrue(toDelete.contains(new JobId("cname", expectedId)));
    }
}
Also used : JobInfo(io.mantisrx.master.jobcluster.JobClusterActor.JobInfo) Instant(org.joda.time.Instant) SLA(io.mantisrx.server.master.domain.SLA) JobId(io.mantisrx.server.master.domain.JobId) Test(org.junit.Test)

Example 13 with SLA

use of io.mantisrx.server.master.domain.SLA in project mantis by Netflix.

the class JobClusterActor method onKillJobResponse.

/**
 * Sent by job actor when the job shutdown is initiated.
 *
 * <p>On a {@code SUCCESS} response for a job that is still tracked as non-terminal, this
 * handler stops watching the job actor, increments the shutdown counter, relays the response
 * to the original requestor (unless the requestor is this actor), and marks the job completed.
 * If the job was marked completed, the cluster is not disabled, and the SLA has a non-zero min
 * or max, it sends itself an {@code EnforceSLARequest} built from the job definition of the
 * just-terminated job (taken from the response, or from the archived job as a fallback).
 *
 * <p>If the job is not found, a {@code CLIENT_ERROR} response is relayed to the requestor. A
 * non-successful response is relayed to the requestor unchanged.
 *
 * @param resp Kill job response message
 */
@Override
public void onKillJobResponse(JobClusterProto.KillJobResponse resp) {
    if (logger.isTraceEnabled()) {
        logger.trace("Enter onKillJobResponse {}", resp);
    }
    if (resp.responseCode == SUCCESS) {
        Optional<JobInfo> jInfo = jobManager.getJobInfoForNonTerminalJob(resp.jobId);
        if (jInfo.isPresent()) {
            // stop watching actor
            getContext().unwatch(jInfo.get().jobActor);
            numJobShutdowns.increment();
            logger.info("Marking job {} as terminated", jInfo.get().jobId);
            // check requestor is not self to avoid an infinite loop
            if (resp.requestor != null && !getSelf().equals(resp.requestor)) {
                resp.requestor.tell(new KillJobResponse(resp.requestId, resp.responseCode, resp.state, resp.message, resp.jobId, resp.user), getSelf());
            }
            Optional<CompletedJob> completedJob = jobManager.markCompleted(resp.jobId, resp.jobMetadata, resp.state);
            if (completedJob.isPresent()) {
                logger.info("In cleanupAfterJobKill for Job {} in state {} and metadata {} ", resp.jobId, resp.state, resp.jobMetadata);
                // enforce SLA
                if (!jobClusterMetadata.isDisabled()) {
                    SLA sla = this.jobClusterMetadata.getJobClusterDefinition().getSLA();
                    if (sla.getMin() == 0 && sla.getMax() == 0) {
                        logger.info("No SLA specified nothing to enforce {}", sla);
                    } else {
                        try {
                            // first check if response has job meta for last job
                            Optional<IMantisJobMetadata> cJob = (resp.jobMetadata);
                            if (cJob == null || !cJob.isPresent()) {
                                // else check archived jobs
                                cJob = jobStore.getArchivedJob(completedJob.get().getJobId());
                            }
                            if (cJob != null && cJob.isPresent()) {
                                // SLA enforcement is requested via a message to self rather than run inline
                                getSelf().tell(new JobClusterProto.EnforceSLARequest(Instant.now(), of(cJob.get().getJobDefinition())), ActorRef.noSender());
                            } else {
                                logger.warn("Could not load last terminated job to use for triggering enforce SLA");
                            }
                        } catch (Exception e) {
                            // should not get here; pass the exception itself as the last argument so
                            // the stack trace and cause are logged, not just a possibly-null message
                            logger.warn("Exception loading completed Job {} to enforce SLA", completedJob.get().getJobId(), e);
                        }
                    }
                }
            } else {
                logger.warn("Unable to mark job {} completed. ", resp.jobId);
            }
        } else {
            // should not get here
            if (resp.requestor != null && !getSelf().equals(resp.requestor)) {
                resp.requestor.tell(new KillJobResponse(resp.requestId, CLIENT_ERROR, JobState.Noop, "Job not found", resp.jobId, resp.user), getSelf());
            }
        }
    } else {
        if (resp.requestor != null && !getSelf().equals(resp.requestor)) {
            // kill job was not successful relay to caller
            resp.requestor.tell(new KillJobResponse(resp.requestId, resp.responseCode, resp.state, resp.message, resp.jobId, resp.user), getSelf());
        }
    }
    if (logger.isTraceEnabled()) {
        logger.trace("Exit onKillJobResponse {}", resp);
    }
}
Also used : KillJobResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobResponse) JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) CompletedJob(io.mantisrx.server.master.domain.JobClusterDefinitionImpl.CompletedJob) SLA(io.mantisrx.server.master.domain.SLA) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) TriggerNotFoundException(com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException) SchedulerException(com.netflix.fenzo.triggers.exceptions.SchedulerException) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException)

Aggregations

SLA (io.mantisrx.server.master.domain.SLA)13 Test (org.junit.Test)10 JobId (io.mantisrx.server.master.domain.JobId)7 JobInfo (io.mantisrx.master.jobcluster.JobClusterActor.JobInfo)6 Instant (org.joda.time.Instant)6 SchedulerException (com.netflix.fenzo.triggers.exceptions.SchedulerException)3 TriggerNotFoundException (com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException)3 JobClusterAlreadyExistsException (io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException)3 ActorRef (akka.actor.ActorRef)2 LifecycleEventsProto (io.mantisrx.master.events.LifecycleEventsProto)2 IMantisJobMetadata (io.mantisrx.master.jobcluster.job.IMantisJobMetadata)2 UpdateJobClusterSLAResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterSLAResponse)2 JobClusterDefinitionImpl (io.mantisrx.server.master.domain.JobClusterDefinitionImpl)2 AbstractActorWithTimers (akka.actor.AbstractActorWithTimers)1 Props (akka.actor.Props)1 SupervisorStrategy (akka.actor.SupervisorStrategy)1 Terminated (akka.actor.Terminated)1 PatternsCS.ask (akka.pattern.PatternsCS.ask)1 LabelUtils (com.mantisrx.common.utils.LabelUtils)1 CronTrigger (com.netflix.fenzo.triggers.CronTrigger)1