Use of io.mantisrx.master.jobcluster.proto.JobClusterProto.KillJobRequest in project mantis by Netflix.
From the class JobClusterActor, method onEnforceSLARequest.
/**
 * Handles an SLA enforcement request for this job cluster.
 *
 * <p>Increments the SLA-enforcement execution counter and queries the job manager for jobs
 * stuck in init, accepted and terminating states relative to the request's enforcement time.
 * If the cluster has an SLA, it then either submits as many jobs as the enforcer reports
 * missing for the minimum, or sends kill requests for the jobs the enforcer selects to honor
 * the maximum. All submit/kill messages are sent to this actor itself.
 *
 * @param request enforcement request; supplies the enforcement time and an optional job
 *                definition (and user) used for any jobs submitted to satisfy the minimum
 */
@Override
public void onEnforceSLARequest(JobClusterProto.EnforceSLARequest request) {
    if (logger.isTraceEnabled()) {
        // Two placeholders for two arguments; previously the request was silently dropped.
        logger.trace("Enter onEnforceSLA for JobCluster {} with request {}", this.name, request);
    }
    numSLAEnforcementExecutions.increment();
    long now = request.timeOfEnforcement.toEpochMilli();

    // The results of these queries are not used in this method. The calls are retained, in the
    // original order, because they may have side effects inside jobManager -- TODO confirm.
    jobManager.getJobActorsStuckInInit(now, getExpirePendingInitializeDelayMs());
    jobManager.getJobsStuckInAccepted(now, getExpireAcceptedDelayMs());
    // NOTE(review): the terminating check reuses the "accepted" expiry delay -- confirm intended.
    jobManager.getJobsStuckInTerminating(now, getExpireAcceptedDelayMs());

    if (!slaEnforcer.hasSLA()) {
        // Nothing to enforce (note: the exit trace below is skipped on this path).
        return;
    }

    int activeJobsCount = jobManager.activeJobsCount();
    int acceptedJobsCount = jobManager.acceptedJobsCount();

    // enforcing min
    int noOfJobsToLaunch = slaEnforcer.enforceSLAMin(activeJobsCount, acceptedJobsCount);
    if (noOfJobsToLaunch > 0) {
        logger.info("Submitting {} jobs for job name {} as active count is {} and accepted count is {}", noOfJobsToLaunch, name, activeJobsCount, acceptedJobsCount);
        // Submit as the user of the supplied job definition when present, else as the master.
        String user = MANTIS_MASTER_USER;
        if (request.jobDefinitionOp.isPresent()) {
            user = request.jobDefinitionOp.get().getUser();
        }
        for (int i = 0; i < noOfJobsToLaunch; i++) {
            getSelf().tell(new SubmitJobRequest(name, user, true, request.jobDefinitionOp), getSelf());
        }
    } else {
        // enforce max: only evaluated when no jobs had to be launched for the minimum.
        List<JobInfo> listOfJobs = new ArrayList<>(activeJobsCount + acceptedJobsCount);
        listOfJobs.addAll(jobManager.getActiveJobsList());
        listOfJobs.addAll(jobManager.getAcceptedJobsList());
        List<JobId> jobsToKill = slaEnforcer.enforceSLAMax(Collections.unmodifiableList(listOfJobs));
        for (JobId jobId : jobsToKill) {
            logger.info("Request termination for job {}", jobId);
            getSelf().tell(new KillJobRequest(jobId, "SLA enforcement", JobCompletedReason.Killed, MANTIS_MASTER_USER, ActorRef.noSender()), getSelf());
        }
    }

    if (logger.isTraceEnabled()) {
        logger.trace("Exit onEnforceSLA for JobCluster {}", name);
    }
}
Use of io.mantisrx.master.jobcluster.proto.JobClusterProto.KillJobRequest in project mantis by Netflix.
From the class JobClusterActor, method onJobClusterDisable.
/**
 * Disables this job cluster in response to a {@code DisableJobClusterRequest}.
 *
 * <p>On the success path, in order: builds metadata flagged as disabled (keeping the current
 * last-job count and cluster definition), writes it to the job store, replaces the in-memory
 * metadata, destroys the cron, switches the actor to {@code disabledBehavior}, sends a
 * {@code KillJobRequest} to the actor of every accepted and active job, cancels the
 * bookkeeping timer, publishes a {@code JOB_CLUSTER_DISABLED} audit event and replies to the
 * original sender with {@code SUCCESS}.
 *
 * <p>If any step throws, the sender receives a {@code SERVER_ERROR} response and the error
 * counter is incremented. Steps completed before the exception are not rolled back.
 *
 * @param req the disable request; supplies the request id for the response and the user
 *            recorded on the kill requests
 */
@Override
public void onJobClusterDisable(final DisableJobClusterRequest req) {
    if (logger.isTraceEnabled()) {
        logger.trace("Enter onJobClusterDisable {}", req);
    }
    // Capture the sender up front so both the success and error paths reply to the same actor.
    ActorRef sender = getSender();
    try {
        IJobClusterMetadata disabledMetadata = new JobClusterMetadataImpl.Builder()
            .withIsDisabled(true)
            .withLastJobCount(this.jobClusterMetadata.getLastJobCount())
            .withJobClusterDefinition((JobClusterDefinitionImpl) this.jobClusterMetadata.getJobClusterDefinition())
            .build();

        // update store first; the in-memory copy is only replaced if the store call returns
        jobStore.updateJobCluster(disabledMetadata);
        this.jobClusterMetadata = disabledMetadata;
        cronManager.destroyCron();

        // change behavior to disabled
        getContext().become(disabledBehavior);

        // send kill requests for all non terminal jobs
        List<JobInfo> jobsToKill = new ArrayList<>();
        jobsToKill.addAll(jobManager.getAcceptedJobsList());
        jobsToKill.addAll(jobManager.getActiveJobsList());
        for (JobInfo jobInfo : jobsToKill) {
            jobInfo.jobActor.tell(new KillJobRequest(jobInfo.jobId, "Job cluster disabled", JobCompletedReason.Killed, req.getUser(), ActorRef.noSender()), getSelf());
        }

        // disable SLA check timers
        getTimers().cancel(BOOKKEEPING_TIMER_KEY);

        eventPublisher.publishAuditEvent(new LifecycleEventsProto.AuditEvent(LifecycleEventsProto.AuditEvent.AuditEventType.JOB_CLUSTER_DISABLED, disabledMetadata.getJobClusterDefinition().getName(), name + " disabled"));
        sender.tell(new DisableJobClusterResponse(req.requestId, SUCCESS, String.format("%s disabled", name)), getSelf());
        numJobClusterDisable.increment();
        // Spelling corrected from "disabbled" so the message matches log searches for "disabled".
        logger.info("Job Cluster {} is disabled", this.name);
    } catch (Exception e) {
        String errorMsg = "Exception disabling cluster " + name + " due to " + e.getMessage();
        logger.error(errorMsg, e);
        sender.tell(new DisableJobClusterResponse(req.requestId, SERVER_ERROR, errorMsg), getSelf());
        numJobClusterDisableErrors.increment();
    }
    if (logger.isTraceEnabled()) {
        logger.trace("Exit onJobClusterDisable");
    }
}
Aggregations