use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobRequest in project mantis by Netflix.
the class JobClusterActor method onEnforceSLARequest.
/**
 * Handles an SLA enforcement request for this job cluster.
 *
 * <p>When the cluster has an SLA, the SLA enforcer is first asked how many jobs must be launched
 * given the current active and accepted job counts. If that number is positive, that many
 * {@code SubmitJobRequest}s are sent to this actor. Otherwise the enforcer is given the combined
 * list of active and accepted jobs and a {@code KillJobRequest} is sent to this actor for every
 * job id it returns.
 *
 * <p>When the cluster has no SLA the method returns early (the exit trace is not emitted on
 * that path).
 *
 * @param request supplies the time of enforcement and an optional job definition that is
 *                forwarded with any submit requests
 */
@Override
public void onEnforceSLARequest(JobClusterProto.EnforceSLARequest request) {
    if (logger.isTraceEnabled()) {
        // The message previously had a single placeholder for two arguments, so the request
        // itself was silently dropped from the log line.
        logger.trace("Enter onEnforceSLA for JobCluster {} with request {}", this.name, request);
    }
    numSLAEnforcementExecutions.increment();
    long now = request.timeOfEnforcement.toEpochMilli();

    // The results of these three queries are not consumed in this method.
    // NOTE(review): presumably the calls are kept for side effects inside jobManager - confirm
    // before removing them.
    List<JobInfo> pendingInitializationJobsPriorToCutoff =
        jobManager.getJobActorsStuckInInit(now, getExpirePendingInitializeDelayMs());
    List<JobInfo> jobsStuckInAcceptedList =
        jobManager.getJobsStuckInAccepted(now, getExpireAcceptedDelayMs());
    // NOTE(review): the terminating check reuses the "accepted" expiry delay - confirm intended.
    List<JobInfo> jobsStuckInTerminatingList =
        jobManager.getJobsStuckInTerminating(now, getExpireAcceptedDelayMs());

    if (!slaEnforcer.hasSLA()) {
        return;
    }

    int activeJobsCount = jobManager.activeJobsCount();
    int acceptedJobsCount = jobManager.acceptedJobsCount();

    // enforcing min
    int noOfJobsToLaunch = slaEnforcer.enforceSLAMin(activeJobsCount, acceptedJobsCount);
    if (noOfJobsToLaunch > 0) {
        logger.info("Submitting {} jobs for job name {} as active count is {} and accepted count is {}",
            noOfJobsToLaunch, name, activeJobsCount, acceptedJobsCount);
        // Submit on behalf of the user of the supplied job definition, if there is one.
        String user = MANTIS_MASTER_USER;
        if (request.jobDefinitionOp.isPresent()) {
            user = request.jobDefinitionOp.get().getUser();
        }
        for (int i = 0; i < noOfJobsToLaunch; i++) {
            getSelf().tell(new SubmitJobRequest(name, user, true, request.jobDefinitionOp), getSelf());
        }
    } else {
        // enforce max.
        List<JobInfo> listOfJobs = new ArrayList<>(activeJobsCount + acceptedJobsCount);
        listOfJobs.addAll(jobManager.getActiveJobsList());
        listOfJobs.addAll(jobManager.getAcceptedJobsList());
        List<JobId> jobsToKill = slaEnforcer.enforceSLAMax(Collections.unmodifiableList(listOfJobs));
        for (JobId jobId : jobsToKill) {
            logger.info("Request termination for job {}", jobId);
            getSelf().tell(
                new KillJobRequest(jobId, "SLA enforcement", JobCompletedReason.Killed, MANTIS_MASTER_USER, ActorRef.noSender()),
                getSelf());
        }
    }

    if (logger.isTraceEnabled()) {
        logger.trace("Exit onEnforceSLA for JobCluster {}", name);
    }
}
use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobRequest in project mantis by Netflix.
the class JobClusterTest method testJobSubmitFails.
/**
 * Checks that a job submission is answered with {@code SERVER_ERROR} when the job store mock
 * throws from {@code storeNewJob}, and that no worker is stored in that case.
 */
@Test
public void testJobSubmitFails() {
    TestKit probe = new TestKit(system);
    try {
        String clusterName = "testJobSubmitFails";
        MantisScheduler schedulerMock = mock(MantisScheduler.class);
        MantisJobStore jobStoreMock = mock(MantisJobStore.class);
        final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName);
        // Make every attempt to store a new job fail.
        Mockito.doThrow(Exception.class).when(jobStoreMock).storeNewJob(any());

        ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
        jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
        JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
        assertEquals(SUCCESS, createResp.responseCode);

        final JobDefinition jobDefn = createJob(clusterName, 1, MantisJobDurationType.Transient);
        jobClusterActor.tell(new SubmitJobRequest(clusterName, "user", Optional.ofNullable(jobDefn)), probe.getRef());
        SubmitJobResponse submitResponse = probe.expectMsgClass(SubmitJobResponse.class);
        assertEquals(SERVER_ERROR, submitResponse.responseCode);

        verify(jobStoreMock, times(1)).createJobCluster(any());
        verify(jobStoreMock, times(1)).updateJobCluster(any());
        verify(jobStoreMock, times(0)).storeNewWorker(any());
        verify(jobStoreMock, times(0)).storeNewWorkers(any(), any());
    } catch (Exception e) {
        // Fail the test but keep the original exception as the cause so the report shows
        // what actually went wrong (a bare fail() discarded it).
        throw new AssertionError("Unexpected exception in testJobSubmitFails", e);
    }
}
use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobRequest in project mantis by Netflix.
the class JobClusterActor method onJobClusterUpdateArtifact.
/**
 * Handles a request to change the artifact name and version of this job cluster.
 *
 * <p>If {@code isVersionUnique} rejects the requested version, the sender receives a
 * {@code CLIENT_ERROR} response and nothing else happens. Otherwise a new cluster config is
 * derived from the current one (new artifact name, version and upload timestamp), wrapped in
 * new cluster metadata and passed to {@code updateAndSaveJobCluster}; the sender receives a
 * {@code SUCCESS} response, an audit event is published and, unless the request asks to skip
 * submission, a {@code SubmitJobRequest} without a job definition is sent to this actor.
 * Any exception raised along the way is logged and reported to the sender as
 * {@code SERVER_ERROR}.
 *
 * @param artifactReq the artifact update request
 */
@Override
public void onJobClusterUpdateArtifact(UpdateJobClusterArtifactRequest artifactReq) {
    if (logger.isTraceEnabled()) {
        logger.trace("Entering JobClusterActor:onJobClusterUpdateArtifact");
    }

    // Captured once so that every reply below is addressed to the same ref.
    final ActorRef replyTo = getSender();
    try {
        final boolean versionIsUnique = isVersionUnique(
            artifactReq.getVersion(),
            jobClusterMetadata.getJobClusterDefinition().getJobClusterConfigs());
        if (!versionIsUnique) {
            final String rejection = String.format(
                "job cluster %s not updated as the version %s is not unique",
                name,
                artifactReq.getVersion());
            logger.error(rejection);
            replyTo.tell(
                new UpdateJobClusterArtifactResponse(artifactReq.requestId, CLIENT_ERROR, rejection),
                getSelf());
            // Returning here also skips the exit trace at the bottom of the method.
            return;
        }

        // Current config with the artifact details swapped in.
        final JobClusterConfig refreshedConfig = new JobClusterConfig.Builder()
            .from(jobClusterMetadata.getJobClusterDefinition().getJobClusterConfig())
            .withArtifactName(artifactReq.getArtifactName())
            .withVersion(artifactReq.getVersion())
            .withUploadedAt(System.currentTimeMillis())
            .build();

        final JobClusterDefinitionImpl refreshedDefinition = new JobClusterDefinitionImpl.Builder()
            .from(jobClusterMetadata.getJobClusterDefinition())
            .withJobClusterConfig(refreshedConfig)
            .build();

        final IJobClusterMetadata refreshedMetadata = new JobClusterMetadataImpl.Builder()
            .withIsDisabled(jobClusterMetadata.isDisabled())
            .withLastJobCount(jobClusterMetadata.getLastJobCount())
            .withJobClusterDefinition(refreshedDefinition)
            .build();

        updateAndSaveJobCluster(refreshedMetadata);

        replyTo.tell(
            new UpdateJobClusterArtifactResponse(artifactReq.requestId, SUCCESS, name + " artifact updated"),
            getSelf());

        eventPublisher.publishAuditEvent(
            new LifecycleEventsProto.AuditEvent(
                LifecycleEventsProto.AuditEvent.AuditEventType.JOB_CLUSTER_UPDATE,
                jobClusterMetadata.getJobClusterDefinition().getName(),
                name + " artifact update"));

        // Launch a job with the new artifact unless the caller opted out.
        final boolean submitAfterUpdate = !artifactReq.isSkipSubmit();
        if (submitAfterUpdate) {
            getSelf().tell(new SubmitJobRequest(name, artifactReq.getUser(), empty()), getSelf());
        }
    } catch (Exception e) {
        logger.error("job cluster not updated ", e);
        replyTo.tell(
            new UpdateJobClusterArtifactResponse(
                artifactReq.requestId,
                SERVER_ERROR,
                name + " Job cluster artifact updation failed " + e.getMessage()),
            getSelf());
    }

    if (logger.isTraceEnabled()) {
        logger.trace("Exit JobClusterActor:onJobClusterUpdateArtifact");
    }
}
use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobRequest in project mantis by Netflix.
the class JobClusterTest method testJobSubmitWithUnique.
@Test
public void testJobSubmitWithUnique() {
TestKit probe = new TestKit(system);
String clusterName = "testJobSubmitWithUnique";
MantisScheduler schedulerMock = mock(MantisScheduler.class);
MantisJobStore jobStoreMock = mock(MantisJobStore.class);
final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName);
ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
assertEquals(SUCCESS, createResp.responseCode);
try {
final JobDefinition jobDefn = createJob(clusterName, 1, MantisJobDurationType.Transient, "mytype");
String jobId = clusterName + "-1";
JobTestHelper.submitJobAndVerifySuccess(probe, clusterName, jobClusterActor, jobDefn, jobId);
JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, jobId, SUCCESS, JobState.Accepted);
JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobClusterActor, jobId, 1, new WorkerId(jobId, 0, 1));
JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, jobId, SUCCESS, JobState.Launched);
jobClusterActor.tell(new SubmitJobRequest(clusterName, "user", Optional.ofNullable(jobDefn)), probe.getRef());
SubmitJobResponse submitResponse = probe.expectMsgClass(SubmitJobResponse.class);
// Get the same job id back
assertTrue(submitResponse.getJobId().isPresent());
assertEquals(jobId, submitResponse.getJobId().get().getId());
JobTestHelper.killJobAndVerify(probe, clusterName, new JobId(clusterName, 1), jobClusterActor);
verify(jobStoreMock, times(1)).createJobCluster(any());
verify(jobStoreMock, times(1)).updateJobCluster(any());
verify(jobStoreMock, times(1)).storeNewJob(any());
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
fail();
}
}
use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobRequest in project mantis by Netflix.
the class JobClusterTest method testUpdateJobClusterArtifactWithAutoSubmit.
@Test
public void testUpdateJobClusterArtifactWithAutoSubmit() {
TestKit probe = new TestKit(system);
try {
String clusterName = "testUpdateJobClusterArtifactWithAutoSubmit";
MantisScheduler schedulerMock = mock(MantisScheduler.class);
MantisJobStore jobStoreMock = mock(MantisJobStore.class);
// when running concurrently with testGetJobDetailsForArchivedJob the following mock return is needed to avoid null pointer exception.
when(jobStoreMock.getArchivedJob(anyString())).thenReturn(empty());
SLA sla = new SLA(1, 1, null, null);
final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName, Lists.newArrayList(), sla);
ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
assertEquals(SUCCESS, createResp.responseCode);
// submit job with different scheduling info instance count compared to cluster default one.
final int job1InstanceCnt = 3;
final JobDefinition jobDefn = createJob(clusterName, MantisJobDurationType.Transient, new SchedulingInfo.Builder().numberOfStages(1).addStage(fakeJobCluster.getJobClusterConfig().getSchedulingInfo().forStage(1).toBuilder().numberOfInstances(job1InstanceCnt).build()).build());
String jobId = clusterName + "-1";
jobClusterActor.tell(new SubmitJobRequest(clusterName, "user", Optional.ofNullable(jobDefn)), probe.getRef());
SubmitJobResponse submitResponse = probe.expectMsgClass(SubmitJobResponse.class);
JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobClusterActor, jobId, 1, new WorkerId(clusterName, jobId, 0, 1));
JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, jobId, BaseResponse.ResponseCode.SUCCESS, JobState.Accepted);
// Update artifact with skip submit = false
String artifact = "newartifact.zip";
String version = "0.0.2";
jobClusterActor.tell(new UpdateJobClusterArtifactRequest(clusterName, artifact, version, false, user), probe.getRef());
UpdateJobClusterArtifactResponse resp = probe.expectMsgClass(UpdateJobClusterArtifactResponse.class);
// ensure new job was launched
String jobId2 = clusterName + "-2";
assertTrue(JobTestHelper.verifyJobStatusWithPolling(probe, jobClusterActor, jobId2, JobState.Accepted));
// send it worker events to move it to started state
JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobClusterActor, jobId2, 1, new WorkerId(clusterName, jobId2, 0, 1));
jobClusterActor.tell(new GetJobDetailsRequest("nj", JobId.fromId(jobId2).get()), probe.getRef());
GetJobDetailsResponse detailsResp = probe.expectMsgClass(Duration.ofSeconds(5), GetJobDetailsResponse.class);
assertEquals(JobState.Accepted, detailsResp.getJobMetadata().get().getState());
assertEquals(artifact, detailsResp.getJobMetadata().get().getArtifactName());
// verify newly launched job inherited instance count from previous job instance.
AtomicBoolean hasStage = new AtomicBoolean(false);
detailsResp.getJobMetadata().get().getSchedulingInfo().getStages().forEach((stageId, stageInfo) -> {
hasStage.set(true);
assertEquals(job1InstanceCnt, detailsResp.getJobMetadata().get().getSchedulingInfo().forStage(stageId).getNumberOfInstances());
});
assertTrue(hasStage.get());
assertTrue(JobTestHelper.verifyJobStatusWithPolling(probe, jobClusterActor, jobId2, JobState.Accepted));
} catch (InvalidJobException e) {
e.printStackTrace();
}
}
Aggregations