use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse in project mantis by Netflix.
the class JobClusterActor method getFilteredNonTerminalJobList.
private Observable<MantisJobMetadataView> getFilteredNonTerminalJobList(ListJobCriteria request, Set<JobId> prefilteredJobIdSet) {
if (logger.isTraceEnabled()) {
logger.trace("Entering JobClusterActor:getFilteredNonTerminalJobList");
}
Duration timeout = Duration.ofMillis(500);
if ((request.getJobState().isPresent() && request.getJobState().get().equals(JobState.MetaState.Terminal))) {
if (logger.isTraceEnabled()) {
logger.trace("Exit JobClusterActor:getFilteredNonTerminalJobList with empty");
}
return Observable.empty();
}
List<JobInfo> jobInfoList;
//
if (!prefilteredJobIdSet.isEmpty()) {
jobInfoList = prefilteredJobIdSet.stream().map((jId) -> jobManager.getJobInfoForNonTerminalJob(jId)).filter((jInfoOp) -> jInfoOp.isPresent()).map((jInfoOp) -> jInfoOp.get()).collect(Collectors.toList());
} else {
// no prefiltering applied start with complete set of non terminal jobs
jobInfoList = jobManager.getAllNonTerminalJobsList();
}
List<JobInfo> shortenedList = jobInfoList.subList(0, Math.min(jobInfoList.size(), request.getLimit().orElse(DEFAULT_ACTIVE_JOB_LIMIT)));
if (logger.isDebugEnabled()) {
logger.debug("List of non terminal jobs {}", jobInfoList);
}
return Observable.from(shortenedList).flatMap((jInfo) -> {
GetJobDetailsRequest req = new GetJobDetailsRequest("system", jInfo.jobId);
CompletionStage<GetJobDetailsResponse> respCS = ask(jInfo.jobActor, req, timeout).thenApply(GetJobDetailsResponse.class::cast);
return Observable.from(respCS.toCompletableFuture(), Schedulers.io()).onErrorResumeNext(ex -> {
logger.warn("caught exception {}", ex.getMessage(), ex);
return Observable.empty();
});
}).filter((resp) -> resp != null && resp.getJobMetadata().isPresent()).map((resp) -> resp.getJobMetadata().get()).map((metaData) -> new MantisJobMetadataView(metaData, request.getStageNumberList(), request.getWorkerIndexList(), request.getWorkerNumberList(), request.getWorkerStateList(), false));
}
use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse in project mantis by Netflix.
the class JobClusterActor method onGetJobDetailsRequest.
@Override
public void onGetJobDetailsRequest(GetJobDetailsRequest req) {
if (logger.isTraceEnabled()) {
logger.trace("Enter GetJobDetails {}", req);
}
GetJobDetailsResponse response = new GetJobDetailsResponse(req.requestId, CLIENT_ERROR_NOT_FOUND, "Job " + req.getJobId() + " not found", empty());
Optional<JobInfo> jInfo = jobManager.getJobInfoForNonTerminalJob(req.getJobId());
if (jInfo.isPresent()) {
if (logger.isDebugEnabled()) {
logger.debug("Forwarding getJobDetails to job actor for {}", req.getJobId());
}
jInfo.get().jobActor.forward(req, getContext());
return;
} else {
// Could be a terminated job
Optional<CompletedJob> completedJob = jobManager.getCompletedJob(req.getJobId());
if (completedJob.isPresent()) {
if (logger.isDebugEnabled()) {
logger.debug("Found Job {} in completed state ", req.getJobId());
}
try {
Optional<IMantisJobMetadata> jobMetaOp = jobStore.getArchivedJob(req.getJobId().getId());
if (jobMetaOp.isPresent()) {
response = new GetJobDetailsResponse(req.requestId, SUCCESS, "", jobMetaOp);
} else {
response = new GetJobDetailsResponse(req.requestId, CLIENT_ERROR_NOT_FOUND, "Job " + req.getJobId() + " not found", empty());
}
} catch (Exception e) {
logger.warn("Exception {} reading Job {} from Storage ", e.getMessage(), req.getJobId());
response = new GetJobDetailsResponse(req.requestId, CLIENT_ERROR, "Exception reading Job " + req.getJobId() + " " + e.getMessage(), empty());
}
} else {
logger.warn("No such job {} ", req.getJobId());
}
}
getSender().tell(response, getSelf());
if (logger.isTraceEnabled()) {
logger.trace("Exit GetJobDetails {}", req);
}
}
use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse in project mantis by Netflix.
the class JobClusterTest method testJobSubmitWithNoSchedInfoUsesJobClusterValues.
@Test
public void testJobSubmitWithNoSchedInfoUsesJobClusterValues() {
TestKit probe = new TestKit(system);
String clusterName = "testJobSubmitWithNoSchedInfoUsesJobClusterValues";
MantisScheduler schedulerMock = mock(MantisScheduler.class);
MantisJobStore jobStoreMock = mock(MantisJobStore.class);
List<Label> clusterLabels = new ArrayList<>();
Label label = new Label("clabelName", "cLabelValue");
clusterLabels.add(label);
final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName, clusterLabels);
ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
assertEquals(SUCCESS, createResp.responseCode);
try {
final JobDefinition jobDefn = new JobDefinition.Builder().withName(clusterName).withVersion("0.0.1").withSubscriptionTimeoutSecs(0).withUser("njoshi").build();
String jobId = clusterName + "-1";
JobTestHelper.submitJobAndVerifySuccess(probe, clusterName, jobClusterActor, jobDefn, jobId);
jobClusterActor.tell(new GetJobDetailsRequest("nj", JobId.fromId(jobId).get()), probe.getRef());
GetJobDetailsResponse detailsResp = probe.expectMsgClass(GetJobDetailsResponse.class);
assertEquals(SUCCESS, detailsResp.responseCode);
assertEquals(JobState.Accepted, detailsResp.getJobMetadata().get().getState());
//
assertEquals(clusterLabels.size() + LabelManager.numberOfMandatoryLabels(), detailsResp.getJobMetadata().get().getLabels().size());
// confirm that the clusters labels got inherited
assertEquals(1, detailsResp.getJobMetadata().get().getLabels().stream().filter(l -> l.getName().equals("clabelName")).count());
// assertEquals(label, detailsResp.getJobMetadata().get().getLabels().get(0));
// Now submit another one with labels, it should not inherit cluster labels
Label jobLabel = new Label("jobLabel", "jobValue");
List<Label> jobLabelList = new ArrayList<>();
jobLabelList.add(jobLabel);
final JobDefinition jobDefn2 = new JobDefinition.Builder().withName(clusterName).withVersion("0.0.1").withLabels(jobLabelList).withSubscriptionTimeoutSecs(0).withUser("njoshi").build();
String jobId2 = clusterName + "-2";
JobTestHelper.submitJobAndVerifySuccess(probe, clusterName, jobClusterActor, jobDefn2, jobId2);
jobClusterActor.tell(new GetJobDetailsRequest("nj", JobId.fromId(jobId2).get()), probe.getRef());
GetJobDetailsResponse detailsResp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
assertEquals(SUCCESS, detailsResp2.responseCode);
assertEquals(JobState.Accepted, detailsResp2.getJobMetadata().get().getState());
assertEquals(clusterLabels.size() + 2, detailsResp2.getJobMetadata().get().getLabels().size());
// confirm that the clusters labels got inherited
// assertEquals(jobLabel, detailsResp2.getJobMetadata().get().getLabels().get(0));
assertEquals(1, detailsResp2.getJobMetadata().get().getLabels().stream().filter(l -> l.getName().equals(jobLabel.getName())).count());
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
fail();
}
// Mockito.doThrow(IOException.class).when(jobStoreMock).storeNewJob(any());
}
use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse in project mantis by Netflix.
the class JobClusterTest method testJobClusterDisable.
@Test
public void testJobClusterDisable() throws InterruptedException {
TestKit probe = new TestKit(system);
CountDownLatch storeCompletedCalled = new CountDownLatch(1);
String clusterName = "testJobClusterDisable";
MantisScheduler schedulerMock = mock(MantisScheduler.class);
MantisJobStore jobStoreMock = mock(MantisJobStore.class);
final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName);
ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
assertEquals(SUCCESS, createResp.responseCode);
try {
final JobDefinition jobDefn = createJob(clusterName, 1, MantisJobDurationType.Transient);
String jobId = clusterName + "-1";
IMantisJobMetadata completedJobMock = new MantisJobMetadataImpl.Builder().withJobId(new JobId(clusterName, 1)).withJobDefinition(jobDefn).withJobState(JobState.Completed).build();
when(jobStoreMock.getArchivedJob(any())).thenReturn(of(completedJobMock));
doAnswer((Answer) invocation -> {
storeCompletedCalled.countDown();
return null;
}).when(jobStoreMock).storeCompletedJobForCluster(any(), any());
JobTestHelper.submitJobAndVerifySuccess(probe, clusterName, jobClusterActor, jobDefn, jobId);
JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, jobId, SUCCESS, JobState.Accepted);
jobClusterActor.tell(new DisableJobClusterRequest(clusterName, "user"), probe.getRef());
DisableJobClusterResponse disableResp = probe.expectMsgClass(DisableJobClusterResponse.class);
assertEquals(SUCCESS, disableResp.responseCode);
jobClusterActor.tell(new GetJobClusterRequest(clusterName), probe.getRef());
GetJobClusterResponse getJobClusterResp = probe.expectMsgClass(GetJobClusterResponse.class);
assertTrue(getJobClusterResp.getJobCluster().get().isDisabled());
jobClusterActor.tell(new GetJobDetailsRequest(clusterName, JobId.fromId(jobId).get()), probe.getRef());
GetJobDetailsResponse jobDetailsResp = probe.expectMsgClass(GetJobDetailsResponse.class);
assertEquals(SUCCESS, jobDetailsResp.responseCode);
assertEquals(jobId, jobDetailsResp.getJobMetadata().get().getJobId().getId());
assertEquals(JobState.Completed, jobDetailsResp.getJobMetadata().get().getState());
verify(jobStoreMock, times(1)).createJobCluster(any());
verify(jobStoreMock, times(2)).updateJobCluster(any());
verify(jobStoreMock, times(1)).storeNewJob(any());
verify(jobStoreMock, times(1)).updateStage(any());
verify(jobStoreMock, times(2)).updateJob(any());
verify(jobStoreMock, times(1)).storeNewWorkers(any(), any());
storeCompletedCalled.await(1, TimeUnit.SECONDS);
} catch (Exception e) {
e.printStackTrace();
fail();
}
}
use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse in project mantis by Netflix.
the class JobClusterManagerTest method testBootStrapJobClustersAndJobs.
@Test
public void testBootStrapJobClustersAndJobs() {
TestKit probe = new TestKit(system);
JobTestHelper.deleteAllFiles();
MantisJobStore jobStore = new MantisJobStore(new MantisStorageProviderAdapter(new io.mantisrx.server.master.store.SimpleCachedFileStorageProvider(), eventPublisher));
MantisJobStore jobStoreSpied = Mockito.spy(jobStore);
MantisScheduler schedulerMock = mock(MantisScheduler.class);
ActorRef jobClusterManagerActor = system.actorOf(JobClustersManagerActor.props(jobStoreSpied, eventPublisher));
jobClusterManagerActor.tell(new JobClusterManagerProto.JobClustersManagerInitialize(schedulerMock, false), probe.getRef());
JobClustersManagerInitializeResponse iResponse = probe.expectMsgClass(Duration.of(10, ChronoUnit.MINUTES), JobClustersManagerInitializeResponse.class);
List<String> clusterNames = Lists.newArrayList("testBootStrapJobClustersAndJobs1", "testBootStrapJobClustersAndJobs2", "testBootStrapJobClustersAndJobs3");
String clusterWithNoJob = "testBootStrapJobClusterWithNoJob";
createJobClusterAndAssert(jobClusterManagerActor, clusterWithNoJob);
WorkerMigrationConfig migrationConfig = new WorkerMigrationConfig(MigrationStrategyEnum.PERCENTAGE, "{\"percentToMove\":60, \"intervalMs\":30000}");
// Create 3 clusters and submit 1 job each
for (String cluster : clusterNames) {
createJobClusterAndAssert(jobClusterManagerActor, cluster, migrationConfig);
submitJobAndAssert(jobClusterManagerActor, cluster);
if (cluster.equals("testBootStrapJobClustersAndJobs1")) {
// send worker events for job 1 so it goes to started state
String jobId = "testBootStrapJobClustersAndJobs1-1";
WorkerId workerId = new WorkerId(jobId, 0, 1);
WorkerEvent launchedEvent = new WorkerLaunched(workerId, 0, "host1", "vm1", empty(), new WorkerPorts(Lists.newArrayList(8000, 9000, 9010, 9020, 9030)));
jobClusterManagerActor.tell(launchedEvent, probe.getRef());
WorkerEvent startInitEvent = new WorkerStatus(new Status(workerId.getJobId(), 1, workerId.getWorkerIndex(), workerId.getWorkerNum(), TYPE.INFO, "test START_INIT", MantisJobState.StartInitiated));
jobClusterManagerActor.tell(startInitEvent, probe.getRef());
WorkerEvent heartBeat = new WorkerHeartbeat(new Status(jobId, 1, workerId.getWorkerIndex(), workerId.getWorkerNum(), TYPE.HEARTBEAT, "", MantisJobState.Started));
jobClusterManagerActor.tell(heartBeat, probe.getRef());
// get Job status
jobClusterManagerActor.tell(new GetJobDetailsRequest("user", JobId.fromId(jobId).get()), probe.getRef());
GetJobDetailsResponse resp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
// Ensure its launched
assertEquals(SUCCESS, resp2.responseCode);
assertEquals(JobState.Launched, resp2.getJobMetadata().get().getState());
}
}
// kill 1 of the jobs to test archive path
JobClusterManagerProto.KillJobRequest killRequest = new JobClusterManagerProto.KillJobRequest("testBootStrapJobClustersAndJobs2-1", JobCompletedReason.Killed.toString(), "njoshi");
jobClusterManagerActor.tell(killRequest, probe.getRef());
JobClusterManagerProto.KillJobResponse killJobResponse = probe.expectMsgClass(JobClusterManagerProto.KillJobResponse.class);
assertEquals(SUCCESS, killJobResponse.responseCode);
JobTestHelper.sendWorkerTerminatedEvent(probe, jobClusterManagerActor, "testBootStrapJobClustersAndJobs2-1", new WorkerId("testBootStrapJobClustersAndJobs2-1", 0, 1));
try {
Thread.sleep(500);
} catch (InterruptedException e) {
e.printStackTrace();
}
// Stop job cluster Manager Actor
system.stop(jobClusterManagerActor);
// create new instance
jobClusterManagerActor = system.actorOf(JobClustersManagerActor.props(jobStoreSpied, eventPublisher));
// initialize it
jobClusterManagerActor.tell(new JobClusterManagerProto.JobClustersManagerInitialize(schedulerMock, true), probe.getRef());
JobClustersManagerInitializeResponse initializeResponse = probe.expectMsgClass(JobClustersManagerInitializeResponse.class);
// probe.expectMsgClass(Duration.of(10, ChronoUnit.MINUTES),JobClusterManagerProto.JobClustersManagerInitializeResponse.class);
// probe.expectMsgClass(JobClusterManagerProto.JobClustersManagerInitializeResponse.class);
assertEquals(SUCCESS, initializeResponse.responseCode);
// Get Cluster Config
jobClusterManagerActor.tell(new GetJobClusterRequest("testBootStrapJobClustersAndJobs1"), probe.getRef());
GetJobClusterResponse clusterResponse = probe.expectMsgClass(GetJobClusterResponse.class);
assertEquals(SUCCESS, clusterResponse.responseCode);
assertTrue(clusterResponse.getJobCluster().isPresent());
WorkerMigrationConfig mConfig = clusterResponse.getJobCluster().get().getMigrationConfig();
assertEquals(migrationConfig.getStrategy(), mConfig.getStrategy());
assertEquals(migrationConfig.getConfigString(), migrationConfig.getConfigString());
// get Job status
jobClusterManagerActor.tell(new GetJobDetailsRequest("user", JobId.fromId("testBootStrapJobClustersAndJobs1-1").get()), probe.getRef());
GetJobDetailsResponse resp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
// Ensure its launched
System.out.println("Resp2 -> " + resp2.message);
assertEquals(SUCCESS, resp2.responseCode);
assertEquals(JobState.Launched, resp2.getJobMetadata().get().getState());
// 1 jobs should be in completed state
jobClusterManagerActor.tell(new GetJobDetailsRequest("user", JobId.fromId("testBootStrapJobClustersAndJobs2-1").get()), probe.getRef());
resp2 = probe.expectMsgClass(Duration.of(10, ChronoUnit.MINUTES), GetJobDetailsResponse.class);
// Ensure its completed
assertEquals(SUCCESS, resp2.responseCode);
assertEquals(JobState.Completed, resp2.getJobMetadata().get().getState());
jobClusterManagerActor.tell(new GetJobDetailsRequest("user", JobId.fromId("testBootStrapJobClustersAndJobs3-1").get()), probe.getRef());
resp2 = probe.expectMsgClass(Duration.of(10, ChronoUnit.MINUTES), GetJobDetailsResponse.class);
// Ensure its Accepted
assertEquals(SUCCESS, resp2.responseCode);
assertEquals(JobState.Accepted, resp2.getJobMetadata().get().getState());
try {
Optional<JobWorker> workerByIndex = resp2.getJobMetadata().get().getWorkerByIndex(1, 0);
assertTrue(workerByIndex.isPresent());
Optional<IMantisStageMetadata> stageMetadata = resp2.getJobMetadata().get().getStageMetadata(1);
assertTrue(stageMetadata.isPresent());
JobWorker workerByIndex1 = stageMetadata.get().getWorkerByIndex(0);
System.out.println("Got worker by index : " + workerByIndex1);
Optional<JobWorker> worker = resp2.getJobMetadata().get().getWorkerByNumber(1);
assertTrue(worker.isPresent());
} catch (io.mantisrx.server.master.persistence.exceptions.InvalidJobException e) {
e.printStackTrace();
}
jobClusterManagerActor.tell(new GetLastSubmittedJobIdStreamRequest("testBootStrapJobClustersAndJobs1"), probe.getRef());
GetLastSubmittedJobIdStreamResponse lastSubmittedJobIdStreamResponse = probe.expectMsgClass(Duration.of(10, ChronoUnit.MINUTES), GetLastSubmittedJobIdStreamResponse.class);
lastSubmittedJobIdStreamResponse.getjobIdBehaviorSubject().get().take(1).toBlocking().subscribe((jId) -> {
assertEquals(new JobId("testBootStrapJobClustersAndJobs1", 1), jId);
});
jobClusterManagerActor.tell(new GetJobClusterRequest(clusterWithNoJob), probe.getRef());
GetJobClusterResponse jobClusterResponse = probe.expectMsgClass(Duration.of(10, ChronoUnit.MINUTES), GetJobClusterResponse.class);
assertEquals(SUCCESS, jobClusterResponse.responseCode);
assertTrue(jobClusterResponse.getJobCluster().isPresent());
assertEquals(clusterWithNoJob, jobClusterResponse.getJobCluster().get().getName());
// 1 running worker
verify(schedulerMock, timeout(100_1000).times(1)).initializeRunningWorker(any(), any());
// 2 worker schedule requests
verify(schedulerMock, timeout(100_000).times(4)).scheduleWorker(any());
try {
Mockito.verify(jobStoreSpied).loadAllArchivedJobsAsync();
Mockito.verify(jobStoreSpied).loadAllActiveJobs();
Mockito.verify(jobStoreSpied).loadAllCompletedJobs();
Mockito.verify(jobStoreSpied).archiveWorker(any());
Mockito.verify(jobStoreSpied).archiveJob(any());
} catch (IOException e) {
e.printStackTrace();
fail();
}
}
Aggregations