use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS in project mantis by Netflix.
the class JobClusterActor method onJobList.
/**
 * Handles a request to list the jobs of this cluster.
 *
 * <p>When the request criteria carry matching labels, job ids are first narrowed through
 * {@code jobManager.getJobsMatchingLabels}; if nothing matches, an empty SUCCESS response is
 * sent immediately. Otherwise the filtered non-terminal and terminal job lists are merged,
 * collected into one list and sent to the original sender as a SUCCESS response.
 *
 * @param request the list request; its criteria and {@code requestId} are read here
 */
@Override
public void onJobList(final ListJobsRequest request) {
    // The guard and the log call use the same level (the call used to be logger.info).
    if (logger.isDebugEnabled()) {
        logger.debug("Entering JCA:onJobList");
    }
    // Captured up front so the Rx callback below replies through these references.
    final ActorRef sender = getSender();
    final ActorRef self = getSelf();
    Set<JobId> jobIdsFilteredByLabelsSet = new HashSet<>();
    // If labels criterion is given prefilter by labels
    if (!request.getCriteria().getMatchingLabels().isEmpty()) {
        jobIdsFilteredByLabelsSet = jobManager.getJobsMatchingLabels(
                request.getCriteria().getMatchingLabels(),
                request.getCriteria().getLabelsOperand());
        // Found no jobs matching labels exit
        if (jobIdsFilteredByLabelsSet.isEmpty()) {
            if (logger.isTraceEnabled()) {
                logger.trace("Exit JCA:onJobList {}", jobIdsFilteredByLabelsSet.size());
            }
            sender.tell(new ListJobsResponse(request.requestId, SUCCESS, "", new ArrayList<>()), self);
            return;
        }
    }
    // Found jobs matching labels or no labels criterion given.
    // Apply additional criterion to both active and completed jobs
    // NOTE(review): subscribe() is called without an error handler, so a failure in either
    // source is not turned into a response for the sender here - confirm this is intended.
    getFilteredNonTerminalJobList(request.getCriteria(), jobIdsFilteredByLabelsSet)
            .mergeWith(getFilteredTerminalJobList(request.getCriteria(), jobIdsFilteredByLabelsSet))
            .collect(() -> Lists.<MantisJobMetadataView>newArrayList(), List::add)
            .doOnNext(resultList -> {
                if (logger.isTraceEnabled()) {
                    logger.trace("Exit JCA:onJobList {}", resultList.size());
                }
                sender.tell(new ListJobsResponse(request.requestId, SUCCESS, "", resultList), self);
            })
            .subscribe();
}
use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS in project mantis by Netflix.
the class JobClusterActor method initRunningJobs.
/**
 * Iterates over the active jobs handed in with the cluster initialization request.
 *
 * <ul>
 *   <li>A job already in a terminal state is passed to
 *       {@code jobManager.persistToCompletedJobAndArchiveJobTables} and not bootstrapped.</li>
 *   <li>A non-terminal job without scheduling info is logged and skipped.</li>
 *   <li>Every other job is bootstrapped through {@code jobManager.bootstrapJob}.</li>
 * </ul>
 *
 * <p>Once all jobs have been processed the last job id is published (only if the request
 * contained jobs), the bookkeeping timer is started, the actor switches to
 * {@code initializedBehavior} and a SUCCESS response is sent to {@code sender}.
 *
 * <p>NOTE(review): if the pipeline fails, the error is only logged; no response is sent to
 * {@code sender} and the behavior is not switched - confirm this is intended.
 *
 * @param initReq the initialization request carrying the active and completed job lists
 * @param sender  the actor that receives the initialization response
 */
private void initRunningJobs(JobClusterProto.InitializeJobClusterRequest initReq, ActorRef sender) {
    List<CompletedJob> completedJobsList = initReq.completedJobsList;
    List<IMantisJobMetadata> jobList = initReq.jobList;
    logger.info("In _initJobs for cluster {}: {} activeJobs and {} completedJobs", name, jobList.size(), completedJobsList.size());
    if (logger.isDebugEnabled()) {
        logger.debug("In _initJobs for cluster {} activeJobs -> {} and completedJobs -> {}", name, jobList, completedJobsList);
    }
    Observable.from(jobList)
            .flatMap((jobMeta) -> {
                if (JobState.isTerminalState(jobMeta.getState())) {
                    jobManager.persistToCompletedJobAndArchiveJobTables(jobMeta);
                    return Observable.empty();
                }
                if (jobMeta.getSchedulingInfo() == null) {
                    // Message is a single literal; the old concatenation lost the space after the period.
                    logger.error("Scheduling info is null for active job {} in cluster {}. Skipping bootstrap", jobMeta.getJobId(), name);
                    return Observable.empty();
                }
                return Observable.just(jobMeta);
            })
            .flatMap((jobMeta) -> jobManager.bootstrapJob((MantisJobMetadataImpl) jobMeta, this.jobClusterMetadata))
            .subscribe(
                    (jobInited) -> logger.info("Job Id {} initialized with code {}", jobInited.jobId, jobInited.responseCode),
                    // Pass the throwable as the last argument so the stack trace is not lost.
                    (error) -> logger.warn("Exception initializing jobs {}", error.getMessage(), error),
                    () -> {
                        if (!jobList.isEmpty()) {
                            JobId lastJobId = new JobId(this.name, initReq.lastJobNumber);
                            this.jobIdSubmissionSubject.onNext(lastJobId);
                        }
                        setBookkeepingTimer(BOOKKEEPING_INTERVAL_SECS);
                        getContext().become(initializedBehavior);
                        logger.info("Job Cluster {} initialized", this.name);
                        sender.tell(
                                new JobClusterProto.InitializeJobClusterResponse(
                                        initReq.requestId,
                                        SUCCESS,
                                        String.format("JobCluster %s initialized successfully", initReq.jobClusterDefinition.getName()),
                                        initReq.jobClusterDefinition.getName(),
                                        initReq.requestor),
                                getSelf());
                    });
}
use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS in project mantis by Netflix.
the class JobClustersManagerActor method initialize.
/**
 * Initializes this manager actor in response to a {@code JobClustersManagerInitialize} message.
 *
 * <p>Creates and watches the JobListHelperActor child, records the scheduler and creates the
 * {@code JobClusterInfoManager}. If the message asks for jobs to be loaded from the store,
 * all job clusters, active jobs and completed jobs are read, grouped by cluster name, and
 * each cluster actor is created and initialized in a blocking Rx pipeline. The sender gets
 * SUCCESS when that pipeline completes and SERVER_ERROR when it fails. Afterwards the
 * periodic reconcile timer is started and the archived job load is kicked off. If nothing
 * is to be loaded, the actor switches to {@code initializedBehavior} right away and replies
 * SUCCESS. Any exception thrown on the way is logged and answered with SERVER_ERROR.
 *
 * @param initMsg the initialization message (scheduler, request id, load-from-store flag)
 */
private void initialize(JobClustersManagerInitialize initMsg) {
    ActorRef sender = getSender();
    try {
        logger.info("In JobClustersManagerActor:initialize");
        this.jobListHelperActor = getContext().actorOf(JobListHelperActor.props(), "JobListHelperActor");
        getContext().watch(jobListHelperActor);
        mantisScheduler = initMsg.getScheduler();
        this.jobClusterInfoManager = new JobClusterInfoManager(jobStore, mantisScheduler, eventPublisher);

        if (initMsg.isLoadJobsFromStore()) {
            List<IJobClusterMetadata> storedClusters = jobStore.loadAllJobClusters();
            logger.info("Read {} job clusters from storage", storedClusters.size());
            List<IMantisJobMetadata> storedActiveJobs = jobStore.loadAllActiveJobs();
            logger.info("Read {} jobs from storage", storedActiveJobs.size());
            List<CompletedJob> storedCompletedJobs = jobStore.loadAllCompletedJobs();
            logger.info("Read {} completed jobs from storage", storedCompletedJobs.size());

            // Index cluster metadata by cluster name.
            Map<String, IJobClusterMetadata> clustersByName = new HashMap<>();
            for (IJobClusterMetadata clusterMeta : storedClusters) {
                clustersByName.put(clusterMeta.getJobClusterDefinition().getName(), clusterMeta);
            }

            // Group active and completed jobs by the cluster they belong to.
            Map<String, List<IMantisJobMetadata>> activeJobsByCluster = new HashMap<>();
            for (IMantisJobMetadata activeJob : storedActiveJobs) {
                activeJobsByCluster.computeIfAbsent(activeJob.getClusterName(), k -> new ArrayList<>()).add(activeJob);
            }
            Map<String, List<CompletedJob>> completedJobsByCluster = new HashMap<>();
            for (CompletedJob completedJob : storedCompletedJobs) {
                completedJobsByCluster.computeIfAbsent(completedJob.getName(), k -> new ArrayList<>()).add(completedJob);
            }

            // Use the configured master init timeout minus 60 seconds when that stays positive,
            // otherwise the configured value itself.
            long masterInitTimeoutSecs = ConfigurationProvider.getConfig().getMasterInitTimeoutSecs();
            long reducedTimeoutSecs = masterInitTimeoutSecs - 60;
            long clusterInitTimeoutSecs = reducedTimeoutSecs > 0 ? reducedTimeoutSecs : masterInitTimeoutSecs;
            Duration clusterInitTimeout = Duration.ofSeconds(clusterInitTimeoutSecs);

            Observable.from(clustersByName.values())
                    .filter((clusterMeta) -> clusterMeta != null && clusterMeta.getJobClusterDefinition() != null)
                    .flatMap((clusterMeta) -> {
                        Optional<JobClusterInfo> registered =
                                jobClusterInfoManager.createClusterActorAndRegister(clusterMeta.getJobClusterDefinition());
                        if (!registered.isPresent()) {
                            logger.info("skipping job cluster {} on bootstrap as actor creating failed", clusterMeta.getJobClusterDefinition().getName());
                            return Observable.empty();
                        }
                        JobClusterInfo clusterInfo = registered.get();
                        String clusterName = clusterMeta.getJobClusterDefinition().getName();

                        // Copy the per-cluster lists so the request owns its own (possibly empty) lists.
                        List<IMantisJobMetadata> activeForCluster = Lists.newArrayList();
                        List<IMantisJobMetadata> groupedActive = activeJobsByCluster.get(clusterName);
                        if (groupedActive != null) {
                            activeForCluster.addAll(groupedActive);
                        }
                        List<CompletedJob> completedForCluster = Lists.newArrayList();
                        List<CompletedJob> groupedCompleted = completedJobsByCluster.get(clusterName);
                        if (groupedCompleted != null) {
                            completedForCluster.addAll(groupedCompleted);
                        }

                        JobClusterProto.InitializeJobClusterRequest clusterInitRequest =
                                new JobClusterProto.InitializeJobClusterRequest(
                                        (JobClusterDefinitionImpl) clusterMeta.getJobClusterDefinition(),
                                        clusterMeta.isDisabled(),
                                        clusterMeta.getLastJobCount(),
                                        activeForCluster,
                                        completedForCluster,
                                        "system",
                                        getSelf(),
                                        false);
                        return jobClusterInfoManager.initializeCluster(clusterInfo, clusterInitRequest, clusterInitTimeout);
                    })
                    .filter(Objects::nonNull)
                    .toBlocking()
                    .subscribe(
                            (clusterInit) -> {
                                logger.info("JobCluster {} inited with code {}", clusterInit.jobClusterName, clusterInit.responseCode);
                                numJobClusterInitSuccesses.increment();
                            },
                            (error) -> {
                                logger.warn("Exception initializing clusters {}", error.getMessage(), error);
                                logger.error("JobClusterManagerActor had errors during initialization NOT transitioning to initialized behavior");
                                // The behavior is deliberately left unchanged on this path.
                                sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SERVER_ERROR, "JobClustersManager inited with errors"), getSelf());
                            },
                            () -> {
                                logger.info("JobClusterManagerActor transitioning to initialized behavior");
                                getContext().become(initializedBehavior);
                                sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SUCCESS, "JobClustersManager successfully inited"), getSelf());
                            });

            // These two steps follow the blocking subscribe unconditionally.
            getTimers().startPeriodicTimer(CHECK_CLUSTERS_TIMER_KEY, new ReconcileJobCluster(), Duration.ofSeconds(checkAgainInSecs));
            // kick off loading of archived jobs
            logger.info("Kicking off archived job load asynchronously");
            jobStore.loadAllArchivedJobsAsync();
        } else {
            // Nothing to load: become initialized immediately.
            getContext().become(initializedBehavior);
            sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SUCCESS, "JobClustersManager successfully inited"), getSelf());
        }
    } catch (Exception e) {
        logger.error("caught exception", e);
        sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SERVER_ERROR, e.getMessage()), getSelf());
    }
    logger.info("JobClustersManagerActor:initialize ends");
}
use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS in project mantis by Netflix.
the class JobListHelperActor method onJobClustersList.
/**
 * Gathers the metadata view of every job cluster in the request and replies with the list.
 *
 * <p>Each cluster actor in {@code request.jobClusterInfoMap} is asked for its cluster with a
 * 500 ms timeout. A failed ask is logged and contributes nothing to the result. Responses
 * that carry a cluster are accumulated, and the accumulated list is sent to
 * {@code request.sender}: with SUCCESS on completion, or with SERVER_ERROR and the error
 * message if the pipeline fails.
 *
 * @param request wrapper holding the original list request, the cluster info map and the
 *                actor to reply to
 */
private void onJobClustersList(ListJobClusterRequestWrapper request) {
    if (logger.isTraceEnabled()) {
        logger.trace("In onJobClustersListRequest {}", request);
    }
    ActorRef replyOrigin = getSender();
    Timeout askTimeout = new Timeout(Duration.create(500, TimeUnit.MILLISECONDS));
    List<MantisJobClusterMetadataView> collectedViews = Lists.newArrayList();

    // One ask per cluster actor; a failed ask degrades to an empty stream.
    Observable<JobClusterManagerProto.GetJobClusterResponse> clusterResponses =
            Observable.from(request.jobClusterInfoMap.values()).flatMap((clusterInfo) -> {
                JobClusterManagerProto.GetJobClusterRequest clusterQuery =
                        new JobClusterManagerProto.GetJobClusterRequest(clusterInfo.clusterName);
                CompletionStage<JobClusterManagerProto.GetJobClusterResponse> pendingResponse =
                        ask(clusterInfo.jobClusterActor, clusterQuery, askTimeout)
                                .thenApply(JobClusterManagerProto.GetJobClusterResponse.class::cast);
                return Observable.from(pendingResponse.toCompletableFuture(), Schedulers.io())
                        .onErrorResumeNext(ex -> {
                            logger.warn("caught exception {}", ex.getMessage(), ex);
                            return Observable.empty();
                        });
            });

    clusterResponses
            .filter((resp) -> resp != null && resp.getJobCluster().isPresent())
            .map((resp) -> resp.getJobCluster().get())
            .doOnError(this::logError)
            .subscribeOn(Schedulers.computation())
            .subscribe(
                    collectedViews::add,
                    (err) -> {
                        logger.warn("Exception in onJobClusterList ", err);
                        if (logger.isTraceEnabled()) {
                            logger.trace("Exit onJobClustersListRequest {}", err);
                        }
                        // Reply with whatever was collected before the failure.
                        request.sender.tell(
                                new JobClusterManagerProto.ListJobClustersResponse(
                                        request.listJobClustersRequest.requestId, SERVER_ERROR, err.getMessage(), collectedViews),
                                replyOrigin);
                    },
                    () -> {
                        if (logger.isTraceEnabled()) {
                            logger.trace("Exit onJobClustersListRequest {}", collectedViews);
                        }
                        request.sender.tell(
                                new JobClusterManagerProto.ListJobClustersResponse(
                                        request.listJobClustersRequest.requestId, SUCCESS, "", collectedViews),
                                replyOrigin);
                    });
}
use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS in project mantis by Netflix.
the class JobClusterTest method testJobClusterDisable.
/**
 * Verifies that disabling a job cluster succeeds, that the cluster then reports itself as
 * disabled, and that the previously accepted job is afterwards reported as Completed.
 * Also checks the number of calls made to the mocked job store.
 */
@Test
public void testJobClusterDisable() throws InterruptedException {
    final String clusterName = "testJobClusterDisable";
    final CountDownLatch completedJobStored = new CountDownLatch(1);
    TestKit probe = new TestKit(system);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    MantisScheduler schedulerMock = mock(MantisScheduler.class);

    // Create and initialize the cluster actor under test.
    final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName);
    ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
    jobClusterActor.tell(
            new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()),
            probe.getRef());
    JobClusterProto.InitializeJobClusterResponse initResponse =
            probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
    assertEquals(SUCCESS, initResponse.responseCode);

    try {
        final JobDefinition jobDefn = createJob(clusterName, 1, MantisJobDurationType.Transient);
        String jobId = clusterName + "-1";

        // The store reports the job as Completed when its archived copy is requested.
        IMantisJobMetadata completedJobMock = new MantisJobMetadataImpl.Builder()
                .withJobId(new JobId(clusterName, 1))
                .withJobDefinition(jobDefn)
                .withJobState(JobState.Completed)
                .build();
        when(jobStoreMock.getArchivedJob(any())).thenReturn(of(completedJobMock));
        doAnswer((Answer) invocation -> {
            completedJobStored.countDown();
            return null;
        }).when(jobStoreMock).storeCompletedJobForCluster(any(), any());

        // Submit a job and confirm it is accepted.
        JobTestHelper.submitJobAndVerifySuccess(probe, clusterName, jobClusterActor, jobDefn, jobId);
        JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, jobId, SUCCESS, JobState.Accepted);

        // Disable the cluster.
        jobClusterActor.tell(new DisableJobClusterRequest(clusterName, "user"), probe.getRef());
        DisableJobClusterResponse disableResponse = probe.expectMsgClass(DisableJobClusterResponse.class);
        assertEquals(SUCCESS, disableResponse.responseCode);

        // The cluster must now report itself as disabled.
        jobClusterActor.tell(new GetJobClusterRequest(clusterName), probe.getRef());
        GetJobClusterResponse clusterResponse = probe.expectMsgClass(GetJobClusterResponse.class);
        assertTrue(clusterResponse.getJobCluster().get().isDisabled());

        // The job must now be reported as Completed.
        jobClusterActor.tell(new GetJobDetailsRequest(clusterName, JobId.fromId(jobId).get()), probe.getRef());
        GetJobDetailsResponse detailsResponse = probe.expectMsgClass(GetJobDetailsResponse.class);
        assertEquals(SUCCESS, detailsResponse.responseCode);
        assertEquals(jobId, detailsResponse.getJobMetadata().get().getJobId().getId());
        assertEquals(JobState.Completed, detailsResponse.getJobMetadata().get().getState());

        // Expected interactions with the job store.
        verify(jobStoreMock, times(1)).createJobCluster(any());
        verify(jobStoreMock, times(2)).updateJobCluster(any());
        verify(jobStoreMock, times(1)).storeNewJob(any());
        verify(jobStoreMock, times(1)).updateStage(any());
        verify(jobStoreMock, times(2)).updateJob(any());
        verify(jobStoreMock, times(1)).storeNewWorkers(any(), any());

        // NOTE(review): the boolean returned by await() is not asserted, so the test does not
        // fail when storeCompletedJobForCluster is never invoked - confirm whether it should.
        completedJobStored.await(1, TimeUnit.SECONDS);
    } catch (Exception e) {
        e.printStackTrace();
        fail();
    }
}
Aggregations