use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR in project mantis by Netflix.
the class JobClustersManagerActor method initialize.
/**
 * Handles the {@code JobClustersManagerInitialize} message.
 *
 * <p>Creates and watches the job-list helper actor, records the scheduler from the message and
 * builds the {@code JobClusterInfoManager}. When {@code initMsg.isLoadJobsFromStore()} is false the
 * actor switches to {@code initializedBehavior} immediately. Otherwise it loads all job clusters,
 * active jobs and completed jobs from {@code jobStore}, creates one cluster actor per stored
 * cluster and initializes each of them with its jobs, blocking until every cluster has answered.
 *
 * <p>The original sender always receives a {@code JobClustersManagerInitializeResponse}:
 * {@code SUCCESS} when the actor became initialized, {@code SERVER_ERROR} when cluster
 * initialization failed or an exception was thrown.
 *
 * @param initMsg the initialization request (scheduler, request id, load-from-store flag)
 */
private void initialize(JobClustersManagerInitialize initMsg) {
    ActorRef sender = getSender();
    try {
        logger.info("In JobClustersManagerActor:initialize");
        this.jobListHelperActor = getContext().actorOf(JobListHelperActor.props(), "JobListHelperActor");
        getContext().watch(jobListHelperActor);
        mantisScheduler = initMsg.getScheduler();
        this.jobClusterInfoManager = new JobClusterInfoManager(jobStore, mantisScheduler, eventPublisher);
        if (!initMsg.isLoadJobsFromStore()) {
            getContext().become(initializedBehavior);
            sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SUCCESS, "JobClustersManager successfully inited"), getSelf());
        } else {
            List<IJobClusterMetadata> jobClusters = jobStore.loadAllJobClusters();
            logger.info("Read {} job clusters from storage", jobClusters.size());
            List<IMantisJobMetadata> activeJobs = jobStore.loadAllActiveJobs();
            logger.info("Read {} jobs from storage", activeJobs.size());
            List<CompletedJob> completedJobs = jobStore.loadAllCompletedJobs();
            logger.info("Read {} completed jobs from storage", completedJobs.size());

            // Index clusters by name. Entries without a definition are skipped here, before the
            // first dereference, so a single bad record cannot abort the whole bootstrap.
            Map<String, IJobClusterMetadata> jobClusterMap = new HashMap<>();
            for (IJobClusterMetadata jobClusterMeta : jobClusters) {
                if (jobClusterMeta == null || jobClusterMeta.getJobClusterDefinition() == null) {
                    logger.warn("skipping job cluster metadata without a definition on bootstrap: {}", jobClusterMeta);
                    continue;
                }
                jobClusterMap.put(jobClusterMeta.getJobClusterDefinition().getName(), jobClusterMeta);
            }

            // group jobs by cluster
            Map<String, List<IMantisJobMetadata>> clusterToJobMap = new HashMap<>();
            for (IMantisJobMetadata jobMeta : activeJobs) {
                clusterToJobMap.computeIfAbsent(jobMeta.getClusterName(), k -> new ArrayList<>()).add(jobMeta);
            }
            Map<String, List<CompletedJob>> clusterToCompletedJobMap = new HashMap<>();
            for (CompletedJob jobMeta : completedJobs) {
                clusterToCompletedJobMap.computeIfAbsent(jobMeta.getName(), k -> new ArrayList<>()).add(jobMeta);
            }

            // Per-cluster init timeout: the configured master init timeout minus 60 seconds,
            // unless that would not be positive, in which case the full value is used.
            long masterInitTimeoutSecs = ConfigurationProvider.getConfig().getMasterInitTimeoutSecs();
            long timeout = ((masterInitTimeoutSecs - 60)) > 0 ? (masterInitTimeoutSecs - 60) : masterInitTimeoutSecs;
            Duration clusterInitTimeout = Duration.ofSeconds(timeout);

            // jobClusterMap only holds entries with a non-null definition (validated above).
            Observable.from(jobClusterMap.values()).flatMap((jobClusterMeta) -> {
                String clusterName = jobClusterMeta.getJobClusterDefinition().getName();
                Optional<JobClusterInfo> jobClusterInfoO = jobClusterInfoManager.createClusterActorAndRegister(jobClusterMeta.getJobClusterDefinition());
                if (!jobClusterInfoO.isPresent()) {
                    logger.info("skipping job cluster {} on bootstrap as actor creating failed", clusterName);
                    return Observable.empty();
                }
                JobClusterInfo jobClusterInfo = jobClusterInfoO.get();

                // Hand each cluster its own copies of the job lists; clusters without jobs get empty lists.
                List<IMantisJobMetadata> jList = clusterToJobMap.get(clusterName);
                List<IMantisJobMetadata> jobList = jList == null ? new ArrayList<>() : new ArrayList<>(jList);
                List<CompletedJob> cList = clusterToCompletedJobMap.get(clusterName);
                List<CompletedJob> completedJobsList = cList == null ? new ArrayList<>() : new ArrayList<>(cList);

                JobClusterProto.InitializeJobClusterRequest req = new JobClusterProto.InitializeJobClusterRequest((JobClusterDefinitionImpl) jobClusterMeta.getJobClusterDefinition(), jobClusterMeta.isDisabled(), jobClusterMeta.getLastJobCount(), jobList, completedJobsList, "system", getSelf(), false);
                return jobClusterInfoManager.initializeCluster(jobClusterInfo, req, clusterInitTimeout);
            }).filter(Objects::nonNull).toBlocking().subscribe((clusterInit) -> {
                logger.info("JobCluster {} inited with code {}", clusterInit.jobClusterName, clusterInit.responseCode);
                numJobClusterInitSuccesses.increment();
            }, (error) -> {
                logger.warn("Exception initializing clusters {}", error.getMessage(), error);
                logger.error("JobClusterManagerActor had errors during initialization NOT transitioning to initialized behavior");
                // Deliberately no getContext().become(initializedBehavior) on this path.
                sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SERVER_ERROR, "JobClustersManager inited with errors"), getSelf());
            }, () -> {
                logger.info("JobClusterManagerActor transitioning to initialized behavior");
                getContext().become(initializedBehavior);
                sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SUCCESS, "JobClustersManager successfully inited"), getSelf());
            });

            // NOTE(review): the timer and the archived-job load below are started after the blocking
            // subscribe returns, i.e. also when the error callback fired - confirm this is intended.
            getTimers().startPeriodicTimer(CHECK_CLUSTERS_TIMER_KEY, new ReconcileJobCluster(), Duration.ofSeconds(checkAgainInSecs));
            // kick off loading of archived jobs
            logger.info("Kicking off archived job load asynchronously");
            jobStore.loadAllArchivedJobsAsync();
        }
    } catch (Exception e) {
        logger.error("caught exception", e);
        sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SERVER_ERROR, e.getMessage()), getSelf());
    }
    logger.info("JobClustersManagerActor:initialize ends");
}
use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR in project mantis by Netflix.
the class JobListHelperActor method onJobClustersList.
/**
 * Collects a metadata view from every job cluster actor in the request and replies with the list.
 *
 * <p>Each cluster actor is asked with a 500 ms timeout. A failed ask is logged and dropped, so the
 * reply then contains only the clusters that answered. The reply goes to {@code request.sender}
 * with the sender of the current message passed as the sender reference.
 *
 * @param request wrapper holding the original list request, the actor to reply to and the
 *                cluster info map to query
 */
private void onJobClustersList(ListJobClusterRequestWrapper request) {
    if (logger.isTraceEnabled()) {
        logger.trace("In onJobClustersListRequest {}", request);
    }
    final ActorRef callerActor = getSender();
    final Timeout askTimeout = new Timeout(Duration.create(500, TimeUnit.MILLISECONDS));
    final List<MantisJobClusterMetadataView> clusterList = Lists.newArrayList();

    Observable.from(request.jobClusterInfoMap.values())
        .flatMap((clusterInfo) -> {
            CompletionStage<JobClusterManagerProto.GetJobClusterResponse> pending =
                ask(clusterInfo.jobClusterActor, new JobClusterManagerProto.GetJobClusterRequest(clusterInfo.clusterName), askTimeout)
                    .thenApply(JobClusterManagerProto.GetJobClusterResponse.class::cast);
            // A failed ask for one cluster must not fail the whole listing.
            return Observable.from(pending.toCompletableFuture(), Schedulers.io())
                .onErrorResumeNext(ex -> {
                    logger.warn("caught exception {}", ex.getMessage(), ex);
                    return Observable.empty();
                });
        })
        .filter(Objects::nonNull)
        .filter((resp) -> resp.getJobCluster().isPresent())
        .map((resp) -> resp.getJobCluster().get())
        .doOnError(this::logError)
        .subscribeOn(Schedulers.computation())
        .subscribe(
            clusterList::add,
            (err) -> {
                logger.warn("Exception in onJobClusterList ", err);
                if (logger.isTraceEnabled()) {
                    logger.trace("Exit onJobClustersListRequest {}", err);
                }
                JobClusterManagerProto.ListJobClustersResponse failure =
                    new JobClusterManagerProto.ListJobClustersResponse(request.listJobClustersRequest.requestId, SERVER_ERROR, err.getMessage(), clusterList);
                request.sender.tell(failure, callerActor);
            },
            () -> {
                if (logger.isTraceEnabled()) {
                    logger.trace("Exit onJobClustersListRequest {}", clusterList);
                }
                JobClusterManagerProto.ListJobClustersResponse success =
                    new JobClusterManagerProto.ListJobClustersResponse(request.listJobClustersRequest.requestId, SUCCESS, "", clusterList);
                request.sender.tell(success, callerActor);
            });
}
use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR in project mantis by Netflix.
the class JobClusterRoute method getJobClusterRoutes.
/**
 * Builds the job-cluster HTTP routes.
 *
 * <ul>
 *   <li>{@code api/submit}: parses and validates a {@code MantisJobDefinition} and submits it.</li>
 *   <li>POST under {@code API_V0_JOBCLUSTER} (presumably {@code /api/namedjob}, per the log
 *       messages - confirm against the constant): {@code create}, {@code update}, {@code delete},
 *       {@code disable}, {@code enable}, {@code quickupdate}, {@code updatelabels},
 *       {@code updatesla}, {@code migratestrategy}, {@code quicksubmit}.</li>
 *   <li>GET under the same prefix: {@code list}, {@code list/<cluster>},
 *       {@code listJobIds/<cluster>}; a bare {@code listJobIds} is answered with 400.</li>
 * </ul>
 *
 * <p>Every POST handler reads the entity as a string and parses it as JSON. A JSON parse failure
 * ({@code IOException}) is answered with 400; for submit/create/update any other exception is
 * answered with 500 and a {@code {"error": "<message>"}} body.
 *
 * @return the combined route
 */
private Route getJobClusterRoutes() {
    return route(
        path(segment("api").slash("submit"), () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
            logger.debug("/api/submit called {}", request);
            try {
                MantisJobDefinition mjd = Jackson.fromJSON(request, MantisJobDefinition.class);
                logger.debug("job submit request {}", mjd);
                mjd.validate(true);
                Pair<Boolean, String> validationResult = validateSubmitJobRequest(mjd);
                if (!validationResult.first()) {
                    jobClusterSubmitError.increment();
                    return complete(StatusCodes.BAD_REQUEST, "{\"error\": \"" + validationResult.second() + "\"}");
                }
                jobClusterSubmit.increment();
                return completeWithFuture(jobClusterRouteHandler.submit(JobClusterProtoAdapter.toSubmitJobClusterRequest(mjd)).thenApply(this::toHttpResponse));
            } catch (Exception e) {
                logger.warn("exception in submit job request {}", request, e);
                jobClusterSubmitError.increment();
                return complete(StatusCodes.INTERNAL_SERVER_ERROR, "{\"error\": \"" + e.getMessage() + "\"}");
            }
        }))),
        pathPrefix(API_V0_JOBCLUSTER, () -> route(
            post(() -> route(
                path("create", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), jobClusterDefn -> {
                    logger.debug("/api/namedjob/create called {}", jobClusterDefn);
                    try {
                        final NamedJobDefinition namedJobDefinition = Jackson.fromJSON(jobClusterDefn, NamedJobDefinition.class);
                        if (namedJobDefinition == null || namedJobDefinition.getJobDefinition() == null || namedJobDefinition.getJobDefinition().getJobJarFileLocation() == null || namedJobDefinition.getJobDefinition().getName() == null || namedJobDefinition.getJobDefinition().getName().isEmpty()) {
                            logger.warn("JobCluster create request must include name and URL {}", jobClusterDefn);
                            // Count rejected requests as errors, matching the update handler.
                            jobClusterCreateError.increment();
                            return complete(StatusCodes.BAD_REQUEST, "{\"error\": \"Job definition must include name and URL\"}");
                        }
                        final CompletionStage<CreateJobClusterResponse> response = jobClusterRouteHandler.create(JobClusterProtoAdapter.toCreateJobClusterRequest(namedJobDefinition));
                        jobClusterCreate.increment();
                        return completeWithFuture(response.thenApply(r -> {
                            // An "already exists" client error is re-labelled as SERVER_ERROR before
                            // conversion to HTTP. NOTE(review): presumably kept for compatibility
                            // with older clients of this endpoint - confirm.
                            if ((r.responseCode == CLIENT_ERROR || r.responseCode == CLIENT_ERROR_CONFLICT) && r.message.contains("already exists")) {
                                return new CreateJobClusterResponse(r.requestId, SERVER_ERROR, r.message, r.getJobClusterName());
                            }
                            return r;
                        }).thenApply(this::toHttpResponse));
                    } catch (IOException e) {
                        logger.warn("Error creating JobCluster {}", jobClusterDefn, e);
                        jobClusterCreateError.increment();
                        return complete(StatusCodes.BAD_REQUEST, "Can't read valid json in request: " + e.getMessage());
                    } catch (Exception e) {
                        logger.warn("Error creating JobCluster {}", jobClusterDefn, e);
                        jobClusterCreateError.increment();
                        // The message is quoted so the body is a JSON string, as in the submit handler.
                        return complete(StatusCodes.INTERNAL_SERVER_ERROR, "{\"error\": \"" + e.getMessage() + "\"}");
                    }
                }))),
                path("update", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), jobClusterDefn -> {
                    logger.debug("/api/namedjob/update called {}", jobClusterDefn);
                    try {
                        final NamedJobDefinition namedJobDefinition = Jackson.fromJSON(jobClusterDefn, NamedJobDefinition.class);
                        if (namedJobDefinition == null || namedJobDefinition.getJobDefinition() == null || namedJobDefinition.getJobDefinition().getJobJarFileLocation() == null || namedJobDefinition.getJobDefinition().getName() == null || namedJobDefinition.getJobDefinition().getName().isEmpty()) {
                            logger.warn("JobCluster update request must include name and URL {}", jobClusterDefn);
                            jobClusterCreateUpdateError.increment();
                            return complete(StatusCodes.BAD_REQUEST, "{\"error\": \"Job definition must include name and URL\"}");
                        }
                        final CompletionStage<UpdateJobClusterResponse> response = jobClusterRouteHandler.update(JobClusterProtoAdapter.toUpdateJobClusterRequest(namedJobDefinition));
                        jobClusterCreateUpdate.increment();
                        return completeWithFuture(response.thenApply(this::toHttpResponse));
                    } catch (IOException e) {
                        logger.warn("Error updating JobCluster {}", jobClusterDefn, e);
                        jobClusterCreateUpdateError.increment();
                        return complete(StatusCodes.BAD_REQUEST, "Can't read valid json in request: " + e.getMessage());
                    } catch (Exception e) {
                        logger.warn("Error updating JobCluster {}", jobClusterDefn, e);
                        jobClusterCreateUpdateError.increment();
                        // The message is quoted so the body is a JSON string, as in the submit handler.
                        return complete(StatusCodes.INTERNAL_SERVER_ERROR, "{\"error\": \"" + e.getMessage() + "\"}");
                    }
                }))),
                path("delete", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), deleteReq -> {
                    logger.debug("/api/namedjob/delete called {}", deleteReq);
                    try {
                        final DeleteJobClusterRequest deleteJobClusterRequest = Jackson.fromJSON(deleteReq, DeleteJobClusterRequest.class);
                        final CompletionStage<DeleteJobClusterResponse> response = jobClusterRouteHandler.delete(deleteJobClusterRequest);
                        jobClusterDelete.increment();
                        return completeWithFuture(response.thenApply(this::toHttpResponse));
                    } catch (IOException e) {
                        logger.warn("Error deleting JobCluster {}", deleteReq, e);
                        jobClusterDeleteError.increment();
                        return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
                    }
                }))),
                path("disable", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
                    logger.debug("/api/namedjob/disable called {}", request);
                    try {
                        final DisableJobClusterRequest disableJobClusterRequest = Jackson.fromJSON(request, DisableJobClusterRequest.class);
                        final CompletionStage<DisableJobClusterResponse> response = jobClusterRouteHandler.disable(disableJobClusterRequest);
                        jobClusterDisable.increment();
                        return completeWithFuture(response.thenApply(this::toHttpResponse));
                    } catch (IOException e) {
                        logger.warn("Error disabling JobCluster {}", request, e);
                        jobClusterDisableError.increment();
                        return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
                    }
                }))),
                path("enable", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
                    logger.debug("/api/namedjob/enable called {}", request);
                    try {
                        final EnableJobClusterRequest enableJobClusterRequest = Jackson.fromJSON(request, EnableJobClusterRequest.class);
                        final CompletionStage<EnableJobClusterResponse> response = jobClusterRouteHandler.enable(enableJobClusterRequest);
                        jobClusterEnable.increment();
                        return completeWithFuture(response.thenApply(this::toHttpResponse));
                    } catch (IOException e) {
                        logger.warn("Error enabling JobCluster {}", request, e);
                        jobClusterEnableError.increment();
                        return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
                    }
                }))),
                path("quickupdate", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
                    logger.debug("/api/namedjob/quickupdate called {}", request);
                    try {
                        final UpdateJobClusterArtifactRequest updateJobClusterArtifactRequest = Jackson.fromJSON(request, UpdateJobClusterArtifactRequest.class);
                        final CompletionStage<UpdateJobClusterArtifactResponse> response = jobClusterRouteHandler.updateArtifact(updateJobClusterArtifactRequest);
                        jobClusterQuickupdate.increment();
                        return completeWithFuture(response.thenApply(this::toHttpResponse));
                    } catch (IOException e) {
                        logger.warn("Error on quickupdate for JobCluster {}", request, e);
                        jobClusterQuickupdateError.increment();
                        return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
                    }
                }))),
                path("updatelabels", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
                    logger.debug("/api/namedjob/updatelabels called {}", request);
                    try {
                        final UpdateJobClusterLabelsRequest updateJobClusterLabelsRequest = Jackson.fromJSON(request, UpdateJobClusterLabelsRequest.class);
                        jobClusterUpdateLabel.increment();
                        return completeWithFuture(jobClusterRouteHandler.updateLabels(updateJobClusterLabelsRequest).thenApply(this::toHttpResponse));
                    } catch (IOException e) {
                        logger.warn("Error updating labels for JobCluster {}", request, e);
                        jobClusterUpdateLabelError.increment();
                        return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
                    }
                }))),
                path("updatesla", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
                    logger.debug("/api/namedjob/updatesla called {}", request);
                    jobClusterUpdateSla.increment();
                    try {
                        final UpdateJobClusterSLARequest updateJobClusterSLARequest = Jackson.fromJSON(request, UpdateJobClusterSLARequest.class);
                        return completeWithFuture(jobClusterRouteHandler.updateSLA(updateJobClusterSLARequest).thenApply(this::toHttpResponse));
                    } catch (IOException e) {
                        logger.warn("Error updating SLA for JobCluster {}", request, e);
                        jobClusterUpdateSlaError.increment();
                        return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
                    }
                }))),
                path("migratestrategy", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
                    logger.debug("/api/namedjob/migratestrategy called {}", request);
                    try {
                        final UpdateJobClusterWorkerMigrationStrategyRequest updateMigrateStrategyReq = Jackson.fromJSON(request, UpdateJobClusterWorkerMigrationStrategyRequest.class);
                        return completeWithFuture(jobClusterRouteHandler.updateWorkerMigrateStrategy(updateMigrateStrategyReq).thenApply(this::toHttpResponse));
                    } catch (IOException e) {
                        logger.warn("Error updating migrate strategy for JobCluster {}", request, e);
                        return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
                    }
                }))),
                path("quicksubmit", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
                    logger.debug("/api/namedjob/quicksubmit called {}", request);
                    try {
                        final JobClusterManagerProto.SubmitJobRequest submitJobRequest = Jackson.fromJSON(request, JobClusterManagerProto.SubmitJobRequest.class);
                        return completeWithFuture(jobClusterRouteHandler.submit(submitJobRequest).thenApply(this::toHttpResponse));
                    } catch (IOException e) {
                        logger.warn("Error on quick submit for JobCluster {}", request, e);
                        return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
                    }
                })))
            )),
            get(() -> route(
                pathPrefix("list", () -> route(
                    pathEndOrSingleSlash(() -> {
                        logger.debug("/api/namedjob/list called");
                        jobClusterListGET.increment();
                        // Both completeAsync callbacks answer 200; the second one returns an empty list.
                        return alwaysCache(cache, requestUriKeyer, () -> extractUri(uri -> completeAsync(
                            jobClusterRouteHandler.getAllJobClusters(new ListJobClustersRequest()),
                            resp -> completeOK(resp.getJobClusters().stream().map(jobClusterMetadataView -> JobClusterProtoAdapter.toJobClusterInfo(jobClusterMetadataView)).collect(Collectors.toList()), Jackson.marshaller()),
                            resp -> completeOK(Collections.emptyList(), Jackson.marshaller()))));
                    }),
                    path(PathMatchers.segment(), (jobCluster) -> {
                        if (logger.isDebugEnabled()) {
                            logger.debug("/api/namedjob/list/{} called", jobCluster);
                        }
                        jobClusterListClusterGET.increment();
                        // The cluster is returned as a one-element list, or an empty list when absent.
                        return completeAsync(
                            jobClusterRouteHandler.getJobClusterDetails(new JobClusterManagerProto.GetJobClusterRequest(jobCluster)),
                            resp -> completeOK(resp.getJobCluster().map(jc -> Arrays.asList(jc)).orElse(Collections.emptyList()), Jackson.marshaller()),
                            resp -> completeOK(Collections.emptyList(), Jackson.marshaller()));
                    })
                )),
                path(segment("listJobIds").slash(PathMatchers.segment()), (jobCluster) -> {
                    logger.debug("/api/namedjob/listJobIds/{} called", jobCluster);
                    jobClusterListJobIdGET.increment();
                    return jobClusterListRoute(jobCluster);
                }),
                path("listJobIds", () -> {
                    logger.debug("/api/namedjob/listJobIds called");
                    return complete(StatusCodes.BAD_REQUEST, "Specify the Job cluster name '/api/namedjob/listJobIds/<JobClusterName>' to list the job Ids");
                })
            ))
        ))
    );
}
use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR in project mantis by Netflix.
the class JobListHelperActor method onJobList.
/**
 * Collects the jobs of every job cluster selected by the request criteria and replies with one
 * list sorted by ascending submission time.
 *
 * <p>Each selected cluster actor is asked with a 500 ms timeout. A failed ask is logged and
 * dropped, so the reply then contains only the jobs of the clusters that answered. The reply goes
 * to {@code request.sender} with the sender of the current message passed as the sender reference.
 *
 * @param request wrapper holding the original list request, the actor to reply to and the
 *                cluster info map to query
 */
private void onJobList(ListJobRequestWrapper request) {
    ActorRef sender = getSender();
    Timeout t = new Timeout(Duration.create(500, TimeUnit.MILLISECONDS));
    List<MantisJobMetadataView> resultList = Lists.newArrayList();
    getJobClustersMatchingRegex(request.jobClusterInfoMap.values(), request.listJobsRequest.getCriteria())
        .flatMap((jobClusterInfo) -> {
            CompletionStage<JobClusterManagerProto.ListJobsResponse> respCS = ask(jobClusterInfo.jobClusterActor, request.listJobsRequest, t).thenApply(JobClusterManagerProto.ListJobsResponse.class::cast);
            // A failed ask for one cluster must not fail the whole listing.
            return Observable.from(respCS.toCompletableFuture(), Schedulers.io()).onErrorResumeNext(ex -> {
                logger.warn("caught exception {}", ex.getMessage(), ex);
                return Observable.empty();
            });
        })
        .filter(Objects::nonNull)
        .flatMapIterable((listJobsResp) -> listJobsResp.getJobList())
        .toSortedList((o1, o2) -> Long.compare(o1.getJobMetadata().getSubmittedAt(), o2.getJobMetadata().getSubmittedAt()))
        .subscribeOn(Schedulers.computation())
        .subscribe(resultList::addAll, (e) -> {
            // Log the failure before answering, as onJobClustersList does; otherwise it leaves no trace here.
            logger.warn("Exception in onJobList ", e);
            request.sender.tell(new JobClusterManagerProto.ListJobsResponse(request.listJobsRequest.requestId, SERVER_ERROR, e.getMessage(), resultList), sender);
        }, () -> {
            // TODO: the criteria's limit is not applied to the merged list here; per the original
            // note the limit is applied at cluster level as well.
            request.sender.tell(new JobClusterManagerProto.ListJobsResponse(request.listJobsRequest.requestId, SUCCESS, "", resultList), sender);
        });
}
Aggregations