Search in sources :

Example 1 with SERVER_ERROR

use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR in project mantis by Netflix.

In class JobClustersManagerActor, method initialize.

/**
 * Handles the {@code JobClustersManagerInitialize} message that bootstraps this actor.
 *
 * <p>The helper actor for list requests is created and watched, the scheduler carried by the
 * message is stored, and a fresh {@code JobClusterInfoManager} is built. Then:
 * <ul>
 *   <li>if the message asks to load jobs from the store, all job clusters, active jobs and
 *       completed jobs are read, every stored cluster is registered and initialized through
 *       the {@code JobClusterInfoManager} (blocking until the whole stream terminates), and
 *       the sender receives SUCCESS on completion or SERVER_ERROR on a stream error;</li>
 *   <li>otherwise the actor switches to its initialized behavior immediately and the sender
 *       receives SUCCESS.</li>
 * </ul>
 * Any exception thrown synchronously is logged and reported to the sender as SERVER_ERROR.
 *
 * @param initMsg initialization request; provides the scheduler, the load-from-store flag
 *                and the request id that is echoed back in the response
 */
private void initialize(JobClustersManagerInitialize initMsg) {
    final ActorRef replyTo = getSender();
    try {
        logger.info("In JobClustersManagerActor:initialize");
        this.jobListHelperActor = getContext().actorOf(JobListHelperActor.props(), "JobListHelperActor");
        getContext().watch(jobListHelperActor);
        mantisScheduler = initMsg.getScheduler();
        this.jobClusterInfoManager = new JobClusterInfoManager(jobStore, mantisScheduler, eventPublisher);

        if (initMsg.isLoadJobsFromStore()) {
            // Pull the persisted state: clusters first, then active jobs, then completed jobs.
            final List<IJobClusterMetadata> storedClusters = jobStore.loadAllJobClusters();
            logger.info("Read {} job clusters from storage", storedClusters.size());
            final List<IMantisJobMetadata> storedActiveJobs = jobStore.loadAllActiveJobs();
            logger.info("Read {} jobs from storage", storedActiveJobs.size());
            final List<CompletedJob> storedCompletedJobs = jobStore.loadAllCompletedJobs();
            logger.info("Read {} completed jobs from storage", storedCompletedJobs.size());

            // Index cluster metadata by cluster name (a later entry with the same name wins).
            final Map<String, IJobClusterMetadata> clustersByName = new HashMap<>();
            for (IJobClusterMetadata storedCluster : storedClusters) {
                clustersByName.put(storedCluster.getJobClusterDefinition().getName(), storedCluster);
            }

            // Bucket active and completed jobs under the name of the cluster they belong to.
            final Map<String, List<IMantisJobMetadata>> activeJobsByCluster = new HashMap<>();
            for (IMantisJobMetadata activeJob : storedActiveJobs) {
                activeJobsByCluster.computeIfAbsent(activeJob.getClusterName(), key -> new ArrayList<>()).add(activeJob);
            }
            final Map<String, List<CompletedJob>> completedJobsByCluster = new HashMap<>();
            for (CompletedJob completedJob : storedCompletedJobs) {
                completedJobsByCluster.computeIfAbsent(completedJob.getName(), key -> new ArrayList<>()).add(completedJob);
            }

            // Per-cluster init timeout: the configured master timeout minus 60 seconds, unless
            // that would not be positive, in which case the configured value is used unchanged.
            final long configuredTimeoutSecs = ConfigurationProvider.getConfig().getMasterInitTimeoutSecs();
            final long shortenedTimeoutSecs = configuredTimeoutSecs - 60;
            final Duration clusterInitTimeout = Duration.ofSeconds(shortenedTimeoutSecs > 0 ? shortenedTimeoutSecs : configuredTimeoutSecs);

            Observable.from(clustersByName.values())
                .filter(storedCluster -> storedCluster != null && storedCluster.getJobClusterDefinition() != null)
                .flatMap(storedCluster -> {
                    final Optional<JobClusterInfo> registered = jobClusterInfoManager.createClusterActorAndRegister(storedCluster.getJobClusterDefinition());
                    final String nameOfCluster = storedCluster.getJobClusterDefinition().getName();
                    if (!registered.isPresent()) {
                        logger.info("skipping job cluster {} on bootstrap as actor creating failed", nameOfCluster);
                        return Observable.empty();
                    }

                    // Hand the cluster private copies of its job lists (empty when it has none).
                    final List<IMantisJobMetadata> activeForCluster = new ArrayList<>();
                    Optional.ofNullable(activeJobsByCluster.get(nameOfCluster)).ifPresent(activeForCluster::addAll);
                    final List<CompletedJob> completedForCluster = new ArrayList<>();
                    Optional.ofNullable(completedJobsByCluster.get(nameOfCluster)).ifPresent(completedForCluster::addAll);

                    final JobClusterProto.InitializeJobClusterRequest initRequest = new JobClusterProto.InitializeJobClusterRequest(
                        (JobClusterDefinitionImpl) storedCluster.getJobClusterDefinition(),
                        storedCluster.isDisabled(),
                        storedCluster.getLastJobCount(),
                        activeForCluster,
                        completedForCluster,
                        "system",
                        getSelf(),
                        false);
                    return jobClusterInfoManager.initializeCluster(registered.get(), initRequest, clusterInitTimeout);
                })
                .filter(Objects::nonNull)
                .toBlocking()
                .subscribe(initResult -> {
                    logger.info("JobCluster {} inited with code {}", initResult.jobClusterName, initResult.responseCode);
                    numJobClusterInitSuccesses.increment();
                }, failure -> {
                    logger.warn("Exception initializing clusters {}", failure.getMessage(), failure);
                    logger.error("JobClusterManagerActor had errors during initialization NOT transitioning to initialized behavior");
                    // The actor intentionally stays in its current behavior on failure.
                    replyTo.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SERVER_ERROR, "JobClustersManager  inited with errors"), getSelf());
                }, () -> {
                    logger.info("JobClusterManagerActor transitioning to initialized behavior");
                    getContext().become(initializedBehavior);
                    replyTo.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SUCCESS, "JobClustersManager successfully inited"), getSelf());
                });

            // Schedule the recurring cluster reconciliation message.
            getTimers().startPeriodicTimer(CHECK_CLUSTERS_TIMER_KEY, new ReconcileJobCluster(), Duration.ofSeconds(checkAgainInSecs));
            // Archived jobs are loaded in the background.
            logger.info("Kicking off archived job load asynchronously");
            jobStore.loadAllArchivedJobsAsync();
        } else {
            // Nothing to restore: become initialized straight away.
            getContext().become(initializedBehavior);
            replyTo.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SUCCESS, "JobClustersManager successfully inited"), getSelf());
        }
    } catch (Exception e) {
        logger.error("caught exception", e);
        replyTo.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SERVER_ERROR, e.getMessage()), getSelf());
    }
    logger.info("JobClustersManagerActor:initialize ends");
}
Also used : JobId(io.mantisrx.server.master.domain.JobId) Terminated(akka.actor.Terminated) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) GetLastSubmittedJobIdStreamResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLastSubmittedJobIdStreamResponse) ListArchivedWorkersRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListArchivedWorkersRequest) CreateJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.CreateJobClusterRequest) UpdateJobClusterLabelsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterLabelsResponse) ActorRef(akka.actor.ActorRef) UpdateJobClusterArtifactResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterArtifactResponse) DeleteJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DeleteJobClusterRequest) Duration(java.time.Duration) Map(java.util.Map) Schedulers(rx.schedulers.Schedulers) Metrics(io.mantisrx.common.metrics.Metrics) DisableJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DisableJobClusterResponse) ListCompletedJobsInClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListCompletedJobsInClusterResponse) Set(java.util.Set) ListCompletedJobsInClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListCompletedJobsInClusterRequest) ScaleStageRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageRequest) BaseResponse(io.mantisrx.master.jobcluster.proto.BaseResponse) SUCCESS_CREATED(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS_CREATED) SubmitJobRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobRequest) CompletionStage(java.util.concurrent.CompletionStage) UpdateJobClusterWorkerMigrationStrategyRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterWorkerMigrationStrategyRequest) 
ListJobIdsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobIdsRequest) JobHelper(io.mantisrx.master.jobcluster.job.JobHelper) CLIENT_ERROR_CONFLICT(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.CLIENT_ERROR_CONFLICT) ListWorkersRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListWorkersRequest) IJobClusterMetadata(io.mantisrx.master.jobcluster.IJobClusterMetadata) Optional.empty(java.util.Optional.empty) SERVER_ERROR(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR) ListJobClustersRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobClustersRequest) MetricsRegistry(io.mantisrx.common.metrics.MetricsRegistry) ArrayList(java.util.ArrayList) ListJobIdsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobIdsResponse) SupervisorStrategy(akka.actor.SupervisorStrategy) JobClusterDefinitionImpl(io.mantisrx.server.master.domain.JobClusterDefinitionImpl) WorkerEvent(io.mantisrx.server.master.scheduler.WorkerEvent) GetLastSubmittedJobIdStreamRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLastSubmittedJobIdStreamRequest) KillJobRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobRequest) EnableJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.EnableJobClusterRequest) CreateJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.CreateJobClusterResponse) GetJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterRequest) UpdateJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterRequest) CLIENT_ERROR_NOT_FOUND(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.CLIENT_ERROR_NOT_FOUND) UpdateJobClusterWorkerMigrationStrategyResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterWorkerMigrationStrategyResponse) 
CompletedJob(io.mantisrx.server.master.domain.JobClusterDefinitionImpl.CompletedJob) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) KillJobResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobResponse) ListJobsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobsResponse) ListJobClustersResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobClustersResponse) EnableJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.EnableJobClusterResponse) IJobClusterDefinition(io.mantisrx.server.master.domain.IJobClusterDefinition) GetJobSchedInfoRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobSchedInfoRequest) JobState(io.mantisrx.master.jobcluster.job.JobState) LoggerFactory(org.slf4j.LoggerFactory) GaugeCallback(io.mantisrx.common.metrics.spectator.GaugeCallback) JobCompletedReason(io.mantisrx.server.core.JobCompletedReason) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) ListJobsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobsRequest) CLIENT_ERROR(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.CLIENT_ERROR) GetJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterResponse) JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) GetLatestJobDiscoveryInfoResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLatestJobDiscoveryInfoResponse) JobClustersManagerInitialize(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.JobClustersManagerInitialize) ReconcileJobCluster(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ReconcileJobCluster) GetJobDetailsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse) JobClusterActor(io.mantisrx.master.jobcluster.JobClusterActor) 
ResubmitWorkerResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ResubmitWorkerResponse) DisableJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DisableJobClusterRequest) ResubmitWorkerRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ResubmitWorkerRequest) UpdateJobClusterLabelsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterLabelsRequest) Collectors(java.util.stream.Collectors) GetJobSchedInfoResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobSchedInfoResponse) Objects(java.util.Objects) List(java.util.List) ActorPaths(akka.actor.ActorPaths) PatternsCS.ask(akka.pattern.PatternsCS.ask) SubmitJobResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobResponse) GetJobDetailsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsRequest) ConfigurationProvider(io.mantisrx.server.master.config.ConfigurationProvider) Optional(java.util.Optional) Props(akka.actor.Props) UpdateJobClusterSLARequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterSLARequest) ScaleStageResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageResponse) UpdateJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterResponse) ListArchivedWorkersResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListArchivedWorkersResponse) HashMap(java.util.HashMap) UpdateJobClusterSLAResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterSLAResponse) MantisActorSupervisorStrategy(io.mantisrx.master.akka.MantisActorSupervisorStrategy) Observable(rx.Observable) DeleteJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DeleteJobClusterResponse) ListWorkersResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListWorkersResponse) 
AbstractActorWithTimers(akka.actor.AbstractActorWithTimers) Counter(io.mantisrx.common.metrics.Counter) Logger(org.slf4j.Logger) Optional.ofNullable(java.util.Optional.ofNullable) GetLatestJobDiscoveryInfoRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLatestJobDiscoveryInfoRequest) SUCCESS(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS) Lists(io.mantisrx.shaded.com.google.common.collect.Lists) UpdateJobClusterArtifactRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterArtifactRequest) Collections(java.util.Collections) JobClustersManagerInitializeResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.JobClustersManagerInitializeResponse) LifecycleEventPublisher(io.mantisrx.master.events.LifecycleEventPublisher) MetricGroupId(io.mantisrx.common.metrics.spectator.MetricGroupId) JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) HashMap(java.util.HashMap) ActorRef(akka.actor.ActorRef) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) ReconcileJobCluster(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ReconcileJobCluster) JobClustersManagerInitializeResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.JobClustersManagerInitializeResponse) Optional(java.util.Optional) JobClusterDefinitionImpl(io.mantisrx.server.master.domain.JobClusterDefinitionImpl) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) Duration(java.time.Duration) CompletedJob(io.mantisrx.server.master.domain.JobClusterDefinitionImpl.CompletedJob) Objects(java.util.Objects) IJobClusterMetadata(io.mantisrx.master.jobcluster.IJobClusterMetadata)

Example 2 with SERVER_ERROR

use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR in project mantis by Netflix.

In class JobListHelperActor, method onJobClustersList.

/**
 * Serves a list-job-clusters request by querying every job cluster actor in the wrapper.
 *
 * <p>Each actor in {@code request.jobClusterInfoMap} is asked for its cluster details with a
 * 500 ms timeout. An ask that fails is logged and skipped. Responses that carry a job cluster
 * are accumulated, and once the stream terminates {@code request.sender} is sent a
 * {@code ListJobClustersResponse}: SUCCESS with the collected views on completion, or
 * SERVER_ERROR with the error message (and whatever was collected so far) on a stream error.
 *
 * @param request wrapper holding the original list request, the actor to reply to and the
 *                map of known job clusters
 */
private void onJobClustersList(ListJobClusterRequestWrapper request) {
    if (logger.isTraceEnabled()) {
        logger.trace("In onJobClustersListRequest {}", request);
    }
    final ActorRef originator = getSender();
    final Timeout askTimeout = new Timeout(Duration.create(500, TimeUnit.MILLISECONDS));
    final List<MantisJobClusterMetadataView> collectedViews = Lists.newArrayList();

    // One ask per cluster actor; failed asks contribute nothing to the stream.
    final Observable<MantisJobClusterMetadataView> clusterViews = Observable.from(request.jobClusterInfoMap.values())
        .flatMap(clusterInfo -> {
            final JobClusterManagerProto.GetJobClusterRequest detailsQuery = new JobClusterManagerProto.GetJobClusterRequest(clusterInfo.clusterName);
            final CompletionStage<JobClusterManagerProto.GetJobClusterResponse> pendingReply = ask(clusterInfo.jobClusterActor, detailsQuery, askTimeout)
                .thenApply(JobClusterManagerProto.GetJobClusterResponse.class::cast);
            return Observable.from(pendingReply.toCompletableFuture(), Schedulers.io()).onErrorResumeNext(askFailure -> {
                logger.warn("caught exception {}", askFailure.getMessage(), askFailure);
                return Observable.empty();
            });
        })
        .filter(reply -> reply != null && reply.getJobCluster().isPresent())
        .map(reply -> reply.getJobCluster().get());

    clusterViews
        .doOnError(this::logError)
        .subscribeOn(Schedulers.computation())
        .subscribe(collectedViews::add, streamFailure -> {
            logger.warn("Exception in onJobClusterList ", streamFailure);
            if (logger.isTraceEnabled()) {
                logger.trace("Exit onJobClustersListRequest {}", streamFailure);
            }
            request.sender.tell(new JobClusterManagerProto.ListJobClustersResponse(request.listJobClustersRequest.requestId, SERVER_ERROR, streamFailure.getMessage(), collectedViews), originator);
        }, () -> {
            if (logger.isTraceEnabled()) {
                logger.trace("Exit onJobClustersListRequest {}", collectedViews);
            }
            request.sender.tell(new JobClusterManagerProto.ListJobClustersResponse(request.listJobClustersRequest.requestId, SUCCESS, "", collectedViews), originator);
        });
}
Also used : SERVER_ERROR(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR) Logger(org.slf4j.Logger) Collection(java.util.Collection) LoggerFactory(org.slf4j.LoggerFactory) SUCCESS(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS) Timeout(akka.util.Timeout) Observable(rx.Observable) Objects(java.util.Objects) TimeUnit(java.util.concurrent.TimeUnit) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) List(java.util.List) Duration(scala.concurrent.duration.Duration) Lists(io.mantisrx.shaded.com.google.common.collect.Lists) CompletionStage(java.util.concurrent.CompletionStage) ActorRef(akka.actor.ActorRef) PatternsCS.ask(akka.pattern.PatternsCS.ask) Map(java.util.Map) AbstractActor(akka.actor.AbstractActor) JobClusterProtoAdapter(io.mantisrx.master.api.akka.route.proto.JobClusterProtoAdapter) Schedulers(rx.schedulers.Schedulers) MantisJobClusterMetadataView(io.mantisrx.master.jobcluster.MantisJobClusterMetadataView) Pattern(java.util.regex.Pattern) Props(akka.actor.Props) MantisJobMetadataView(io.mantisrx.master.jobcluster.job.MantisJobMetadataView) ActorRef(akka.actor.ActorRef) Timeout(akka.util.Timeout) MantisJobClusterMetadataView(io.mantisrx.master.jobcluster.MantisJobClusterMetadataView) CompletionStage(java.util.concurrent.CompletionStage) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)

Example 3 with SERVER_ERROR

use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR in project mantis by Netflix.

In class JobClusterRoute, method getJobClusterRoutes.

/**
 * Builds the HTTP route tree for job cluster operations.
 *
 * <p>The tree has two parts:
 * <ul>
 *   <li>{@code api/submit}: parses the request body as a {@code MantisJobDefinition},
 *       validates it and submits it through the route handler;</li>
 *   <li>everything under the {@code API_V0_JOBCLUSTER} prefix (logged as
 *       {@code /api/namedjob}): POST endpoints {@code create}, {@code update}, {@code delete},
 *       {@code disable}, {@code enable}, {@code quickupdate}, {@code updatelabels},
 *       {@code updatesla}, {@code migratestrategy} and {@code quicksubmit}, and GET endpoints
 *       {@code list}, {@code list/<cluster>}, {@code listJobIds/<cluster>} and
 *       {@code listJobIds}.</li>
 * </ul>
 *
 * <p>Each POST endpoint reads the entity as a string, deserializes it with {@code Jackson},
 * delegates to {@code jobClusterRouteHandler} and converts the asynchronous response with
 * {@code toHttpResponse}. Bodies that cannot be parsed are answered with 400 Bad Request.
 *
 * @return the combined route for all job cluster endpoints
 */
private Route getJobClusterRoutes() {
    return route(path(segment("api").slash("submit"), () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
        logger.debug("/api/submit called {}", request);
        try {
            MantisJobDefinition mjd = Jackson.fromJSON(request, MantisJobDefinition.class);
            logger.debug("job submit request {}", mjd);
            mjd.validate(true);
            Pair<Boolean, String> validationResult = validateSubmitJobRequest(mjd);
            if (!validationResult.first()) {
                jobClusterSubmitError.increment();
                return complete(StatusCodes.BAD_REQUEST, "{\"error\": \"" + validationResult.second() + "\"}");
            }
            jobClusterSubmit.increment();
            return completeWithFuture(jobClusterRouteHandler.submit(JobClusterProtoAdapter.toSubmitJobClusterRequest(mjd)).thenApply(this::toHttpResponse));
        } catch (Exception e) {
            logger.warn("exception in submit job request {}", request, e);
            jobClusterSubmitError.increment();
            return complete(StatusCodes.INTERNAL_SERVER_ERROR, "{\"error\": \"" + e.getMessage() + "\"}");
        }
    }))), pathPrefix(API_V0_JOBCLUSTER, () -> route(post(() -> route(path("create", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), jobClusterDefn -> {
        logger.debug("/api/namedjob/create called {}", jobClusterDefn);
        try {
            final NamedJobDefinition namedJobDefinition = Jackson.fromJSON(jobClusterDefn, NamedJobDefinition.class);
            if (namedJobDefinition == null || namedJobDefinition.getJobDefinition() == null || namedJobDefinition.getJobDefinition().getJobJarFileLocation() == null || namedJobDefinition.getJobDefinition().getName() == null || namedJobDefinition.getJobDefinition().getName().isEmpty()) {
                logger.warn("JobCluster create request must include name and URL {}", jobClusterDefn);
                // Count the rejected request as an error, as the update endpoint does.
                jobClusterCreateError.increment();
                return complete(StatusCodes.BAD_REQUEST, "{\"error\": \"Job definition must include name and URL\"}");
            }
            final CompletionStage<CreateJobClusterResponse> response = jobClusterRouteHandler.create(JobClusterProtoAdapter.toCreateJobClusterRequest(namedJobDefinition));
            jobClusterCreate.increment();
            return completeWithFuture(response.thenApply(r -> {
                // An "already exists" client error is re-labelled as SERVER_ERROR before it
                // is turned into an HTTP response.
                if ((r.responseCode == CLIENT_ERROR || r.responseCode == CLIENT_ERROR_CONFLICT) && r.message.contains("already exists")) {
                    return new CreateJobClusterResponse(r.requestId, SERVER_ERROR, r.message, r.getJobClusterName());
                }
                return r;
            }).thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error creating JobCluster {}", jobClusterDefn, e);
            jobClusterCreateError.increment();
            return complete(StatusCodes.BAD_REQUEST, "Can't read valid json in request: " + e.getMessage());
        } catch (Exception e) {
            logger.warn("Error creating JobCluster {}", jobClusterDefn, e);
            jobClusterCreateError.increment();
            // The message must be quoted, otherwise the body is not valid JSON.
            return complete(StatusCodes.INTERNAL_SERVER_ERROR, "{\"error\": \"" + e.getMessage() + "\"}");
        }
    }))), path("update", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), jobClusterDefn -> {
        logger.debug("/api/namedjob/update called {}", jobClusterDefn);
        try {
            final NamedJobDefinition namedJobDefinition = Jackson.fromJSON(jobClusterDefn, NamedJobDefinition.class);
            if (namedJobDefinition == null || namedJobDefinition.getJobDefinition() == null || namedJobDefinition.getJobDefinition().getJobJarFileLocation() == null || namedJobDefinition.getJobDefinition().getName() == null || namedJobDefinition.getJobDefinition().getName().isEmpty()) {
                logger.warn("JobCluster update request must include name and URL {}", jobClusterDefn);
                jobClusterCreateUpdateError.increment();
                return complete(StatusCodes.BAD_REQUEST, "{\"error\": \"Job definition must include name and URL\"}");
            }
            final CompletionStage<UpdateJobClusterResponse> response = jobClusterRouteHandler.update(JobClusterProtoAdapter.toUpdateJobClusterRequest(namedJobDefinition));
            jobClusterCreateUpdate.increment();
            return completeWithFuture(response.thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error updating JobCluster {}", jobClusterDefn, e);
            jobClusterCreateUpdateError.increment();
            return complete(StatusCodes.BAD_REQUEST, "Can't read valid json in request: " + e.getMessage());
        } catch (Exception e) {
            logger.warn("Error updating JobCluster {}", jobClusterDefn, e);
            jobClusterCreateUpdateError.increment();
            // The message must be quoted, otherwise the body is not valid JSON.
            return complete(StatusCodes.INTERNAL_SERVER_ERROR, "{\"error\": \"" + e.getMessage() + "\"}");
        }
    }))), path("delete", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), deleteReq -> {
        logger.debug("/api/namedjob/delete called {}", deleteReq);
        try {
            final DeleteJobClusterRequest deleteJobClusterRequest = Jackson.fromJSON(deleteReq, DeleteJobClusterRequest.class);
            final CompletionStage<DeleteJobClusterResponse> response = jobClusterRouteHandler.delete(deleteJobClusterRequest);
            jobClusterDelete.increment();
            return completeWithFuture(response.thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error deleting JobCluster {}", deleteReq, e);
            jobClusterDeleteError.increment();
            return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
        }
    }))), path("disable", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
        logger.debug("/api/namedjob/disable called {}", request);
        try {
            final DisableJobClusterRequest disableJobClusterRequest = Jackson.fromJSON(request, DisableJobClusterRequest.class);
            final CompletionStage<DisableJobClusterResponse> response = jobClusterRouteHandler.disable(disableJobClusterRequest);
            jobClusterDisable.increment();
            return completeWithFuture(response.thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error disabling JobCluster {}", request, e);
            jobClusterDisableError.increment();
            return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
        }
    }))), path("enable", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
        logger.debug("/api/namedjob/enable called {}", request);
        try {
            final EnableJobClusterRequest enableJobClusterRequest = Jackson.fromJSON(request, EnableJobClusterRequest.class);
            final CompletionStage<EnableJobClusterResponse> response = jobClusterRouteHandler.enable(enableJobClusterRequest);
            jobClusterEnable.increment();
            return completeWithFuture(response.thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error enabling JobCluster {}", request, e);
            jobClusterEnableError.increment();
            return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
        }
    }))), path("quickupdate", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
        logger.debug("/api/namedjob/quickupdate called {}", request);
        try {
            final UpdateJobClusterArtifactRequest updateJobClusterArtifactRequest = Jackson.fromJSON(request, UpdateJobClusterArtifactRequest.class);
            final CompletionStage<UpdateJobClusterArtifactResponse> response = jobClusterRouteHandler.updateArtifact(updateJobClusterArtifactRequest);
            jobClusterQuickupdate.increment();
            return completeWithFuture(response.thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error on quickupdate for JobCluster {}", request, e);
            jobClusterQuickupdateError.increment();
            return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
        }
    }))), path("updatelabels", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
        logger.debug("/api/namedjob/updatelabels called {}", request);
        try {
            final UpdateJobClusterLabelsRequest updateJobClusterLabelsRequest = Jackson.fromJSON(request, UpdateJobClusterLabelsRequest.class);
            jobClusterUpdateLabel.increment();
            return completeWithFuture(jobClusterRouteHandler.updateLabels(updateJobClusterLabelsRequest).thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error updating labels for JobCluster {}", request, e);
            jobClusterUpdateLabelError.increment();
            return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
        }
    }))), path("updatesla", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
        logger.debug("/api/namedjob/updatesla called {}", request);
        jobClusterUpdateSla.increment();
        try {
            final UpdateJobClusterSLARequest updateJobClusterSLARequest = Jackson.fromJSON(request, UpdateJobClusterSLARequest.class);
            return completeWithFuture(jobClusterRouteHandler.updateSLA(updateJobClusterSLARequest).thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error updating SLA for JobCluster {}", request, e);
            jobClusterUpdateSlaError.increment();
            return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
        }
    }))), path("migratestrategy", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
        logger.debug("/api/namedjob/migratestrategy called {}", request);
        try {
            final UpdateJobClusterWorkerMigrationStrategyRequest updateMigrateStrategyReq = Jackson.fromJSON(request, UpdateJobClusterWorkerMigrationStrategyRequest.class);
            return completeWithFuture(jobClusterRouteHandler.updateWorkerMigrateStrategy(updateMigrateStrategyReq).thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error updating migrate strategy for JobCluster {}", request, e);
            return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
        }
    }))), path("quicksubmit", () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), request -> {
        logger.debug("/api/namedjob/quicksubmit called {}", request);
        try {
            final JobClusterManagerProto.SubmitJobRequest submitJobRequest = Jackson.fromJSON(request, JobClusterManagerProto.SubmitJobRequest.class);
            return completeWithFuture(jobClusterRouteHandler.submit(submitJobRequest).thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error on quick submit for JobCluster {}", request, e);
            return complete(StatusCodes.BAD_REQUEST, "Can't find valid json in request: " + e.getMessage());
        }
    }))))), get(() -> route(pathPrefix("list", () -> route(pathEndOrSingleSlash(() -> {
        logger.debug("/api/namedjob/list called");
        jobClusterListGET.increment();
        // Served through alwaysCache, keyed by requestUriKeyer; a failed lookup yields an empty list.
        return alwaysCache(cache, requestUriKeyer, () -> extractUri(uri -> completeAsync(jobClusterRouteHandler.getAllJobClusters(new ListJobClustersRequest()), resp -> completeOK(resp.getJobClusters().stream().map(jobClusterMetadataView -> JobClusterProtoAdapter.toJobClusterInfo(jobClusterMetadataView)).collect(Collectors.toList()), Jackson.marshaller()), resp -> completeOK(Collections.emptyList(), Jackson.marshaller()))));
    }), path(PathMatchers.segment(), (jobCluster) -> {
        if (logger.isDebugEnabled()) {
            logger.debug("/api/namedjob/list/{} called", jobCluster);
        }
        jobClusterListClusterGET.increment();
        // A single cluster is returned as a one-element list; absent or failed lookups yield an empty list.
        return completeAsync(jobClusterRouteHandler.getJobClusterDetails(new JobClusterManagerProto.GetJobClusterRequest(jobCluster)), resp -> completeOK(resp.getJobCluster().map(jc -> Arrays.asList(jc)).orElse(Collections.emptyList()), Jackson.marshaller()), resp -> completeOK(Collections.emptyList(), Jackson.marshaller()));
    }))), path(segment("listJobIds").slash(PathMatchers.segment()), (jobCluster) -> {
        logger.debug("/api/namedjob/listJobIds/{} called", jobCluster);
        jobClusterListJobIdGET.increment();
        return jobClusterListRoute(jobCluster);
    }), path("listJobIds", () -> {
        logger.debug("/api/namedjob/listJobIds called");
        return complete(StatusCodes.BAD_REQUEST, "Specify the Job cluster name '/api/namedjob/listJobIds/<JobClusterName>' to list the job Ids");
    }))))));
}
Also used : Strings(io.mantisrx.shaded.com.google.common.base.Strings) EnableJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.EnableJobClusterResponse) Uri(akka.http.javadsl.model.Uri) MasterConfiguration(io.mantisrx.server.master.config.MasterConfiguration) Arrays(java.util.Arrays) JavaPartialFunction(akka.japi.JavaPartialFunction) Cache(akka.http.caching.javadsl.Cache) JobRouteHandler(io.mantisrx.master.api.akka.route.handlers.JobRouteHandler) LoggerFactory(org.slf4j.LoggerFactory) LfuCache(akka.http.caching.LfuCache) RequestContext(akka.http.javadsl.server.RequestContext) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) PathMatchers.segment(akka.http.javadsl.server.PathMatchers.segment) MantisJobDefinition(io.mantisrx.runtime.MantisJobDefinition) CLIENT_ERROR(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.CLIENT_ERROR) StringUnmarshallers(akka.http.javadsl.unmarshalling.StringUnmarshallers) PathMatcher0(akka.http.javadsl.server.PathMatcher0) LfuCacheSettings(akka.http.caching.javadsl.LfuCacheSettings) UpdateJobClusterArtifactResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterArtifactResponse) DeleteJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DeleteJobClusterRequest) Map(java.util.Map) JobClusterProtoAdapter(io.mantisrx.master.api.akka.route.proto.JobClusterProtoAdapter) Unmarshaller(akka.http.javadsl.unmarshalling.Unmarshaller) Metrics(io.mantisrx.common.metrics.Metrics) DisableJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DisableJobClusterRequest) DisableJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DisableJobClusterResponse) Jackson(io.mantisrx.master.api.akka.route.Jackson) UpdateJobClusterLabelsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterLabelsRequest) HttpMethods(akka.http.javadsl.model.HttpMethods) 
Collectors(java.util.stream.Collectors) ExceptionHandler(akka.http.javadsl.server.ExceptionHandler) JobClusterRouteHandler(io.mantisrx.master.api.akka.route.handlers.JobClusterRouteHandler) Duration(scala.concurrent.duration.Duration) CompletionStage(java.util.concurrent.CompletionStage) UpdateJobClusterWorkerMigrationStrategyRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterWorkerMigrationStrategyRequest) JobRouteUtils.createListJobIdsRequest(io.mantisrx.master.api.akka.route.utils.JobRouteUtils.createListJobIdsRequest) CachingDirectives.alwaysCache(akka.http.javadsl.server.directives.CachingDirectives.alwaysCache) ActorSystem(akka.actor.ActorSystem) ConfigurationProvider(io.mantisrx.server.master.config.ConfigurationProvider) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) Optional(java.util.Optional) CLIENT_ERROR_CONFLICT(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.CLIENT_ERROR_CONFLICT) UpdateJobClusterSLARequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterSLARequest) SERVER_ERROR(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR) UpdateJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterResponse) ListJobClustersRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobClustersRequest) Route(akka.http.javadsl.server.Route) MetricsRegistry(io.mantisrx.common.metrics.MetricsRegistry) StatusCodes(akka.http.javadsl.model.StatusCodes) Function(java.util.function.Function) PathMatchers(akka.http.javadsl.server.PathMatchers) DeleteJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DeleteJobClusterResponse) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) Counter(io.mantisrx.common.metrics.Counter) Logger(org.slf4j.Logger) RouteResult(akka.http.javadsl.server.RouteResult) HttpHeader(akka.http.javadsl.model.HttpHeader) 
HttpRequest(akka.http.javadsl.model.HttpRequest) NamedJobDefinition(io.mantisrx.runtime.NamedJobDefinition) EnableJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.EnableJobClusterRequest) IOException(java.io.IOException) CreateJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.CreateJobClusterResponse) Pair(akka.japi.Pair) TimeUnit(java.util.concurrent.TimeUnit) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) UpdateJobClusterArtifactRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterArtifactRequest) CachingSettings(akka.http.caching.javadsl.CachingSettings) Collections(java.util.Collections) UpdateJobClusterLabelsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterLabelsRequest) UpdateJobClusterSLARequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterSLARequest) MantisJobDefinition(io.mantisrx.runtime.MantisJobDefinition) ListJobClustersRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobClustersRequest) IOException(java.io.IOException) IOException(java.io.IOException) DisableJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DisableJobClusterRequest) UpdateJobClusterArtifactRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterArtifactRequest) EnableJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.EnableJobClusterRequest) UpdateJobClusterWorkerMigrationStrategyRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterWorkerMigrationStrategyRequest) DeleteJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DeleteJobClusterRequest) NamedJobDefinition(io.mantisrx.runtime.NamedJobDefinition) CreateJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.CreateJobClusterResponse) 
CompletionStage(java.util.concurrent.CompletionStage) Pair(akka.japi.Pair) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)

Example 4 with SERVER_ERROR

use of io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR in project mantis by Netflix.

the class JobListHelperActor method onJobList.

/**
 * Fans a list-jobs query out to the job cluster actors selected by
 * {@code getJobClustersMatchingRegex} and replies to {@code request.sender} with the
 * merged result.
 *
 * <p>Every selected cluster actor is asked with a 500 ms timeout. If an individual ask fails,
 * the failure is logged at WARN and that cluster simply contributes nothing, so one failing
 * cluster does not fail the whole query. The job lists of all replies are flattened into a
 * single list ordered by ascending {@code getSubmittedAt()}. The pipeline is subscribed on the
 * Rx computation scheduler, so the reply is sent from the Rx pipeline's callbacks.
 *
 * <p>Outcome:
 * <ul>
 *   <li>on completion: a {@code ListJobsResponse} with {@code SUCCESS}, an empty message and
 *       the sorted jobs;</li>
 *   <li>on a pipeline error: a {@code ListJobsResponse} with {@code SERVER_ERROR}, the error's
 *       message and whatever the result list holds at that point.</li>
 * </ul>
 * In both cases the sender of the message currently being processed is passed as the sender of
 * the reply.
 *
 * @param request wrapper providing the list-jobs request, the candidate job cluster infos and
 *                the actor that must receive the reply
 */
private void onJobList(ListJobRequestWrapper request) {
    // Capture the sender now: the Rx callbacks below do not run as part of this message handler.
    final ActorRef originalSender = getSender();
    final Timeout askTimeout = new Timeout(Duration.create(500, TimeUnit.MILLISECONDS));
    final List<MantisJobMetadataView> sortedJobs = Lists.newArrayList();

    getJobClustersMatchingRegex(request.jobClusterInfoMap.values(), request.listJobsRequest.getCriteria())
        .flatMap(clusterInfo -> {
            CompletionStage<JobClusterManagerProto.ListJobsResponse> pendingReply =
                ask(clusterInfo.jobClusterActor, request.listJobsRequest, askTimeout)
                    .thenApply(JobClusterManagerProto.ListJobsResponse.class::cast);
            return Observable.from(pendingReply.toCompletableFuture(), Schedulers.io())
                .onErrorResumeNext(failure -> {
                    // Best effort: a cluster that fails to answer is skipped, not fatal.
                    logger.warn("caught exception {}", failure.getMessage(), failure);
                    return Observable.empty();
                });
        })
        .filter(Objects::nonNull)
        .flatMapIterable(clusterReply -> clusterReply.getJobList())
        .toSortedList((left, right) ->
            Long.compare(left.getJobMetadata().getSubmittedAt(), right.getJobMetadata().getSubmittedAt()))
        .subscribeOn(Schedulers.computation())
        .subscribe(
            sortedJobs::addAll,
            error -> request.sender.tell(
                new JobClusterManagerProto.ListJobsResponse(
                    request.listJobsRequest.requestId, SERVER_ERROR, error.getMessage(), sortedJobs),
                originalSender),
            // TODO: the criteria limit is applied at the cluster level as well; if a global
            // limit is wanted, truncate the merged list to that limit here before replying.
            () -> request.sender.tell(
                new JobClusterManagerProto.ListJobsResponse(
                    request.listJobsRequest.requestId, SUCCESS, "", sortedJobs),
                originalSender));
}
Also used : SERVER_ERROR(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR) Logger(org.slf4j.Logger) Collection(java.util.Collection) LoggerFactory(org.slf4j.LoggerFactory) SUCCESS(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS) Timeout(akka.util.Timeout) Observable(rx.Observable) Objects(java.util.Objects) TimeUnit(java.util.concurrent.TimeUnit) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) List(java.util.List) Duration(scala.concurrent.duration.Duration) Lists(io.mantisrx.shaded.com.google.common.collect.Lists) CompletionStage(java.util.concurrent.CompletionStage) ActorRef(akka.actor.ActorRef) PatternsCS.ask(akka.pattern.PatternsCS.ask) Map(java.util.Map) AbstractActor(akka.actor.AbstractActor) JobClusterProtoAdapter(io.mantisrx.master.api.akka.route.proto.JobClusterProtoAdapter) Schedulers(rx.schedulers.Schedulers) MantisJobClusterMetadataView(io.mantisrx.master.jobcluster.MantisJobClusterMetadataView) Pattern(java.util.regex.Pattern) Props(akka.actor.Props) MantisJobMetadataView(io.mantisrx.master.jobcluster.job.MantisJobMetadataView) MantisJobMetadataView(io.mantisrx.master.jobcluster.job.MantisJobMetadataView) ActorRef(akka.actor.ActorRef) Timeout(akka.util.Timeout) Objects(java.util.Objects) CompletionStage(java.util.concurrent.CompletionStage) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)

Aggregations

SERVER_ERROR (io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR)4 Map (java.util.Map)4 CompletionStage (java.util.concurrent.CompletionStage)4 Logger (org.slf4j.Logger)4 LoggerFactory (org.slf4j.LoggerFactory)4 ActorRef (akka.actor.ActorRef)3 Props (akka.actor.Props)3 PatternsCS.ask (akka.pattern.PatternsCS.ask)3 JobClusterProtoAdapter (io.mantisrx.master.api.akka.route.proto.JobClusterProtoAdapter)3 AbstractActor (akka.actor.AbstractActor)2 Timeout (akka.util.Timeout)2 Counter (io.mantisrx.common.metrics.Counter)2 Metrics (io.mantisrx.common.metrics.Metrics)2 MetricsRegistry (io.mantisrx.common.metrics.MetricsRegistry)2 SUCCESS (io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS)2 JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)2 Lists (io.mantisrx.shaded.com.google.common.collect.Lists)2 List (java.util.List)2 Objects (java.util.Objects)2 TimeUnit (java.util.concurrent.TimeUnit)2