Search in sources :

Example 1 with ScaleStageRequest

Use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageRequest in the Mantis project by Netflix.

From the class JobRoute, method getJobRoutes.

/**
 * Builds the HTTP route tree for the job endpoints.
 *
 * <p>Routes are tried in the order they are listed:
 * <ul>
 *   <li>POST {@code STATUS_ENDPOINT} - worker status events (heartbeats may be skipped by config).</li>
 *   <li>POST {@code API_JOBS/KILL_ENDPOINT} - kill a job.</li>
 *   <li>POST {@code API_JOBS/RESUBMIT_WORKER_ENDPOINT} - resubmit a worker.</li>
 *   <li>POST {@code API_JOBS/SCALE_STAGE_ENDPOINT} - scale a stage, bounded by the configured
 *       max workers per stage.</li>
 *   <li>GET {@code API_JOBS/list}, {@code list/matchinglabels}, {@code list/<jobId>},
 *       {@code list/matching/<regex>}, {@code archived/<jobId>} and {@code archived}.</li>
 * </ul>
 *
 * <p>Every POST handler reads the body as a string and parses it with {@code Jackson.fromJSON};
 * an {@link IOException} from parsing is answered with {@code 400 Bad Request} and a small JSON
 * error document.
 *
 * @return the combined route for all job endpoints
 */
private Route getJobRoutes() {
    return route(path(STATUS_ENDPOINT, () -> post(() -> decodeRequest(() -> entity(Unmarshaller.entityToString(), req -> {
        // POST STATUS_ENDPOINT: a worker posts a status event as a JSON body.
        if (logger.isDebugEnabled()) {
            logger.debug("/api/postjobstatus called {}", req);
        }
        try {
            workerHeartbeatStatusPOST.increment();
            PostJobStatusRequest postJobStatusRequest = Jackson.fromJSON(req, PostJobStatusRequest.class);
            WorkerEvent workerStatusRequest = createWorkerStatusRequest(postJobStatusRequest);
            if (workerStatusRequest instanceof WorkerHeartbeat) {
                if (!ConfigurationProvider.getConfig().isHeartbeatProcessingEnabled()) {
                    // skip heartbeat processing: acknowledge with 200 OK without forwarding the event
                    if (logger.isTraceEnabled()) {
                        logger.trace("skipped heartbeat event {}", workerStatusRequest);
                    }
                    workerHeartbeatSkipped.increment();
                    return complete(StatusCodes.OK);
                }
            }
            return completeWithFuture(jobRouteHandler.workerStatus(workerStatusRequest).thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error handling job status {}", req, e);
            return complete(StatusCodes.BAD_REQUEST, "{\"error\": \"invalid JSON payload to post job status\"}");
        }
    })))), pathPrefix(API_JOBS, () -> route(post(() -> route(path(KILL_ENDPOINT, () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), req -> {
        // POST API_JOBS/KILL_ENDPOINT: kill the job named in the JSON body.
        logger.debug("/api/jobs/kill called {}", req);
        try {
            final KillJobRequest killJobRequest = Jackson.fromJSON(req, KillJobRequest.class);
            return completeWithFuture(jobRouteHandler.kill(killJobRequest).thenApply(resp -> {
                if (resp.responseCode == BaseResponse.ResponseCode.SUCCESS) {
                    return new JobClusterManagerProto.KillJobResponse(resp.requestId, resp.responseCode, resp.getState(), "[\"" + resp.getJobId().getId() + " Killed\"]", resp.getJobId(), resp.getUser());
                } else if (resp.responseCode == BaseResponse.ResponseCode.CLIENT_ERROR) {
                    // for backwards compatibility with old master: a client error is reported as
                    // SUCCESS, with the original message carried in the response text
                    return new JobClusterManagerProto.KillJobResponse(resp.requestId, BaseResponse.ResponseCode.SUCCESS, resp.getState(), "[\"" + resp.message + " \"]", resp.getJobId(), resp.getUser());
                }
                // any other response code is passed through unchanged
                return resp;
            }).thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error on job kill {}", req, e);
            return complete(StatusCodes.BAD_REQUEST, "{\"error\": \"invalid json payload to kill job\"}");
        }
    }))), path(RESUBMIT_WORKER_ENDPOINT, () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), req -> {
        // POST API_JOBS/RESUBMIT_WORKER_ENDPOINT: resubmit the worker named in the JSON body.
        logger.debug("/api/jobs/resubmitWorker called {}", req);
        try {
            final ResubmitWorkerRequest resubmitWorkerRequest = Jackson.fromJSON(req, ResubmitWorkerRequest.class);
            return completeWithFuture(jobRouteHandler.resubmitWorker(resubmitWorkerRequest).thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error on worker resubmit {}", req, e);
            return complete(StatusCodes.BAD_REQUEST, "{\"error\": \"invalid json payload to resubmit worker\"}");
        }
    }))), path(SCALE_STAGE_ENDPOINT, () -> decodeRequest(() -> entity(Unmarshaller.entityToString(), req -> {
        // POST API_JOBS/SCALE_STAGE_ENDPOINT: scale a stage to the requested number of workers.
        logger.debug("/api/jobs/scaleStage called {}", req);
        try {
            ScaleStageRequest scaleStageRequest = Jackson.fromJSON(req, ScaleStageRequest.class);
            int numWorkers = scaleStageRequest.getNumWorkers();
            int maxWorkersPerStage = ConfigurationProvider.getConfig().getMaxWorkersPerStage();
            // NOTE(review): the check accepts numWorkers == maxWorkersPerStage although the message
            // says "less than"; wording left untouched in case clients match on it.
            if (numWorkers > maxWorkersPerStage) {
                logger.warn("rejecting ScaleStageRequest {} with invalid num workers", scaleStageRequest);
                return complete(StatusCodes.BAD_REQUEST, "{\"error\": \"num workers must be less than " + maxWorkersPerStage + "\"}");
            }
            return completeWithFuture(jobRouteHandler.scaleStage(scaleStageRequest).thenApply(this::toHttpResponse));
        } catch (IOException e) {
            logger.warn("Error scaling stage {}", req, e);
            // The parser message may contain quotes, backslashes or line breaks; escape it so the
            // hand-built error document stays valid JSON.
            return complete(StatusCodes.BAD_REQUEST, "{\"error\": \"invalid json payload to scale stage " + escapeJsonString(e.getMessage()) + "\"}");
        }
    }))))), get(() -> route(
    // GET routes under API_JOBS.
    // Original note on the list routes: optional labels.op query param - default value is 'or'
    // if not specified (other possible value is 'and'). Presumably evaluated inside
    // jobListRoute - verify there.
    path(segment("list"), () -> {
        jobListGET.increment();
        return jobListRoute(Optional.empty());
    }), path(segment("list").slash("matchinglabels"), () -> {
        jobListLabelMatchGET.increment();
        return jobListRoute(Optional.empty());
    }), path(segment("list").slash(PathMatchers.segment()), (jobId) -> {
        // list/<jobId>: details of a single job; the worker/stage lists of the view are left empty.
        logger.debug("/api/jobs/list/{} called", jobId);
        jobListJobIdGET.increment();
        return completeAsync(jobRouteHandler.getJobDetails(new JobClusterManagerProto.GetJobDetailsRequest("masterAPI", jobId)), resp -> {
            Optional<MantisJobMetadataView> mantisJobMetadataView = resp.getJobMetadata().map(metaData -> new MantisJobMetadataView(metaData, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), false));
            return completeOK(mantisJobMetadataView, Jackson.marshaller());
        });
    }), path(segment("list").slash("matching").slash(PathMatchers.segment()), (regex) -> {
        // list/matching/<regex>: an empty regex is treated the same as no regex at all.
        jobListRegexGET.increment();
        return jobListRoute(Optional.ofNullable(regex).filter(r -> !r.isEmpty()));
    }), path(segment("archived").slash(PathMatchers.segment()), (jobId) -> parameterOptional(StringUnmarshallers.INTEGER, "limit", (limit) -> {
        // archived/<jobId>?limit=N: archived workers of a job; limit falls back to the default.
        jobArchivedWorkersGET.increment();
        Optional<JobId> jobIdO = JobId.fromId(jobId);
        if (jobIdO.isPresent()) {
            ListArchivedWorkersRequest req = new ListArchivedWorkersRequest(jobIdO.get(), limit.orElse(DEFAULT_LIST_ARCHIVED_WORKERS_LIMIT));
            return alwaysCache(cache, requestUriKeyer, () -> extractUri(uri -> completeAsync(jobRouteHandler.listArchivedWorkers(req), resp -> {
                List<MantisWorkerMetadataWritable> workers = resp.getWorkerMetadata().stream().map(wm -> DataFormatAdapter.convertMantisWorkerMetadataToMantisWorkerMetadataWritable(wm)).collect(Collectors.toList());
                return completeOK(workers, Jackson.marshaller());
            })));
        } else {
            return complete(StatusCodes.BAD_REQUEST, "error: 'archived/<jobId>' request must include a valid jobId");
        }
    })), path(segment("archived"), () -> {
        // archived without a job id is always rejected.
        jobArchivedWorkersGETInvalid.increment();
        return complete(StatusCodes.BAD_REQUEST, "error: 'archived' Request must include jobId");
    }))))));
}

/**
 * Escapes a value so it can be embedded between double quotes in a JSON document.
 *
 * <p>Double quotes, backslashes and control characters (below U+0020) are escaped; all other
 * characters are copied as-is. A {@code null} input is rendered as the text {@code null},
 * matching what plain string concatenation would produce.
 *
 * @param raw the text to embed, may be {@code null}
 * @return the escaped text, without surrounding quotes
 */
private static String escapeJsonString(final String raw) {
    final String text = String.valueOf(raw);
    final StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        final char c = text.charAt(i);
        switch (c) {
            case '"':
                escaped.append("\\\"");
                break;
            case '\\':
                escaped.append("\\\\");
                break;
            case '\n':
                escaped.append("\\n");
                break;
            case '\r':
                escaped.append("\\r");
                break;
            case '\t':
                escaped.append("\\t");
                break;
            default:
                if (c < 0x20) {
                    escaped.append(String.format("\\u%04x", (int) c));
                } else {
                    escaped.append(c);
                }
                break;
        }
    }
    return escaped.toString();
}
Also used : JobId(io.mantisrx.server.master.domain.JobId) Uri(akka.http.javadsl.model.Uri) MasterConfiguration(io.mantisrx.server.master.config.MasterConfiguration) Arrays(java.util.Arrays) JavaPartialFunction(akka.japi.JavaPartialFunction) Cache(akka.http.caching.javadsl.Cache) JobRouteHandler(io.mantisrx.master.api.akka.route.handlers.JobRouteHandler) LoggerFactory(org.slf4j.LoggerFactory) RequestContext(akka.http.javadsl.server.RequestContext) PathMatchers.segment(akka.http.javadsl.server.PathMatchers.segment) StringUnmarshallers(akka.http.javadsl.unmarshalling.StringUnmarshallers) PathMatcher0(akka.http.javadsl.server.PathMatcher0) ListArchivedWorkersRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListArchivedWorkersRequest) JobClusterProtoAdapter(io.mantisrx.master.api.akka.route.proto.JobClusterProtoAdapter) Unmarshaller(akka.http.javadsl.unmarshalling.Unmarshaller) Metrics(io.mantisrx.common.metrics.Metrics) ResubmitWorkerRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ResubmitWorkerRequest) PostJobStatusRequest(io.mantisrx.server.core.PostJobStatusRequest) Jackson(io.mantisrx.master.api.akka.route.Jackson) BaseResponse(io.mantisrx.master.jobcluster.proto.BaseResponse) ScaleStageRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageRequest) HttpMethods(akka.http.javadsl.model.HttpMethods) Collectors(java.util.stream.Collectors) ExceptionHandler(akka.http.javadsl.server.ExceptionHandler) List(java.util.List) JobRouteUtils.createListJobIdsRequest(io.mantisrx.master.api.akka.route.utils.JobRouteUtils.createListJobIdsRequest) CachingDirectives.alwaysCache(akka.http.javadsl.server.directives.CachingDirectives.alwaysCache) ActorSystem(akka.actor.ActorSystem) ConfigurationProvider(io.mantisrx.server.master.config.ConfigurationProvider) Optional(java.util.Optional) DataFormatAdapter(io.mantisrx.server.master.domain.DataFormatAdapter) 
MantisJobMetadataView(io.mantisrx.master.jobcluster.job.MantisJobMetadataView) MantisWorkerMetadataWritable(io.mantisrx.server.master.store.MantisWorkerMetadataWritable) JobRouteUtils.createListJobsRequest(io.mantisrx.master.api.akka.route.utils.JobRouteUtils.createListJobsRequest) Route(akka.http.javadsl.server.Route) MetricsRegistry(io.mantisrx.common.metrics.MetricsRegistry) StatusCodes(akka.http.javadsl.model.StatusCodes) Function(java.util.function.Function) PathMatchers(akka.http.javadsl.server.PathMatchers) WorkerEvent(io.mantisrx.server.master.scheduler.WorkerEvent) KillJobRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobRequest) Counter(io.mantisrx.common.metrics.Counter) Logger(org.slf4j.Logger) RouteResult(akka.http.javadsl.server.RouteResult) HttpHeader(akka.http.javadsl.model.HttpHeader) HttpRequest(akka.http.javadsl.model.HttpRequest) IOException(java.io.IOException) DEFAULT_LIST_ARCHIVED_WORKERS_LIMIT(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListArchivedWorkersRequest.DEFAULT_LIST_ARCHIVED_WORKERS_LIMIT) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) JobRouteUtils.createWorkerStatusRequest(io.mantisrx.master.api.akka.route.utils.JobRouteUtils.createWorkerStatusRequest) Collections(java.util.Collections) WorkerHeartbeat(io.mantisrx.master.jobcluster.job.worker.WorkerHeartbeat) Optional(java.util.Optional) PostJobStatusRequest(io.mantisrx.server.core.PostJobStatusRequest) IOException(java.io.IOException) ScaleStageRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageRequest) WorkerHeartbeat(io.mantisrx.master.jobcluster.job.worker.WorkerHeartbeat) MantisJobMetadataView(io.mantisrx.master.jobcluster.job.MantisJobMetadataView) WorkerEvent(io.mantisrx.server.master.scheduler.WorkerEvent) ResubmitWorkerRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ResubmitWorkerRequest) 
KillJobRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobRequest) MantisWorkerMetadataWritable(io.mantisrx.server.master.store.MantisWorkerMetadataWritable) ListArchivedWorkersRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListArchivedWorkersRequest) JobId(io.mantisrx.server.master.domain.JobId) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)

Example 2 with ScaleStageRequest

Use of io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageRequest in the Mantis project by Netflix.

From the class JobClusterTest, method testScaleStage.

/**
 * Submits a job with one scalable stage to a freshly initialized job cluster actor, sends a
 * {@code ScaleStageRequest}, and checks that the response reports success with 2 actual workers
 * and that the job store and scheduler mocks saw the expected number of calls.
 *
 * <p>Any unexpected exception propagates to the test runner so the failure report carries the
 * real cause and stack trace.
 *
 * @throws Exception if any interaction with the mocked store or the actor fails unexpectedly
 */
@Test
public void testScaleStage() throws Exception {
    TestKit probe = new TestKit(system);
    String clusterName = "testScaleStage";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);

    // Create and initialize the job cluster actor.
    final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName);
    ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
    jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
    JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
    assertEquals(SUCCESS, createResp.responseCode);

    // Scaling policy with CPU and DataDrop strategies for the single scalable stage.
    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> smap = new HashMap<>();
    smap.put(StageScalingPolicy.ScalingReason.CPU, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(StageScalingPolicy.ScalingReason.DataDrop, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));
    SchedulingInfo SINGLE_WORKER_SCHED_INFO = new SchedulingInfo.Builder().numberOfStages(1).multiWorkerScalableStageWithConstraints(1, DEFAULT_MACHINE_DEFINITION, Lists.newArrayList(), Lists.newArrayList(), new StageScalingPolicy(1, 1, 10, 1, 1, 1, smap)).build();
    final JobDefinition jobDefn = createJob(clusterName, 1, MantisJobDurationType.Transient, "USER_TYPE", SINGLE_WORKER_SCHED_INFO, Lists.newArrayList());

    // Submit the job and move its workers to the started state.
    String jobId = clusterName + "-1";
    jobClusterActor.tell(new SubmitJobRequest(clusterName, "user", Optional.ofNullable(jobDefn)), probe.getRef());
    // Drain the submit response from the probe; its content is not asserted here.
    probe.expectMsgClass(SubmitJobResponse.class);
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobClusterActor, jobId, 0, new WorkerId(clusterName, jobId, 0, 1));
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobClusterActor, jobId, 1, new WorkerId(clusterName, jobId, 0, 2));
    JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, jobId, BaseResponse.ResponseCode.SUCCESS, JobState.Launched);

    // Request the scale-up (presumably stage 1 to 2 workers - confirm against the
    // ScaleStageRequest constructor) and check the response.
    jobClusterActor.tell(new ScaleStageRequest(jobId, 1, 2, user, "No reason"), probe.getRef());
    ScaleStageResponse scaleResp = probe.expectMsgClass(ScaleStageResponse.class);
    // Surface the response message in the failure report instead of printing it to stdout.
    assertEquals("scale Resp: " + scaleResp.message, SUCCESS, scaleResp.responseCode);
    assertEquals(2, scaleResp.getActualNumWorkers());

    verify(jobStoreMock, times(1)).storeNewJob(any());
    // initial worker
    verify(jobStoreMock, times(1)).storeNewWorkers(any(), any());
    // scale up worker
    verify(jobStoreMock, times(1)).storeNewWorker(any());
    verify(jobStoreMock, times(6)).updateWorker(any());
    verify(jobStoreMock, times(3)).updateJob(any());
    // initial worker and scale up worker
    verify(schedulerMock, times(3)).scheduleWorker(any());
}
Also used : JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) HashMap(java.util.HashMap) ActorRef(akka.actor.ActorRef) ScaleStageResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageResponse) SubmitJobRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobRequest) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) TestKit(akka.testkit.javadsl.TestKit) Matchers.anyString(org.mockito.Matchers.anyString) WorkerId(io.mantisrx.server.core.domain.WorkerId) ScaleStageRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageRequest) InvalidJobException(io.mantisrx.runtime.command.InvalidJobException) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) StageDeploymentStrategy(io.mantisrx.runtime.descriptor.StageDeploymentStrategy) DeploymentStrategy(io.mantisrx.runtime.descriptor.DeploymentStrategy) SubmitJobResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobResponse) Test(org.junit.Test)

Aggregations

ScaleStageRequest (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageRequest)2 ActorRef (akka.actor.ActorRef)1 ActorSystem (akka.actor.ActorSystem)1 Cache (akka.http.caching.javadsl.Cache)1 HttpHeader (akka.http.javadsl.model.HttpHeader)1 HttpMethods (akka.http.javadsl.model.HttpMethods)1 HttpRequest (akka.http.javadsl.model.HttpRequest)1 StatusCodes (akka.http.javadsl.model.StatusCodes)1 Uri (akka.http.javadsl.model.Uri)1 ExceptionHandler (akka.http.javadsl.server.ExceptionHandler)1 PathMatcher0 (akka.http.javadsl.server.PathMatcher0)1 PathMatchers (akka.http.javadsl.server.PathMatchers)1 PathMatchers.segment (akka.http.javadsl.server.PathMatchers.segment)1 RequestContext (akka.http.javadsl.server.RequestContext)1 Route (akka.http.javadsl.server.Route)1 RouteResult (akka.http.javadsl.server.RouteResult)1 CachingDirectives.alwaysCache (akka.http.javadsl.server.directives.CachingDirectives.alwaysCache)1 StringUnmarshallers (akka.http.javadsl.unmarshalling.StringUnmarshallers)1 Unmarshaller (akka.http.javadsl.unmarshalling.Unmarshaller)1 JavaPartialFunction (akka.japi.JavaPartialFunction)1