Search in sources :

Example 6 with MantisScheduler

use of io.mantisrx.server.master.scheduler.MantisScheduler in project mantis by Netflix.

the class WorkerRegistryV2Test method testJobScaleUp.

@Test
public void testJobScaleUp() throws Exception, InvalidJobException, io.mantisrx.runtime.command.InvalidJobException {
    WorkerRegistryV2 workerRegistryV2 = new WorkerRegistryV2();
    LifecycleEventPublisher eventPublisher = new LifecycleEventPublisherImpl(new AuditEventSubscriberLoggingImpl(), new StatusEventSubscriberLoggingImpl(), new DummyWorkerEventSubscriberImpl(workerRegistryV2));
    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> smap = new HashMap<>();
    smap.put(StageScalingPolicy.ScalingReason.CPU, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(StageScalingPolicy.ScalingReason.DataDrop, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));
    SchedulingInfo sInfo = new SchedulingInfo.Builder().numberOfStages(1).multiWorkerScalableStageWithConstraints(1, new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList(), new StageScalingPolicy(1, 0, 10, 1, 1, 0, smap)).build();
    String clusterName = "testJobScaleUp";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, eventPublisher);
    assertEquals(2, workerRegistryV2.getNumRunningWorkers());
    // send scale up request
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 2, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp = probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleupResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(2, scaleResp.getActualNumWorkers());
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, clusterName + "-1", 0, new WorkerId(clusterName + "-1", 1, 3));
    jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("user", new JobId(clusterName, 1)), probe.getRef());
    JobClusterManagerProto.GetJobDetailsResponse resp = probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
    Map<Integer, ? extends IMantisStageMetadata> stageMetadata = resp.getJobMetadata().get().getStageMetadata();
    assertEquals(2, stageMetadata.get(1).getAllWorkers().size());
    int cnt = 0;
    for (int i = 0; i < 50; i++) {
        cnt++;
        if (workerRegistryV2.getNumRunningWorkers() == 3) {
            break;
        }
    }
    assertTrue(cnt < 50);
}
Also used : ActorRef(akka.actor.ActorRef) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) JobId(io.mantisrx.server.master.domain.JobId) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) MachineDefinition(io.mantisrx.runtime.MachineDefinition) WorkerId(io.mantisrx.server.core.domain.WorkerId) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) Test(org.junit.Test)

Example 7 with MantisScheduler

use of io.mantisrx.server.master.scheduler.MantisScheduler in project mantis by Netflix.

the class JobClustersManagerActor method initialize.

private void initialize(JobClustersManagerInitialize initMsg) {
    ActorRef sender = getSender();
    try {
        logger.info("In JobClustersManagerActor:initialize");
        this.jobListHelperActor = getContext().actorOf(JobListHelperActor.props(), "JobListHelperActor");
        getContext().watch(jobListHelperActor);
        mantisScheduler = initMsg.getScheduler();
        Map<String, IJobClusterMetadata> jobClusterMap = new HashMap<>();
        this.jobClusterInfoManager = new JobClusterInfoManager(jobStore, mantisScheduler, eventPublisher);
        if (!initMsg.isLoadJobsFromStore()) {
            getContext().become(initializedBehavior);
            sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SUCCESS, "JobClustersManager successfully inited"), getSelf());
        } else {
            List<IJobClusterMetadata> jobClusters = jobStore.loadAllJobClusters();
            logger.info("Read {} job clusters from storage", jobClusters.size());
            List<IMantisJobMetadata> activeJobs = jobStore.loadAllActiveJobs();
            logger.info("Read {} jobs from storage", activeJobs.size());
            List<CompletedJob> completedJobs = jobStore.loadAllCompletedJobs();
            logger.info("Read {} completed jobs from storage", completedJobs.size());
            for (IJobClusterMetadata jobClusterMeta : jobClusters) {
                String clusterName = jobClusterMeta.getJobClusterDefinition().getName();
                jobClusterMap.put(clusterName, jobClusterMeta);
            }
            Map<String, List<IMantisJobMetadata>> clusterToJobMap = new HashMap<>();
            Map<String, List<CompletedJob>> clusterToCompletedJobMap = new HashMap<>();
            // group jobs by cluster
            for (IMantisJobMetadata jobMeta : activeJobs) {
                String clusterName = jobMeta.getClusterName();
                clusterToJobMap.computeIfAbsent(clusterName, k -> new ArrayList<>()).add(jobMeta);
            }
            for (CompletedJob jobMeta : completedJobs) {
                String clusterName = jobMeta.getName();
                clusterToCompletedJobMap.computeIfAbsent(clusterName, k -> new ArrayList<>()).add(jobMeta);
            }
            long masterInitTimeoutSecs = ConfigurationProvider.getConfig().getMasterInitTimeoutSecs();
            long timeout = ((masterInitTimeoutSecs - 60)) > 0 ? (masterInitTimeoutSecs - 60) : masterInitTimeoutSecs;
            Observable.from(jobClusterMap.values()).filter((jobClusterMeta) -> jobClusterMeta != null && jobClusterMeta.getJobClusterDefinition() != null).flatMap((jobClusterMeta) -> {
                Duration t = Duration.ofSeconds(timeout);
                Optional<JobClusterInfo> jobClusterInfoO = jobClusterInfoManager.createClusterActorAndRegister(jobClusterMeta.getJobClusterDefinition());
                if (!jobClusterInfoO.isPresent()) {
                    logger.info("skipping job cluster {} on bootstrap as actor creating failed", jobClusterMeta.getJobClusterDefinition().getName());
                    return Observable.empty();
                }
                JobClusterInfo jobClusterInfo = jobClusterInfoO.get();
                List<IMantisJobMetadata> jobList = Lists.newArrayList();
                List<IMantisJobMetadata> jList = clusterToJobMap.get(jobClusterMeta.getJobClusterDefinition().getName());
                if (jList != null) {
                    jobList.addAll(jList);
                }
                List<CompletedJob> completedJobsList = Lists.newArrayList();
                List<CompletedJob> cList = clusterToCompletedJobMap.get(jobClusterMeta.getJobClusterDefinition().getName());
                if (cList != null) {
                    completedJobsList.addAll(cList);
                }
                JobClusterProto.InitializeJobClusterRequest req = new JobClusterProto.InitializeJobClusterRequest((JobClusterDefinitionImpl) jobClusterMeta.getJobClusterDefinition(), jobClusterMeta.isDisabled(), jobClusterMeta.getLastJobCount(), jobList, completedJobsList, "system", getSelf(), false);
                return jobClusterInfoManager.initializeCluster(jobClusterInfo, req, t);
            }).filter(Objects::nonNull).toBlocking().subscribe((clusterInit) -> {
                logger.info("JobCluster {} inited with code {}", clusterInit.jobClusterName, clusterInit.responseCode);
                numJobClusterInitSuccesses.increment();
            }, (error) -> {
                logger.warn("Exception initializing clusters {}", error.getMessage(), error);
                logger.error("JobClusterManagerActor had errors during initialization NOT transitioning to initialized behavior");
                // getContext().become(initializedBehavior);
                sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SERVER_ERROR, "JobClustersManager  inited with errors"), getSelf());
            }, () -> {
                logger.info("JobClusterManagerActor transitioning to initialized behavior");
                getContext().become(initializedBehavior);
                sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SUCCESS, "JobClustersManager successfully inited"), getSelf());
            });
            getTimers().startPeriodicTimer(CHECK_CLUSTERS_TIMER_KEY, new ReconcileJobCluster(), Duration.ofSeconds(checkAgainInSecs));
            // kick off loading of archived jobs
            logger.info("Kicking off archived job load asynchronously");
            jobStore.loadAllArchivedJobsAsync();
        }
    } catch (Exception e) {
        logger.error("caught exception", e);
        sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SERVER_ERROR, e.getMessage()), getSelf());
    }
    logger.info("JobClustersManagerActor:initialize ends");
}
Also used : JobId(io.mantisrx.server.master.domain.JobId) Terminated(akka.actor.Terminated) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) GetLastSubmittedJobIdStreamResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLastSubmittedJobIdStreamResponse) ListArchivedWorkersRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListArchivedWorkersRequest) CreateJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.CreateJobClusterRequest) UpdateJobClusterLabelsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterLabelsResponse) ActorRef(akka.actor.ActorRef) UpdateJobClusterArtifactResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterArtifactResponse) DeleteJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DeleteJobClusterRequest) Duration(java.time.Duration) Map(java.util.Map) Schedulers(rx.schedulers.Schedulers) Metrics(io.mantisrx.common.metrics.Metrics) DisableJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DisableJobClusterResponse) ListCompletedJobsInClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListCompletedJobsInClusterResponse) Set(java.util.Set) ListCompletedJobsInClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListCompletedJobsInClusterRequest) ScaleStageRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageRequest) BaseResponse(io.mantisrx.master.jobcluster.proto.BaseResponse) SUCCESS_CREATED(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS_CREATED) SubmitJobRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobRequest) CompletionStage(java.util.concurrent.CompletionStage) UpdateJobClusterWorkerMigrationStrategyRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterWorkerMigrationStrategyRequest) ListJobIdsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobIdsRequest) JobHelper(io.mantisrx.master.jobcluster.job.JobHelper) CLIENT_ERROR_CONFLICT(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.CLIENT_ERROR_CONFLICT) ListWorkersRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListWorkersRequest) IJobClusterMetadata(io.mantisrx.master.jobcluster.IJobClusterMetadata) Optional.empty(java.util.Optional.empty) SERVER_ERROR(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR) ListJobClustersRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobClustersRequest) MetricsRegistry(io.mantisrx.common.metrics.MetricsRegistry) ArrayList(java.util.ArrayList) ListJobIdsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobIdsResponse) SupervisorStrategy(akka.actor.SupervisorStrategy) JobClusterDefinitionImpl(io.mantisrx.server.master.domain.JobClusterDefinitionImpl) WorkerEvent(io.mantisrx.server.master.scheduler.WorkerEvent) GetLastSubmittedJobIdStreamRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLastSubmittedJobIdStreamRequest) KillJobRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobRequest) EnableJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.EnableJobClusterRequest) CreateJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.CreateJobClusterResponse) GetJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterRequest) UpdateJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterRequest) CLIENT_ERROR_NOT_FOUND(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.CLIENT_ERROR_NOT_FOUND) UpdateJobClusterWorkerMigrationStrategyResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterWorkerMigrationStrategyResponse) CompletedJob(io.mantisrx.server.master.domain.JobClusterDefinitionImpl.CompletedJob) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) KillJobResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobResponse) ListJobsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobsResponse) ListJobClustersResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobClustersResponse) EnableJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.EnableJobClusterResponse) IJobClusterDefinition(io.mantisrx.server.master.domain.IJobClusterDefinition) GetJobSchedInfoRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobSchedInfoRequest) JobState(io.mantisrx.master.jobcluster.job.JobState) LoggerFactory(org.slf4j.LoggerFactory) GaugeCallback(io.mantisrx.common.metrics.spectator.GaugeCallback) JobCompletedReason(io.mantisrx.server.core.JobCompletedReason) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) ListJobsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobsRequest) CLIENT_ERROR(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.CLIENT_ERROR) GetJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterResponse) JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) GetLatestJobDiscoveryInfoResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLatestJobDiscoveryInfoResponse) JobClustersManagerInitialize(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.JobClustersManagerInitialize) ReconcileJobCluster(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ReconcileJobCluster) GetJobDetailsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse) JobClusterActor(io.mantisrx.master.jobcluster.JobClusterActor) ResubmitWorkerResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ResubmitWorkerResponse) DisableJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DisableJobClusterRequest) ResubmitWorkerRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ResubmitWorkerRequest) UpdateJobClusterLabelsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterLabelsRequest) Collectors(java.util.stream.Collectors) GetJobSchedInfoResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobSchedInfoResponse) Objects(java.util.Objects) List(java.util.List) ActorPaths(akka.actor.ActorPaths) PatternsCS.ask(akka.pattern.PatternsCS.ask) SubmitJobResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobResponse) GetJobDetailsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsRequest) ConfigurationProvider(io.mantisrx.server.master.config.ConfigurationProvider) Optional(java.util.Optional) Props(akka.actor.Props) UpdateJobClusterSLARequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterSLARequest) ScaleStageResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageResponse) UpdateJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterResponse) ListArchivedWorkersResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListArchivedWorkersResponse) HashMap(java.util.HashMap) UpdateJobClusterSLAResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterSLAResponse) MantisActorSupervisorStrategy(io.mantisrx.master.akka.MantisActorSupervisorStrategy) Observable(rx.Observable) DeleteJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.DeleteJobClusterResponse) ListWorkersResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListWorkersResponse) AbstractActorWithTimers(akka.actor.AbstractActorWithTimers) Counter(io.mantisrx.common.metrics.Counter) Logger(org.slf4j.Logger) Optional.ofNullable(java.util.Optional.ofNullable) GetLatestJobDiscoveryInfoRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLatestJobDiscoveryInfoRequest) SUCCESS(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS) Lists(io.mantisrx.shaded.com.google.common.collect.Lists) UpdateJobClusterArtifactRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterArtifactRequest) Collections(java.util.Collections) JobClustersManagerInitializeResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.JobClustersManagerInitializeResponse) LifecycleEventPublisher(io.mantisrx.master.events.LifecycleEventPublisher) MetricGroupId(io.mantisrx.common.metrics.spectator.MetricGroupId) JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) HashMap(java.util.HashMap) ActorRef(akka.actor.ActorRef) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) ReconcileJobCluster(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ReconcileJobCluster) JobClustersManagerInitializeResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.JobClustersManagerInitializeResponse) Optional(java.util.Optional) JobClusterDefinitionImpl(io.mantisrx.server.master.domain.JobClusterDefinitionImpl) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) Duration(java.time.Duration) CompletedJob(io.mantisrx.server.master.domain.JobClusterDefinitionImpl.CompletedJob) Objects(java.util.Objects) IJobClusterMetadata(io.mantisrx.master.jobcluster.IJobClusterMetadata)

Example 8 with MantisScheduler

use of io.mantisrx.server.master.scheduler.MantisScheduler in project mantis by Netflix.

the class JobClusterTest method testJobKillTriggersSLAToLaunchNew.

@Test
public void testJobKillTriggersSLAToLaunchNew() {
    TestKit probe = new TestKit(system);
    String clusterName = "testJobKillTriggersSLAToLaunchNew";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    SLA sla = new SLA(1, 1, null, null);
    final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName, Lists.newArrayList(), sla);
    ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
    jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
    JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
    assertEquals(SUCCESS, createResp.responseCode);
    String jobId = clusterName + "-1";
    WorkerId workerId1 = new WorkerId(clusterName, jobId, 0, 1);
    doAnswer(invocation -> {
        WorkerEvent terminate = new WorkerTerminate(workerId1, WorkerState.Completed, JobCompletedReason.Killed, System.currentTimeMillis());
        jobClusterActor.tell(terminate, probe.getRef());
        return null;
    }).when(schedulerMock).unscheduleWorker(any(), any());
    try {
        final JobDefinition jobDefn = createJob(clusterName, 1, MantisJobDurationType.Transient);
        JobId jId = new JobId(clusterName, 1);
        JobTestHelper.submitJobAndVerifySuccess(probe, clusterName, jobClusterActor, jobDefn, jobId);
        JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, jobId, SUCCESS, JobState.Accepted);
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobClusterActor, jobId, 1, new WorkerId(clusterName, jobId, 0, 1));
        JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, jobId, SUCCESS, JobState.Launched);
        JobTestHelper.killJobAndVerify(probe, clusterName, jId, jobClusterActor);
        Thread.sleep(500);
        // a new job should have been submitted
        JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, clusterName + "-2", SUCCESS, JobState.Accepted);
    // JobTestHelper.killJobAndVerify(probe, clusterName, new JobId(clusterName, 2), jobClusterActor);
    // verify(jobStoreMock, times(1)).createJobCluster(any());
    // verify(jobStoreMock, times(1)).updateJobCluster(any());
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        fail();
    }
// Mockito.doThrow(IOException.class).when(jobStoreMock).storeNewJob(any());
}
Also used : JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) ActorRef(akka.actor.ActorRef) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) TestKit(akka.testkit.javadsl.TestKit) Matchers.anyString(org.mockito.Matchers.anyString) WorkerId(io.mantisrx.server.core.domain.WorkerId) InvalidJobException(io.mantisrx.runtime.command.InvalidJobException) WorkerTerminate(io.mantisrx.master.jobcluster.job.worker.WorkerTerminate) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) WorkerEvent(io.mantisrx.server.master.scheduler.WorkerEvent) Test(org.junit.Test)

Example 9 with MantisScheduler

use of io.mantisrx.server.master.scheduler.MantisScheduler in project mantis by Netflix.

the class JobClusterTest method testZombieWorkerKilledOnMessage.

@Test
public void testZombieWorkerKilledOnMessage() {
    String clusterName = "testZombieWorkerKilledOnMessage";
    TestKit probe = new TestKit(system);
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName);
    ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
    jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
    JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
    assertEquals(SUCCESS, createResp.responseCode);
    try {
        String jobId = clusterName + "-1";
        WorkerId workerId = new WorkerId(clusterName, jobId, 0, 1);
        WorkerEvent heartBeat2 = new WorkerHeartbeat(new Status(jobId, 1, workerId.getWorkerIndex(), workerId.getWorkerNum(), TYPE.HEARTBEAT, "", MantisJobState.Started, System.currentTimeMillis()));
        jobClusterActor.tell(heartBeat2, probe.getRef());
        jobClusterActor.tell(new GetJobClusterRequest(clusterName), probe.getRef());
        GetJobClusterResponse resp = probe.expectMsgClass(GetJobClusterResponse.class);
        assertEquals(clusterName, resp.getJobCluster().get().getName());
        verify(schedulerMock, times(1)).unscheduleAndTerminateWorker(workerId, empty());
    } catch (Exception e) {
        e.printStackTrace();
        fail();
    }
}
Also used : Status(io.mantisrx.server.core.Status) JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) GetJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterResponse) ActorRef(akka.actor.ActorRef) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) Matchers.anyString(org.mockito.Matchers.anyString) TestKit(akka.testkit.javadsl.TestKit) WorkerId(io.mantisrx.server.core.domain.WorkerId) InvalidJobException(io.mantisrx.runtime.command.InvalidJobException) WorkerHeartbeat(io.mantisrx.master.jobcluster.job.worker.WorkerHeartbeat) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) WorkerEvent(io.mantisrx.server.master.scheduler.WorkerEvent) GetJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterRequest) Test(org.junit.Test)

Example 10 with MantisScheduler

use of io.mantisrx.server.master.scheduler.MantisScheduler in project mantis by Netflix.

the class JobClusterTest method testJobSubmitWithNoSchedInfoUsesJobClusterValues.

@Test
public void testJobSubmitWithNoSchedInfoUsesJobClusterValues() {
    TestKit probe = new TestKit(system);
    String clusterName = "testJobSubmitWithNoSchedInfoUsesJobClusterValues";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    List<Label> clusterLabels = new ArrayList<>();
    Label label = new Label("clabelName", "cLabelValue");
    clusterLabels.add(label);
    final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName, clusterLabels);
    ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
    jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
    JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
    assertEquals(SUCCESS, createResp.responseCode);
    try {
        final JobDefinition jobDefn = new JobDefinition.Builder().withName(clusterName).withVersion("0.0.1").withSubscriptionTimeoutSecs(0).withUser("njoshi").build();
        String jobId = clusterName + "-1";
        JobTestHelper.submitJobAndVerifySuccess(probe, clusterName, jobClusterActor, jobDefn, jobId);
        jobClusterActor.tell(new GetJobDetailsRequest("nj", JobId.fromId(jobId).get()), probe.getRef());
        GetJobDetailsResponse detailsResp = probe.expectMsgClass(GetJobDetailsResponse.class);
        assertEquals(SUCCESS, detailsResp.responseCode);
        assertEquals(JobState.Accepted, detailsResp.getJobMetadata().get().getState());
        // 
        assertEquals(clusterLabels.size() + LabelManager.numberOfMandatoryLabels(), detailsResp.getJobMetadata().get().getLabels().size());
        // confirm that the clusters labels got inherited
        assertEquals(1, detailsResp.getJobMetadata().get().getLabels().stream().filter(l -> l.getName().equals("clabelName")).count());
        // assertEquals(label, detailsResp.getJobMetadata().get().getLabels().get(0));
        // Now submit another one with labels, it should not inherit cluster labels
        Label jobLabel = new Label("jobLabel", "jobValue");
        List<Label> jobLabelList = new ArrayList<>();
        jobLabelList.add(jobLabel);
        final JobDefinition jobDefn2 = new JobDefinition.Builder().withName(clusterName).withVersion("0.0.1").withLabels(jobLabelList).withSubscriptionTimeoutSecs(0).withUser("njoshi").build();
        String jobId2 = clusterName + "-2";
        JobTestHelper.submitJobAndVerifySuccess(probe, clusterName, jobClusterActor, jobDefn2, jobId2);
        jobClusterActor.tell(new GetJobDetailsRequest("nj", JobId.fromId(jobId2).get()), probe.getRef());
        GetJobDetailsResponse detailsResp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
        assertEquals(SUCCESS, detailsResp2.responseCode);
        assertEquals(JobState.Accepted, detailsResp2.getJobMetadata().get().getState());
        assertEquals(clusterLabels.size() + 2, detailsResp2.getJobMetadata().get().getLabels().size());
        // confirm that the clusters labels got inherited
        // assertEquals(jobLabel, detailsResp2.getJobMetadata().get().getLabels().get(0));
        assertEquals(1, detailsResp2.getJobMetadata().get().getLabels().stream().filter(l -> l.getName().equals(jobLabel.getName())).count());
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        fail();
    }
// Mockito.doThrow(IOException.class).when(jobStoreMock).storeNewJob(any());
}
Also used : JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) ActorRef(akka.actor.ActorRef) Label(io.mantisrx.common.Label) ArrayList(java.util.ArrayList) GetJobDetailsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsRequest) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) TestKit(akka.testkit.javadsl.TestKit) Matchers.anyString(org.mockito.Matchers.anyString) InvalidJobException(io.mantisrx.runtime.command.InvalidJobException) GetJobDetailsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) Test(org.junit.Test)

Aggregations

ActorRef (akka.actor.ActorRef)82 MantisScheduler (io.mantisrx.server.master.scheduler.MantisScheduler)82 MantisJobStore (io.mantisrx.server.master.persistence.MantisJobStore)77 Test (org.junit.Test)70 TestKit (akka.testkit.javadsl.TestKit)65 JobClusterProto (io.mantisrx.master.jobcluster.proto.JobClusterProto)49 Matchers.anyString (org.mockito.Matchers.anyString)45 InvalidJobException (io.mantisrx.runtime.command.InvalidJobException)41 WorkerId (io.mantisrx.server.core.domain.WorkerId)32 JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)26 GetJobDetailsResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse)25 SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo)21 IOException (java.io.IOException)20 JobId (io.mantisrx.server.master.domain.JobId)18 GetJobClusterRequest (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterRequest)16 GetJobClusterResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterResponse)16 GetJobDetailsRequest (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsRequest)16 MachineDefinition (io.mantisrx.runtime.MachineDefinition)16 CountDownLatch (java.util.concurrent.CountDownLatch)13 IJobClusterDefinition (io.mantisrx.server.master.domain.IJobClusterDefinition)10