Search in sources:

Example 1 with StageScalingPolicy

use of io.mantisrx.runtime.descriptor.StageScalingPolicy in project mantis by Netflix.

From the class WorkerRegistryV2Test, method testJobScaleDown.

@Test
public void testJobScaleDown() throws Exception {
    WorkerRegistryV2 workerRegistryV2 = new WorkerRegistryV2();
    // Publisher routes worker lifecycle events into the registry under test.
    LifecycleEventPublisher eventPublisher = new LifecycleEventPublisherImpl(new AuditEventSubscriberLoggingImpl(), new StatusEventSubscriberLoggingImpl(), new DummyWorkerEventSubscriberImpl(workerRegistryV2));
    // Scaling strategies for CPU and data drop; the threshold values are
    // arbitrary here — the test only needs a scalable stage to exist.
    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> smap = new HashMap<>();
    smap.put(StageScalingPolicy.ScalingReason.CPU, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(StageScalingPolicy.ScalingReason.DataDrop, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));
    // One scalable stage starting with 2 workers (min 0, max 10).
    SchedulingInfo sInfo = new SchedulingInfo.Builder().numberOfStages(1).multiWorkerScalableStageWithConstraints(2, new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList(), new StageScalingPolicy(1, 0, 10, 1, 1, 0, smap)).build();
    String clusterName = "testJobScaleDown";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, eventPublisher);
    // 2 stage workers + 1 job master worker.
    assertEquals(3, workerRegistryV2.getNumRunningWorkers());
    // Send scale-down request: stage 1 from 2 workers to 1.
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 1, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp = probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleDownResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(1, scaleResp.getActualNumWorkers());
    jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("user", new JobId(clusterName, 1)), probe.getRef());
    JobClusterManagerProto.GetJobDetailsResponse resp = probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
    Map<Integer, ? extends IMantisStageMetadata> stageMetadata = resp.getJobMetadata().get().getStageMetadata();
    assertEquals(1, stageMetadata.get(1).getAllWorkers().size());
    // Poll until the registry reflects the removal (3 -> 2 running workers).
    // The original loop spun 50 times with no delay, which raced against the
    // asynchronous worker-terminated event; sleep between checks instead.
    boolean scaledDown = false;
    for (int i = 0; i < 50; i++) {
        if (workerRegistryV2.getNumRunningWorkers() == 2) {
            scaledDown = true;
            break;
        }
        Thread.sleep(100);
    }
    assertTrue(scaledDown);
}
Also used : ActorRef(akka.actor.ActorRef) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) JobId(io.mantisrx.server.master.domain.JobId) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) MachineDefinition(io.mantisrx.runtime.MachineDefinition) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) Test(org.junit.Test)

Example 2 with StageScalingPolicy

use of io.mantisrx.runtime.descriptor.StageScalingPolicy in project mantis by Netflix.

From the class WorkerRegistryV2Test, method testJobScaleUp.

@Test
public void testJobScaleUp() throws Exception {
    // NOTE: the previous throws clause also declared two InvalidJobException
    // variants, which were redundant — Exception already covers them.
    WorkerRegistryV2 workerRegistryV2 = new WorkerRegistryV2();
    // Publisher routes worker lifecycle events into the registry under test.
    LifecycleEventPublisher eventPublisher = new LifecycleEventPublisherImpl(new AuditEventSubscriberLoggingImpl(), new StatusEventSubscriberLoggingImpl(), new DummyWorkerEventSubscriberImpl(workerRegistryV2));
    // Scaling strategies for CPU and data drop; the threshold values are
    // arbitrary here — the test only needs a scalable stage to exist.
    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> smap = new HashMap<>();
    smap.put(StageScalingPolicy.ScalingReason.CPU, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(StageScalingPolicy.ScalingReason.DataDrop, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));
    // One scalable stage starting with 1 worker (min 0, max 10).
    SchedulingInfo sInfo = new SchedulingInfo.Builder().numberOfStages(1).multiWorkerScalableStageWithConstraints(1, new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList(), new StageScalingPolicy(1, 0, 10, 1, 1, 0, smap)).build();
    String clusterName = "testJobScaleUp";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, eventPublisher);
    // 1 stage worker + 1 job master worker.
    assertEquals(2, workerRegistryV2.getNumRunningWorkers());
    // Send scale-up request: stage 1 from 1 worker to 2.
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 2, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp = probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleupResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(2, scaleResp.getActualNumWorkers());
    // Drive the newly added worker (number 3) through its startup events.
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, clusterName + "-1", 0, new WorkerId(clusterName + "-1", 1, 3));
    jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("user", new JobId(clusterName, 1)), probe.getRef());
    JobClusterManagerProto.GetJobDetailsResponse resp = probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
    Map<Integer, ? extends IMantisStageMetadata> stageMetadata = resp.getJobMetadata().get().getStageMetadata();
    assertEquals(2, stageMetadata.get(1).getAllWorkers().size());
    // Poll until the registry reflects the addition (2 -> 3 running workers).
    // The original loop spun 50 times with no delay, which raced against the
    // asynchronous worker-started event; sleep between checks instead.
    boolean scaledUp = false;
    for (int i = 0; i < 50; i++) {
        if (workerRegistryV2.getNumRunningWorkers() == 3) {
            scaledUp = true;
            break;
        }
        Thread.sleep(100);
    }
    assertTrue(scaledUp);
}
Also used : ActorRef(akka.actor.ActorRef) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) JobId(io.mantisrx.server.master.domain.JobId) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) MachineDefinition(io.mantisrx.runtime.MachineDefinition) WorkerId(io.mantisrx.server.core.domain.WorkerId) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) Test(org.junit.Test)

Example 3 with StageScalingPolicy

use of io.mantisrx.runtime.descriptor.StageScalingPolicy in project mantis by Netflix.

From the class JobActor, method isAutoscaled.

/**
 * Returns {@code true} if any stage of the given scheduling info carries an
 * enabled {@link StageScalingPolicy}, i.e. the job can be autoscaled.
 */
private boolean isAutoscaled(SchedulingInfo schedulingInfo) {
    LOGGER.trace("In isAutoscaled {}", schedulingInfo);
    // A single enabled per-stage scaling policy makes the whole job scalable.
    boolean autoscalable = schedulingInfo.getStages().values().stream()
            .map(StageSchedulingInfo::getScalingPolicy)
            .anyMatch(policy -> policy != null && policy.isEnabled());
    if (autoscalable) {
        LOGGER.info("Job {} is autoscaleable", jobId);
    } else {
        LOGGER.info("Job {} is NOT scaleable", jobId);
    }
    return autoscalable;
}
Also used : StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) Map(java.util.Map) HashMap(java.util.HashMap)

Example 4 with StageScalingPolicy

use of io.mantisrx.runtime.descriptor.StageScalingPolicy in project mantis by Netflix.

From the class JobsRoute, method validateSubmitJobRequest.

/**
 * Validates an incoming job submission against basic shape rules and the
 * configured per-worker/per-stage resource limits.
 *
 * @param mjd                   the submitted job definition (may be null)
 * @param clusterNameInResource cluster name from the REST resource path, if any
 * @return true to indicate valid, false otherwise. The String holds the error message when the request is invalid
 */
private Pair<Boolean, String> validateSubmitJobRequest(MantisJobDefinition mjd, Optional<String> clusterNameInResource) {
    if (null == mjd) {
        logger.error("rejecting job submit request, job definition is malformed {}", mjd);
        return Pair.apply(false, "Malformed job definition.");
    }
    // must include job cluster name
    if (mjd.getName() == null || mjd.getName().length() == 0) {
        logger.info("rejecting job submit request, must include name {}", mjd);
        return Pair.apply(false, "Job definition must include name");
    }
    // validate specified job cluster name matches with what specified in REST resource endpoint
    if (clusterNameInResource.isPresent()) {
        if (!clusterNameInResource.get().equals(mjd.getName())) {
            String msg = String.format("Cluster name specified in request payload [%s] " + "does not match with what specified in resource endpoint [%s]", mjd.getName(), clusterNameInResource.get());
            logger.info("rejecting job submit request, {} {}", msg, mjd);
            return Pair.apply(false, msg);
        }
    }
    // validate scheduling info: every stage's machine definition and worker
    // counts must fall within the configured maxima
    SchedulingInfo schedulingInfo = mjd.getSchedulingInfo();
    if (schedulingInfo != null) {
        Map<Integer, StageSchedulingInfo> stages = schedulingInfo.getStages();
        if (stages != null) {
            for (StageSchedulingInfo stageSchedInfo : stages.values()) {
                double cpuCores = stageSchedInfo.getMachineDefinition().getCpuCores();
                int maxCpuCores = ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxCpuCores();
                if (cpuCores > maxCpuCores) {
                    logger.info("rejecting job submit request, requested CPU {} > max for {} (user: {}) (stage: {})", cpuCores, mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested CPU cannot be more than max CPU per worker " + maxCpuCores);
                }
                double memoryMB = stageSchedInfo.getMachineDefinition().getMemoryMB();
                int maxMemoryMB = ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxMemoryMB();
                if (memoryMB > maxMemoryMB) {
                    logger.info("rejecting job submit request, requested memory {} > max for {} (user: {}) (stage: {})", memoryMB, mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested memory cannot be more than max memoryMB per worker " + maxMemoryMB);
                }
                double networkMbps = stageSchedInfo.getMachineDefinition().getNetworkMbps();
                int maxNetworkMbps = ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxNetworkMbps();
                if (networkMbps > maxNetworkMbps) {
                    logger.info("rejecting job submit request, requested network {} > max for {} (user: {}) (stage: {})", networkMbps, mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested network cannot be more than max networkMbps per worker " + maxNetworkMbps);
                }
                int numberOfInstances = stageSchedInfo.getNumberOfInstances();
                int maxWorkersPerStage = ConfigurationProvider.getConfig().getMaxWorkersPerStage();
                if (numberOfInstances > maxWorkersPerStage) {
                    logger.info("rejecting job submit request, requested num instances {} > max for {} (user: {}) (stage: {})", numberOfInstances, mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested number of instances per stage cannot be more than " + maxWorkersPerStage);
                }
                StageScalingPolicy scalingPolicy = stageSchedInfo.getScalingPolicy();
                if (scalingPolicy != null) {
                    if (scalingPolicy.getMax() > maxWorkersPerStage) {
                        // BUGFIX: log the offending policy max, not numberOfInstances
                        // (copy-paste from the preceding check logged the wrong value).
                        logger.info("rejecting job submit request, requested num instances in scaling policy {} > max for {} (user: {}) (stage: {})", scalingPolicy.getMax(), mjd.getName(), mjd.getUser(), stages);
                        return Pair.apply(false, "requested number of instances per stage in scaling policy cannot be more than " + maxWorkersPerStage);
                    }
                }
            }
        }
    }
    return Pair.apply(true, "");
}
Also used : StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo)

Example 5 with StageScalingPolicy

use of io.mantisrx.runtime.descriptor.StageScalingPolicy in project mantis by Netflix.

From the class JobScaleUpDownTests, method testJobScaleUpFailsIfMinEqualsMax.

@Test
public void testJobScaleUpFailsIfMinEqualsMax() throws Exception {
    final TestKit probe = new TestKit(system);
    // No scaling strategies needed: the rejection under test comes from the
    // policy's min == max == 1 constraint, not from any strategy.
    Map<ScalingReason, Strategy> smap = new HashMap<>();
    // Stage with min=1, max=1 — any scale request beyond 1 must be rejected.
    SchedulingInfo sInfo = new SchedulingInfo.Builder().numberOfStages(1).multiWorkerScalableStageWithConstraints(1, new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList(), new StageScalingPolicy(1, 1, 1, 1, 1, 0, smap)).build();
    // BUGFIX: cluster name previously said "testJobScaleUpFailsIfNoScaleStrategy"
    // (copy-paste from a sibling test); align it with this test's name.
    String clusterName = "testJobScaleUpFailsIfMinEqualsMax";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, lifecycleEventPublisher);
    // Send scale-up request to 3 workers, which exceeds the max of 1.
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 3, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp = probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleupResp " + scaleResp.message);
    assertEquals(CLIENT_ERROR, scaleResp.responseCode);
    assertEquals(0, scaleResp.getActualNumWorkers());
    verify(jobStoreMock, times(1)).storeNewJob(any());
    // initial worker
    verify(jobStoreMock, times(1)).storeNewWorkers(any(), any());
    // no scale up worker happened
    verify(jobStoreMock, times(0)).storeNewWorker(any());
    verify(jobStoreMock, times(3)).updateWorker(any());
    verify(jobStoreMock, times(3)).updateJob(any());
    // initial worker only
    verify(schedulerMock, times(1)).scheduleWorker(any());
}
Also used : SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) JobSchedulingInfo(io.mantisrx.server.core.JobSchedulingInfo) MachineDefinition(io.mantisrx.runtime.MachineDefinition) HashMap(java.util.HashMap) ActorRef(akka.actor.ActorRef) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) TestKit(akka.testkit.javadsl.TestKit) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) Strategy(io.mantisrx.runtime.descriptor.StageScalingPolicy.Strategy) ScalingReason(io.mantisrx.runtime.descriptor.StageScalingPolicy.ScalingReason) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) Test(org.junit.Test)

Aggregations

StageScalingPolicy (io.mantisrx.runtime.descriptor.StageScalingPolicy)28 Test (org.junit.Test)24 HashMap (java.util.HashMap)19 SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo)18 MachineDefinition (io.mantisrx.runtime.MachineDefinition)15 StageSchedulingInfo (io.mantisrx.runtime.descriptor.StageSchedulingInfo)14 ActorRef (akka.actor.ActorRef)10 MantisJobStore (io.mantisrx.server.master.persistence.MantisJobStore)10 MantisScheduler (io.mantisrx.server.master.scheduler.MantisScheduler)10 ScalingReason (io.mantisrx.runtime.descriptor.StageScalingPolicy.ScalingReason)9 Strategy (io.mantisrx.runtime.descriptor.StageScalingPolicy.Strategy)9 JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)8 TestKit (akka.testkit.javadsl.TestKit)7 Matchers.anyString (org.mockito.Matchers.anyString)6 Context (io.mantisrx.runtime.Context)5 JobSchedulingInfo (io.mantisrx.server.core.JobSchedulingInfo)5 MantisMasterClientApi (io.mantisrx.server.master.client.MantisMasterClientApi)5 JobId (io.mantisrx.server.master.domain.JobId)5 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)5 ClutchConfiguration (com.netflix.control.clutch.ClutchConfiguration)4