Use of io.mantisrx.runtime.descriptor.StageScalingPolicy in the Mantis project by Netflix.
From the class WorkerRegistryV2Test, method testJobScaleDown.
/**
 * Submits a single-stage scalable job, requests a scale down of stage 1 to one worker and
 * checks the scale response, the stage metadata and the worker registry.
 *
 * @throws Exception if job submission fails or the polling sleep is interrupted
 */
@Test
public void testJobScaleDown() throws Exception {
    WorkerRegistryV2 workerRegistryV2 = new WorkerRegistryV2();
    // The registry is wired in as a worker-event subscriber so it can track worker state.
    LifecycleEventPublisher eventPublisher = new LifecycleEventPublisherImpl(
            new AuditEventSubscriberLoggingImpl(),
            new StatusEventSubscriberLoggingImpl(),
            new DummyWorkerEventSubscriberImpl(workerRegistryV2));

    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> smap = new HashMap<>();
    smap.put(StageScalingPolicy.ScalingReason.CPU,
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(StageScalingPolicy.ScalingReason.DataDrop,
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));

    // One scalable stage that starts with two workers.
    SchedulingInfo sInfo = new SchedulingInfo.Builder()
            .numberOfStages(1)
            .multiWorkerScalableStageWithConstraints(
                    2,
                    new MachineDefinition(1.0, 1.0, 1.0, 3),
                    Lists.newArrayList(),
                    Lists.newArrayList(),
                    new StageScalingPolicy(1, 0, 10, 1, 1, 0, smap))
            .build();

    String clusterName = "testJobScaleDown";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(
            system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, eventPublisher);

    // NOTE(review): 3 is presumably the two stage workers plus a job-master worker - confirm.
    assertEquals(3, workerRegistryV2.getNumRunningWorkers());

    // send scale down request
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 1, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp =
            probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleDownResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(1, scaleResp.getActualNumWorkers());

    jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("user", new JobId(clusterName, 1)), probe.getRef());
    JobClusterManagerProto.GetJobDetailsResponse resp =
            probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
    Map<Integer, ? extends IMantisStageMetadata> stageMetadata = resp.getJobMetadata().get().getStageMetadata();
    assertEquals(1, stageMetadata.get(1).getAllWorkers().size());

    // Poll the registry with a short pause between attempts. A pause-free loop finishes
    // almost instantly and gives a lagging registry update no time to arrive.
    // NOTE(review): assumes the registry may be updated after the scale response - confirm.
    boolean reachedExpectedCount = false;
    for (int attempt = 0; attempt < 50; attempt++) {
        if (workerRegistryV2.getNumRunningWorkers() == 2) {
            reachedExpectedCount = true;
            break;
        }
        Thread.sleep(20);
    }
    assertTrue(reachedExpectedCount);
}
Use of io.mantisrx.runtime.descriptor.StageScalingPolicy in the Mantis project by Netflix.
From the class WorkerRegistryV2Test, method testJobScaleUp.
/**
 * Submits a single-stage scalable job with one stage worker, requests a scale up of stage 1
 * to two workers and checks the scale response, the stage metadata and the worker registry.
 *
 * @throws Exception if job submission fails or the polling sleep is interrupted
 */
@Test
public void testJobScaleUp() throws Exception, InvalidJobException, io.mantisrx.runtime.command.InvalidJobException {
    WorkerRegistryV2 workerRegistryV2 = new WorkerRegistryV2();
    // The registry is wired in as a worker-event subscriber so it can track worker state.
    LifecycleEventPublisher eventPublisher = new LifecycleEventPublisherImpl(
            new AuditEventSubscriberLoggingImpl(),
            new StatusEventSubscriberLoggingImpl(),
            new DummyWorkerEventSubscriberImpl(workerRegistryV2));

    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> smap = new HashMap<>();
    smap.put(StageScalingPolicy.ScalingReason.CPU,
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(StageScalingPolicy.ScalingReason.DataDrop,
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));

    // One scalable stage that starts with a single worker.
    SchedulingInfo sInfo = new SchedulingInfo.Builder()
            .numberOfStages(1)
            .multiWorkerScalableStageWithConstraints(
                    1,
                    new MachineDefinition(1.0, 1.0, 1.0, 3),
                    Lists.newArrayList(),
                    Lists.newArrayList(),
                    new StageScalingPolicy(1, 0, 10, 1, 1, 0, smap))
            .build();

    String clusterName = "testJobScaleUp";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(
            system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, eventPublisher);

    // NOTE(review): 2 is presumably the single stage worker plus a job-master worker - confirm.
    assertEquals(2, workerRegistryV2.getNumRunningWorkers());

    // send scale up request
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 2, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp =
            probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleupResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(2, scaleResp.getActualNumWorkers());

    // Drive the newly added worker through its launched/initiated/started events.
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(
            probe, jobActor, clusterName + "-1", 0, new WorkerId(clusterName + "-1", 1, 3));

    jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("user", new JobId(clusterName, 1)), probe.getRef());
    JobClusterManagerProto.GetJobDetailsResponse resp =
            probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
    Map<Integer, ? extends IMantisStageMetadata> stageMetadata = resp.getJobMetadata().get().getStageMetadata();
    assertEquals(2, stageMetadata.get(1).getAllWorkers().size());

    // Poll the registry with a short pause between attempts. A pause-free loop finishes
    // almost instantly and gives a lagging registry update no time to arrive.
    // NOTE(review): assumes the registry may be updated after the worker events - confirm.
    boolean reachedExpectedCount = false;
    for (int attempt = 0; attempt < 50; attempt++) {
        if (workerRegistryV2.getNumRunningWorkers() == 3) {
            reachedExpectedCount = true;
            break;
        }
        Thread.sleep(20);
    }
    assertTrue(reachedExpectedCount);
}
Use of io.mantisrx.runtime.descriptor.StageScalingPolicy in the Mantis project by Netflix.
From the class JobActor, method isAutoscaled.
/**
 * Reports whether at least one stage of the given scheduling info carries a non-null,
 * enabled scaling policy. The outcome is logged at info level together with the job id.
 *
 * @param schedulingInfo scheduling info whose stages are inspected
 * @return {@code true} as soon as a stage with an enabled scaling policy is found,
 *         {@code false} if no stage has one
 */
private boolean isAutoscaled(SchedulingInfo schedulingInfo) {
    LOGGER.trace("In isAutoscaled {}", schedulingInfo);
    for (StageSchedulingInfo stage : schedulingInfo.getStages().values()) {
        final StageScalingPolicy policy = stage.getScalingPolicy();
        if (policy == null || !policy.isEnabled()) {
            // This stage cannot autoscale; keep looking.
            continue;
        }
        LOGGER.info("Job {} is autoscaleable", jobId);
        return true;
    }
    LOGGER.info("Job {} is NOT scaleable", jobId);
    return false;
}
Use of io.mantisrx.runtime.descriptor.StageScalingPolicy in the Mantis project by Netflix.
From the class JobsRoute, method validateSubmitJobRequest.
/**
 * Validates a job submit request before it is accepted.
 *
 * <p>Checks, in order: the job definition is non-null; it has a non-empty name; the name
 * matches the cluster name from the REST resource (when one is given); and, for every
 * stage in the scheduling info, that the requested CPU, memory, network, number of
 * instances and scaling-policy max do not exceed the configured per-worker / per-stage
 * limits. The first failing check short-circuits the validation.
 *
 * @param mjd job definition from the request payload; may be {@code null}
 * @param clusterNameInResource cluster name taken from the REST resource endpoint, if any
 * @return a pair whose first element is true to indicate valid, false otherwise. The String
 *         holds the error message when the request is invalid (empty when valid)
 */
private Pair<Boolean, String> validateSubmitJobRequest(MantisJobDefinition mjd, Optional<String> clusterNameInResource) {
    if (null == mjd) {
        logger.error("rejecting job submit request, job definition is malformed {}", mjd);
        return Pair.apply(false, "Malformed job definition.");
    }

    // must include job cluster name
    if (mjd.getName() == null || mjd.getName().isEmpty()) {
        logger.info("rejecting job submit request, must include name {}", mjd);
        return Pair.apply(false, "Job definition must include name");
    }

    // validate specified job cluster name matches with what specified in REST resource endpoint
    if (clusterNameInResource.isPresent() && !clusterNameInResource.get().equals(mjd.getName())) {
        String msg = String.format(
                "Cluster name specified in request payload [%s] does not match with what specified in resource endpoint [%s]",
                mjd.getName(), clusterNameInResource.get());
        logger.info("rejecting job submit request, {} {}", msg, mjd);
        return Pair.apply(false, msg);
    }

    // validate scheduling info; nothing further to check when it (or its stages) is absent
    SchedulingInfo schedulingInfo = mjd.getSchedulingInfo();
    if (schedulingInfo == null) {
        return Pair.apply(true, "");
    }
    Map<Integer, StageSchedulingInfo> stages = schedulingInfo.getStages();
    if (stages == null) {
        return Pair.apply(true, "");
    }

    for (StageSchedulingInfo stageSchedInfo : stages.values()) {
        double cpuCores = stageSchedInfo.getMachineDefinition().getCpuCores();
        int maxCpuCores = ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxCpuCores();
        if (cpuCores > maxCpuCores) {
            logger.info("rejecting job submit request, requested CPU {} > max for {} (user: {}) (stage: {})", cpuCores, mjd.getName(), mjd.getUser(), stages);
            return Pair.apply(false, "requested CPU cannot be more than max CPU per worker " + maxCpuCores);
        }

        double memoryMB = stageSchedInfo.getMachineDefinition().getMemoryMB();
        int maxMemoryMB = ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxMemoryMB();
        if (memoryMB > maxMemoryMB) {
            logger.info("rejecting job submit request, requested memory {} > max for {} (user: {}) (stage: {})", memoryMB, mjd.getName(), mjd.getUser(), stages);
            return Pair.apply(false, "requested memory cannot be more than max memoryMB per worker " + maxMemoryMB);
        }

        double networkMbps = stageSchedInfo.getMachineDefinition().getNetworkMbps();
        int maxNetworkMbps = ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxNetworkMbps();
        if (networkMbps > maxNetworkMbps) {
            logger.info("rejecting job submit request, requested network {} > max for {} (user: {}) (stage: {})", networkMbps, mjd.getName(), mjd.getUser(), stages);
            return Pair.apply(false, "requested network cannot be more than max networkMbps per worker " + maxNetworkMbps);
        }

        int numberOfInstances = stageSchedInfo.getNumberOfInstances();
        int maxWorkersPerStage = ConfigurationProvider.getConfig().getMaxWorkersPerStage();
        if (numberOfInstances > maxWorkersPerStage) {
            logger.info("rejecting job submit request, requested num instances {} > max for {} (user: {}) (stage: {})", numberOfInstances, mjd.getName(), mjd.getUser(), stages);
            return Pair.apply(false, "requested number of instances per stage cannot be more than " + maxWorkersPerStage);
        }

        StageScalingPolicy scalingPolicy = stageSchedInfo.getScalingPolicy();
        if (scalingPolicy != null && scalingPolicy.getMax() > maxWorkersPerStage) {
            // Log the policy's max (the value that actually violated the limit), not the
            // stage's initial instance count.
            logger.info("rejecting job submit request, requested num instances in scaling policy {} > max for {} (user: {}) (stage: {})", scalingPolicy.getMax(), mjd.getName(), mjd.getUser(), stages);
            return Pair.apply(false, "requested number of instances per stage in scaling policy cannot be more than " + maxWorkersPerStage);
        }
    }
    return Pair.apply(true, "");
}
Use of io.mantisrx.runtime.descriptor.StageScalingPolicy in the Mantis project by Netflix.
From the class JobScaleUpDownTests, method testJobScaleUpFailsIfMinEqualsMax.
/**
 * Submits a single-stage scalable job whose scaling policy is built as
 * {@code new StageScalingPolicy(1, 1, 1, 1, 1, 0, smap)} with an empty strategy map, then
 * requests a scale up to three workers and expects the request to be rejected with
 * {@code CLIENT_ERROR} and no additional worker stored or scheduled.
 *
 * <p>NOTE(review): per the test name the policy arguments presumably set min == max == 1;
 * confirm against the {@code StageScalingPolicy} constructor.
 *
 * @throws Exception if job submission or store verification fails
 */
@Test
public void testJobScaleUpFailsIfMinEqualsMax() throws Exception {
    final TestKit probe = new TestKit(system);
    Map<ScalingReason, Strategy> smap = new HashMap<>();
    SchedulingInfo sInfo = new SchedulingInfo.Builder()
            .numberOfStages(1)
            .multiWorkerScalableStageWithConstraints(
                    1,
                    new MachineDefinition(1.0, 1.0, 1.0, 3),
                    Lists.newArrayList(),
                    Lists.newArrayList(),
                    new StageScalingPolicy(1, 1, 1, 1, 1, 0, smap))
            .build();

    // Cluster name mirrors the test name so the derived job id is unique to this test.
    String clusterName = "testJobScaleUpFailsIfMinEqualsMax";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(
            system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, lifecycleEventPublisher);

    // send scale up request
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 3, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp =
            probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleupResp " + scaleResp.message);
    assertEquals(CLIENT_ERROR, scaleResp.responseCode);
    assertEquals(0, scaleResp.getActualNumWorkers());

    verify(jobStoreMock, times(1)).storeNewJob(any());
    // initial worker
    verify(jobStoreMock, times(1)).storeNewWorkers(any(), any());
    // no scale up worker happened
    verify(jobStoreMock, times(0)).storeNewWorker(any());
    verify(jobStoreMock, times(3)).updateWorker(any());
    verify(jobStoreMock, times(3)).updateJob(any());
    // initial worker only
    verify(schedulerMock, times(1)).scheduleWorker(any());
}
Aggregations