use of io.mantisrx.runtime.descriptor.SchedulingInfo in project mantis by Netflix.
the class WorkerRegistryV2Test method testJobScaleDown.
/**
 * Submits a single-stage scalable job, asks the job actor to scale stage 1 down to a single
 * worker, and verifies the scale response, the stage metadata, and the running-worker count
 * reported by a {@link WorkerRegistryV2} that is wired to the lifecycle event publisher.
 *
 * @throws Exception if job submission fails or the wait for the registry is interrupted
 */
@Test
public void testJobScaleDown() throws Exception {
    WorkerRegistryV2 workerRegistryV2 = new WorkerRegistryV2();
    LifecycleEventPublisher eventPublisher = new LifecycleEventPublisherImpl(
            new AuditEventSubscriberLoggingImpl(),
            new StatusEventSubscriberLoggingImpl(),
            new DummyWorkerEventSubscriberImpl(workerRegistryV2));

    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> smap = new HashMap<>();
    smap.put(StageScalingPolicy.ScalingReason.CPU,
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(StageScalingPolicy.ScalingReason.DataDrop,
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));

    // NOTE(review): the leading 2 is presumably the initial number of stage workers - confirm.
    SchedulingInfo sInfo = new SchedulingInfo.Builder()
            .numberOfStages(1)
            .multiWorkerScalableStageWithConstraints(
                    2,
                    new MachineDefinition(1.0, 1.0, 1.0, 3),
                    Lists.newArrayList(),
                    Lists.newArrayList(),
                    new StageScalingPolicy(1, 0, 10, 1, 1, 0, smap))
            .build();

    String clusterName = "testJobScaleDown";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(
            system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, eventPublisher);
    assertEquals(3, workerRegistryV2.getNumRunningWorkers());

    // send scale down request
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 1, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp =
            probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleDownResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(1, scaleResp.getActualNumWorkers());

    // the job metadata must now list exactly one worker for stage 1
    jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("user", new JobId(clusterName, 1)), probe.getRef());
    JobClusterManagerProto.GetJobDetailsResponse resp =
            probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
    Map<Integer, ? extends IMantisStageMetadata> stageMetadata = resp.getJobMetadata().get().getStageMetadata();
    assertEquals(1, stageMetadata.get(1).getAllWorkers().size());

    // Poll the registry with a short pause between attempts. A tight loop without a pause
    // finishes almost instantly and therefore cannot wait for a registry update that
    // arrives slightly later (presumably via the event publisher - confirm).
    final int maxPolls = 50;
    final long pollIntervalMillis = 100L;
    boolean reachedExpectedCount = false;
    for (int attempt = 0; attempt < maxPolls; attempt++) {
        if (workerRegistryV2.getNumRunningWorkers() == 2) {
            reachedExpectedCount = true;
            break;
        }
        Thread.sleep(pollIntervalMillis);
    }
    // An explicit flag lets a match on the very last attempt count as success.
    assertTrue(reachedExpectedCount);
}
use of io.mantisrx.runtime.descriptor.SchedulingInfo in project mantis by Netflix.
the class WorkerRegistryV2Test method testJobScaleUp.
/**
 * Submits a single-stage scalable job, asks the job actor to scale stage 1 up to two workers,
 * drives the additional worker through its launched/initiated/started events, and verifies the
 * scale response, the stage metadata, and the running-worker count reported by a
 * {@link WorkerRegistryV2} that is wired to the lifecycle event publisher.
 *
 * @throws Exception if job submission fails or the wait for the registry is interrupted
 */
@Test
public void testJobScaleUp() throws Exception, InvalidJobException, io.mantisrx.runtime.command.InvalidJobException {
    WorkerRegistryV2 workerRegistryV2 = new WorkerRegistryV2();
    LifecycleEventPublisher eventPublisher = new LifecycleEventPublisherImpl(
            new AuditEventSubscriberLoggingImpl(),
            new StatusEventSubscriberLoggingImpl(),
            new DummyWorkerEventSubscriberImpl(workerRegistryV2));

    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> smap = new HashMap<>();
    smap.put(StageScalingPolicy.ScalingReason.CPU,
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(StageScalingPolicy.ScalingReason.DataDrop,
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));

    // NOTE(review): the leading 1 is presumably the initial number of stage workers - confirm.
    SchedulingInfo sInfo = new SchedulingInfo.Builder()
            .numberOfStages(1)
            .multiWorkerScalableStageWithConstraints(
                    1,
                    new MachineDefinition(1.0, 1.0, 1.0, 3),
                    Lists.newArrayList(),
                    Lists.newArrayList(),
                    new StageScalingPolicy(1, 0, 10, 1, 1, 0, smap))
            .build();

    String clusterName = "testJobScaleUp";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(
            system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, eventPublisher);
    assertEquals(2, workerRegistryV2.getNumRunningWorkers());

    // send scale up request
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 2, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp =
            probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleupResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(2, scaleResp.getActualNumWorkers());

    // Move the newly added worker through its start-up events.
    // NOTE(review): WorkerId(jobId, 1, 3) looks like worker index 1 / worker number 3 - confirm.
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(
            probe, jobActor, clusterName + "-1", 0, new WorkerId(clusterName + "-1", 1, 3));

    // the job metadata must now list two workers for stage 1
    jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("user", new JobId(clusterName, 1)), probe.getRef());
    JobClusterManagerProto.GetJobDetailsResponse resp =
            probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
    Map<Integer, ? extends IMantisStageMetadata> stageMetadata = resp.getJobMetadata().get().getStageMetadata();
    assertEquals(2, stageMetadata.get(1).getAllWorkers().size());

    // Poll the registry with a short pause between attempts. A tight loop without a pause
    // finishes almost instantly and therefore cannot wait for a registry update that
    // arrives slightly later (presumably via the event publisher - confirm).
    final int maxPolls = 50;
    final long pollIntervalMillis = 100L;
    boolean reachedExpectedCount = false;
    for (int attempt = 0; attempt < maxPolls; attempt++) {
        if (workerRegistryV2.getNumRunningWorkers() == 3) {
            reachedExpectedCount = true;
            break;
        }
        Thread.sleep(pollIntervalMillis);
    }
    // An explicit flag lets a match on the very last attempt count as success.
    assertTrue(reachedExpectedCount);
}
use of io.mantisrx.runtime.descriptor.SchedulingInfo in project mantis by Netflix.
the class TestHelpers method createFakeScheduleRequest.
/**
 * Assembles a {@link ScheduleRequest} for the given worker, backed by a minimal job definition:
 * artifact name {@code "jar"}, a single scheduling stage keyed by {@code 0} with one instance of
 * the supplied machine definition and no constraints, and a perpetual-duration SLA.
 *
 * @param workerId          worker the request is for; its job id must parse via {@code JobId.fromId}
 * @param stageNum          stage number passed through to the request
 * @param numStages         total stage count passed through to the request
 * @param machineDefinition machine definition used both for the stage and for the request
 * @return the request, or {@code null} when anything thrown during assembly was caught
 *         (the stack trace is printed in that case)
 */
public static ScheduleRequest createFakeScheduleRequest(final WorkerId workerId, final int stageNum, final int numStages, final MachineDefinition machineDefinition) {
    try {
        final StageSchedulingInfo singleInstanceStage = StageSchedulingInfo.builder()
                .numberOfInstances(1)
                .machineDefinition(machineDefinition)
                .hardConstraints(Collections.emptyList())
                .softConstraints(Collections.emptyList())
                .build();
        final SchedulingInfo fakeSchedulingInfo =
                new SchedulingInfo(Collections.singletonMap(0, singleInstanceStage));
        final JobSla perpetualSla = new JobSla(0, 0, null, MantisJobDurationType.Perpetual, null);

        final JobDefinition fakeJobDefinition = new JobDefinition.Builder()
                .withArtifactName("jar")
                .withSchedulingInfo(fakeSchedulingInfo)
                .withJobSla(perpetualSla)
                .build();

        final IMantisJobMetadata fakeJob = new MantisJobMetadataImpl.Builder()
                .withJobId(JobId.fromId(workerId.getJobId()).get())
                .withJobDefinition(fakeJobDefinition)
                .build();

        // Scheduler-facing view of the job, copied field by field from the metadata above.
        final JobMetadata schedulerJobView = new JobMetadata(
                fakeJob.getJobId().getId(),
                fakeJob.getJobJarUrl(),
                fakeJob.getTotalStages(),
                fakeJob.getUser(),
                fakeJob.getSchedulingInfo(),
                fakeJob.getParameters(),
                fakeJob.getSubscriptionTimeoutSecs(),
                fakeJob.getMinRuntimeSecs());

        return new ScheduleRequest(
                workerId,
                stageNum,
                numStages,
                schedulerJobView,
                fakeJob.getSla().get().getDurationType(),
                machineDefinition,
                Collections.emptyList(),
                Collections.emptyList(),
                0,
                Optional.empty());
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
use of io.mantisrx.runtime.descriptor.SchedulingInfo in project mantis by Netflix.
the class JobsRoute method validateSubmitJobRequest.
/**
 * Validates a job submit request before it is accepted.
 *
 * <p>Checks, in order: the definition is non-null; it carries a non-empty name; the name matches
 * the cluster name from the REST resource (when one is given); and, for every stage in the
 * scheduling info, that CPU, memory, network, instance count and scaling-policy maximum do not
 * exceed the limits read from {@code ConfigurationProvider.getConfig()}. The first failing check
 * ends validation.
 *
 * @param mjd                   job definition from the request payload; may be {@code null}
 * @param clusterNameInResource cluster name taken from the resource endpoint, if any
 * @return a pair whose first element is {@code true} when the request is valid (second element
 *         is then the empty string), or {@code false} with the second element holding the
 *         error message
 */
private Pair<Boolean, String> validateSubmitJobRequest(MantisJobDefinition mjd, Optional<String> clusterNameInResource) {
    if (null == mjd) {
        logger.error("rejecting job submit request, job definition is malformed {}", mjd);
        return Pair.apply(false, "Malformed job definition.");
    }
    // must include job cluster name
    if (mjd.getName() == null || mjd.getName().length() == 0) {
        logger.info("rejecting job submit request, must include name {}", mjd);
        return Pair.apply(false, "Job definition must include name");
    }
    // validate specified job cluster name matches with what specified in REST resource endpoint
    if (clusterNameInResource.isPresent() && !clusterNameInResource.get().equals(mjd.getName())) {
        String msg = String.format("Cluster name specified in request payload [%s] " + "does not match with what specified in resource endpoint [%s]", mjd.getName(), clusterNameInResource.get());
        logger.info("rejecting job submit request, {} {}", msg, mjd);
        return Pair.apply(false, msg);
    }
    // validate scheduling info
    SchedulingInfo schedulingInfo = mjd.getSchedulingInfo();
    if (schedulingInfo != null) {
        Map<Integer, StageSchedulingInfo> stages = schedulingInfo.getStages();
        if (stages != null) {
            // NOTE(review): the "(stage: {})" placeholder in the logs below receives the whole
            // stage map rather than the offending stage - confirm whether that is intended.
            for (StageSchedulingInfo stageSchedInfo : stages.values()) {
                double cpuCores = stageSchedInfo.getMachineDefinition().getCpuCores();
                int maxCpuCores = ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxCpuCores();
                if (cpuCores > maxCpuCores) {
                    logger.info("rejecting job submit request, requested CPU {} > max for {} (user: {}) (stage: {})", cpuCores, mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested CPU cannot be more than max CPU per worker " + maxCpuCores);
                }
                double memoryMB = stageSchedInfo.getMachineDefinition().getMemoryMB();
                int maxMemoryMB = ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxMemoryMB();
                if (memoryMB > maxMemoryMB) {
                    logger.info("rejecting job submit request, requested memory {} > max for {} (user: {}) (stage: {})", memoryMB, mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested memory cannot be more than max memoryMB per worker " + maxMemoryMB);
                }
                double networkMbps = stageSchedInfo.getMachineDefinition().getNetworkMbps();
                int maxNetworkMbps = ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxNetworkMbps();
                if (networkMbps > maxNetworkMbps) {
                    logger.info("rejecting job submit request, requested network {} > max for {} (user: {}) (stage: {})", networkMbps, mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested network cannot be more than max networkMbps per worker " + maxNetworkMbps);
                }
                int numberOfInstances = stageSchedInfo.getNumberOfInstances();
                int maxWorkersPerStage = ConfigurationProvider.getConfig().getMaxWorkersPerStage();
                if (numberOfInstances > maxWorkersPerStage) {
                    logger.info("rejecting job submit request, requested num instances {} > max for {} (user: {}) (stage: {})", numberOfInstances, mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested number of instances per stage cannot be more than " + maxWorkersPerStage);
                }
                StageScalingPolicy scalingPolicy = stageSchedInfo.getScalingPolicy();
                if (scalingPolicy != null && scalingPolicy.getMax() > maxWorkersPerStage) {
                    // Log the scaling policy's max (the value that was actually compared),
                    // not the stage's initial instance count.
                    logger.info("rejecting job submit request, requested num instances in scaling policy {} > max for {} (user: {}) (stage: {})", scalingPolicy.getMax(), mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested number of instances per stage in scaling policy cannot be more than " + maxWorkersPerStage);
                }
            }
        }
    }
    return Pair.apply(true, "");
}
use of io.mantisrx.runtime.descriptor.SchedulingInfo in project mantis by Netflix.
the class NoOpMantisJobOperations method convertMantisJobWriteableToMantisJobMetadata.
// TODO job specific migration config is not supported, migration config will be at cluster level
/**
 * Converts a job in the older writable metadata format into an {@link IMantisJobMetadata}.
 *
 * <p>Each stage is converted first, a {@link SchedulingInfo} is generated from the converted
 * stages, a {@link JobDefinition} and {@link MantisJobMetadataImpl} are built from the old job's
 * fields, and finally the converted stages are attached to the new metadata.
 *
 * @param archJob        job to convert; cast to {@code MantisJobMetadataWritable} to read its stages
 * @param eventPublisher publisher handed to the per-stage conversion
 * @param isArchived     when {@code true}, the stage conversion is told to skip adding workers
 * @return the converted job metadata
 * @throws IllegalArgumentException if the job id of {@code archJob} cannot be parsed
 * @throws Exception                if any of the conversion helpers fails
 */
public static IMantisJobMetadata convertMantisJobWriteableToMantisJobMetadata(MantisJobMetadata archJob, LifecycleEventPublisher eventPublisher, boolean isArchived) throws Exception {
    if (logger.isTraceEnabled()) {
        logger.trace("DataFormatAdapter:Converting {}", archJob);
    }

    // Convert stages to the new format. For an archived job, adding workers by index may fail
    // because several workers can relate to one index, so worker addition is skipped then.
    List<IMantisStageMetadata> newFormatStages = new ArrayList<>();
    for (MantisStageMetadata oldStage : ((MantisJobMetadataWritable) archJob).getStageMetadata()) {
        newFormatStages.add(
                convertMantisStageMetadataWriteableToMantisStageMetadata(oldStage, eventPublisher, isArchived));
    }

    // Derive the scheduling info and artifact name needed for the job definition.
    SchedulingInfo derivedSchedulingInfo = generateSchedulingInfo(newFormatStages);
    URL archivedJarUrl = archJob.getJarUrl();
    Optional<String> maybeArtifactName = extractArtifactName(archivedJarUrl);

    JobDefinition convertedDefinition = new JobDefinition(
            archJob.getName(),
            archJob.getUser(),
            maybeArtifactName.orElse(""),
            null,
            archJob.getParameters(),
            archJob.getSla(),
            archJob.getSubscriptionTimeoutSecs(),
            derivedSchedulingInfo,
            archJob.getNumStages(),
            archJob.getLabels(),
            null);

    JobId parsedJobId = JobId.fromId(archJob.getJobId())
            .orElseThrow(() -> new IllegalArgumentException("Invalid JobId " + archJob.getJobId()));

    MantisJobMetadataImpl convertedJob = new MantisJobMetadataImpl(
            parsedJobId,
            archJob.getSubmittedAt(),
            archJob.getStartedAt(),
            convertedDefinition,
            convertMantisJobStateToJobState(archJob.getState()),
            archJob.getNextWorkerNumberToUse());

    // Attach the converted stages to the new job metadata.
    for (IMantisStageMetadata convertedStage : newFormatStages) {
        convertedJob.addJobStageIfAbsent(convertedStage);
    }

    if (logger.isTraceEnabled()) {
        logger.trace("DataFormatAdapter:Completed conversion to IMantisJobMetadata {}", convertedJob);
    }
    return convertedJob;
}
Aggregations