Example 1 with SchedulingInfo

Use of io.mantisrx.runtime.descriptor.SchedulingInfo in project mantis by Netflix.

From the class WorkerRegistryV2Test, method testJobScaleDown.

@Test
public void testJobScaleDown() throws Exception {
    WorkerRegistryV2 workerRegistryV2 = new WorkerRegistryV2();
    LifecycleEventPublisher eventPublisher = new LifecycleEventPublisherImpl(new AuditEventSubscriberLoggingImpl(), new StatusEventSubscriberLoggingImpl(), new DummyWorkerEventSubscriberImpl(workerRegistryV2));
    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> smap = new HashMap<>();
    smap.put(StageScalingPolicy.ScalingReason.CPU, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(StageScalingPolicy.ScalingReason.DataDrop, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));
    SchedulingInfo sInfo = new SchedulingInfo.Builder().numberOfStages(1).multiWorkerScalableStageWithConstraints(2, new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList(), new StageScalingPolicy(1, 0, 10, 1, 1, 0, smap)).build();
    String clusterName = "testJobScaleDown";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, eventPublisher);
    assertEquals(3, workerRegistryV2.getNumRunningWorkers());
    // send scale down request
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 1, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp = probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleDownResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(1, scaleResp.getActualNumWorkers());
    jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("user", new JobId(clusterName, 1)), probe.getRef());
    JobClusterManagerProto.GetJobDetailsResponse resp = probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
    Map<Integer, ? extends IMantisStageMetadata> stageMetadata = resp.getJobMetadata().get().getStageMetadata();
    assertEquals(1, stageMetadata.get(1).getAllWorkers().size());
    int cnt = 0;
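    // spin for up to 50 checks waiting for the worker registry to reflect the scale-down (2 running workers)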
    for (int i = 0; i < 50; i++) {
        cnt++;
        if (workerRegistryV2.getNumRunningWorkers() == 2) {
            break;
        }
    }
    assertTrue(cnt < 50);
    // assertEquals(2, WorkerRegistryV2.INSTANCE.getNumRunningWorkers());
}
Also used: ActorRef (akka.actor.ActorRef), MantisScheduler (io.mantisrx.server.master.scheduler.MantisScheduler), JobId (io.mantisrx.server.master.domain.JobId), JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto), SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo), MachineDefinition (io.mantisrx.runtime.MachineDefinition), StageScalingPolicy (io.mantisrx.runtime.descriptor.StageScalingPolicy), MantisJobStore (io.mantisrx.server.master.persistence.MantisJobStore), Test (org.junit.Test)
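
Distilled from the test above, the SchedulingInfo construction on its own: a single scalable stage whose autoscaling strategies react to CPU and data-drop signals. This is a minimal sketch; the variable names are illustrative, the types are the ones listed under "Also used" above, and the numeric values (worker count, MachineDefinition sizing, scaling-policy bounds and thresholds) are simply the ones the test passes, not recommended settings.

Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> strategies = new HashMap<>();
// per-reason strategies; the thresholds are taken verbatim from the test
strategies.put(StageScalingPolicy.ScalingReason.CPU,
        new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
strategies.put(StageScalingPolicy.ScalingReason.DataDrop,
        new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));

// one scalable stage with 2 workers, each sized by the MachineDefinition, no hard or soft
// constraints, and the same StageScalingPolicy the test passes for its stage
SchedulingInfo schedulingInfo = new SchedulingInfo.Builder()
        .numberOfStages(1)
        .multiWorkerScalableStageWithConstraints(
                2,
                new MachineDefinition(1.0, 1.0, 1.0, 3),
                Lists.newArrayList(),
                Lists.newArrayList(),
                new StageScalingPolicy(1, 0, 10, 1, 1, 0, strategies))
        .build();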

Example 2 with SchedulingInfo

Use of io.mantisrx.runtime.descriptor.SchedulingInfo in project mantis by Netflix.

From the class WorkerRegistryV2Test, method testJobScaleUp.

@Test
public void testJobScaleUp() throws Exception, InvalidJobException, io.mantisrx.runtime.command.InvalidJobException {
    WorkerRegistryV2 workerRegistryV2 = new WorkerRegistryV2();
    LifecycleEventPublisher eventPublisher = new LifecycleEventPublisherImpl(new AuditEventSubscriberLoggingImpl(), new StatusEventSubscriberLoggingImpl(), new DummyWorkerEventSubscriberImpl(workerRegistryV2));
    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> smap = new HashMap<>();
    smap.put(StageScalingPolicy.ScalingReason.CPU, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(StageScalingPolicy.ScalingReason.DataDrop, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));
    SchedulingInfo sInfo = new SchedulingInfo.Builder().numberOfStages(1).multiWorkerScalableStageWithConstraints(1, new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList(), new StageScalingPolicy(1, 0, 10, 1, 1, 0, smap)).build();
    String clusterName = "testJobScaleUp";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, eventPublisher);
    assertEquals(2, workerRegistryV2.getNumRunningWorkers());
    // send scale up request
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 2, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp = probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleupResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(2, scaleResp.getActualNumWorkers());
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, clusterName + "-1", 0, new WorkerId(clusterName + "-1", 1, 3));
    jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("user", new JobId(clusterName, 1)), probe.getRef());
    JobClusterManagerProto.GetJobDetailsResponse resp = probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
    Map<Integer, ? extends IMantisStageMetadata> stageMetadata = resp.getJobMetadata().get().getStageMetadata();
    assertEquals(2, stageMetadata.get(1).getAllWorkers().size());
    int cnt = 0;
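    // spin for up to 50 checks waiting for the worker registry to reflect the scale-up (3 running workers)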
    for (int i = 0; i < 50; i++) {
        cnt++;
        if (workerRegistryV2.getNumRunningWorkers() == 3) {
            break;
        }
    }
    assertTrue(cnt < 50);
}
Also used: ActorRef (akka.actor.ActorRef), MantisScheduler (io.mantisrx.server.master.scheduler.MantisScheduler), JobId (io.mantisrx.server.master.domain.JobId), JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto), SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo), MachineDefinition (io.mantisrx.runtime.MachineDefinition), WorkerId (io.mantisrx.server.core.domain.WorkerId), StageScalingPolicy (io.mantisrx.runtime.descriptor.StageScalingPolicy), MantisJobStore (io.mantisrx.server.master.persistence.MantisJobStore), Test (org.junit.Test)

Example 3 with SchedulingInfo

Use of io.mantisrx.runtime.descriptor.SchedulingInfo in project mantis by Netflix.

From the class TestHelpers, method createFakeScheduleRequest.

public static ScheduleRequest createFakeScheduleRequest(final WorkerId workerId, final int stageNum, final int numStages, final MachineDefinition machineDefinition) {
    try {
        JobDefinition jobDefinition = new JobDefinition.Builder().withArtifactName("jar").withSchedulingInfo(new SchedulingInfo(Collections.singletonMap(0, StageSchedulingInfo.builder().numberOfInstances(1).machineDefinition(machineDefinition).hardConstraints(Collections.emptyList()).softConstraints(Collections.emptyList()).build()))).withJobSla(new JobSla(0, 0, null, MantisJobDurationType.Perpetual, null)).build();
        IMantisJobMetadata mantisJobMetadata = new MantisJobMetadataImpl.Builder().withJobId(JobId.fromId(workerId.getJobId()).get()).withJobDefinition(jobDefinition).build();
        return new ScheduleRequest(workerId, stageNum, numStages, new JobMetadata(mantisJobMetadata.getJobId().getId(), mantisJobMetadata.getJobJarUrl(), mantisJobMetadata.getTotalStages(), mantisJobMetadata.getUser(), mantisJobMetadata.getSchedulingInfo(), mantisJobMetadata.getParameters(), mantisJobMetadata.getSubscriptionTimeoutSecs(), mantisJobMetadata.getMinRuntimeSecs()), mantisJobMetadata.getSla().get().getDurationType(), machineDefinition, Collections.emptyList(), Collections.emptyList(), 0, Optional.empty());
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
Also used: JobMetadata (io.mantisrx.server.core.domain.JobMetadata), IMantisJobMetadata (io.mantisrx.master.jobcluster.job.IMantisJobMetadata), StageSchedulingInfo (io.mantisrx.runtime.descriptor.StageSchedulingInfo), SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo), ScheduleRequest (io.mantisrx.server.master.scheduler.ScheduleRequest), JobSla (io.mantisrx.runtime.JobSla), MantisJobMetadataImpl (io.mantisrx.master.jobcluster.job.MantisJobMetadataImpl), JobDefinition (io.mantisrx.server.master.domain.JobDefinition)
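
The helper above shows the other way to build the descriptor: constructing SchedulingInfo directly from a map of stage number to StageSchedulingInfo instead of using the Builder. Isolated as a minimal sketch (machineDefinition stands in for whatever the caller supplies; types as listed under "Also used" above):

// a single stage, keyed 0 as in the helper, running one worker with no placement constraints
StageSchedulingInfo stage = StageSchedulingInfo.builder()
        .numberOfInstances(1)
        .machineDefinition(machineDefinition)
        .hardConstraints(Collections.emptyList())
        .softConstraints(Collections.emptyList())
        .build();

SchedulingInfo schedulingInfo = new SchedulingInfo(Collections.singletonMap(0, stage));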

Example 4 with SchedulingInfo

Use of io.mantisrx.runtime.descriptor.SchedulingInfo in project mantis by Netflix.

From the class JobsRoute, method validateSubmitJobRequest.

/**
 * @return a Pair whose Boolean is true when the request is valid, false otherwise; the String carries the error message when the request is invalid
 */
private Pair<Boolean, String> validateSubmitJobRequest(MantisJobDefinition mjd, Optional<String> clusterNameInResource) {
    if (null == mjd) {
        logger.error("rejecting job submit request, job definition is malformed {}", mjd);
        return Pair.apply(false, "Malformed job definition.");
    }
    // must include job cluster name
    if (mjd.getName() == null || mjd.getName().length() == 0) {
        logger.info("rejecting job submit request, must include name {}", mjd);
        return Pair.apply(false, "Job definition must include name");
    }
    // validate specified job cluster name matches with what specified in REST resource endpoint
    if (clusterNameInResource.isPresent()) {
        if (!clusterNameInResource.get().equals(mjd.getName())) {
            String msg = String.format("Cluster name specified in request payload [%s] " + "does not match with what specified in resource endpoint [%s]", mjd.getName(), clusterNameInResource.get());
            logger.info("rejecting job submit request, {} {}", msg, mjd);
            return Pair.apply(false, msg);
        }
    }
    // validate scheduling info
    SchedulingInfo schedulingInfo = mjd.getSchedulingInfo();
    if (schedulingInfo != null) {
        Map<Integer, StageSchedulingInfo> stages = schedulingInfo.getStages();
        if (stages != null) {
            for (StageSchedulingInfo stageSchedInfo : stages.values()) {
                double cpuCores = stageSchedInfo.getMachineDefinition().getCpuCores();
                int maxCpuCores = ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxCpuCores();
                if (cpuCores > maxCpuCores) {
                    logger.info("rejecting job submit request, requested CPU {} > max for {} (user: {}) (stage: {})", cpuCores, mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested CPU cannot be more than max CPU per worker " + maxCpuCores);
                }
                double memoryMB = stageSchedInfo.getMachineDefinition().getMemoryMB();
                int maxMemoryMB = ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxMemoryMB();
                if (memoryMB > maxMemoryMB) {
                    logger.info("rejecting job submit request, requested memory {} > max for {} (user: {}) (stage: {})", memoryMB, mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested memory cannot be more than max memoryMB per worker " + maxMemoryMB);
                }
                double networkMbps = stageSchedInfo.getMachineDefinition().getNetworkMbps();
                int maxNetworkMbps = ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxNetworkMbps();
                if (networkMbps > maxNetworkMbps) {
                    logger.info("rejecting job submit request, requested network {} > max for {} (user: {}) (stage: {})", networkMbps, mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested network cannot be more than max networkMbps per worker " + maxNetworkMbps);
                }
                int numberOfInstances = stageSchedInfo.getNumberOfInstances();
                int maxWorkersPerStage = ConfigurationProvider.getConfig().getMaxWorkersPerStage();
                if (numberOfInstances > maxWorkersPerStage) {
                    logger.info("rejecting job submit request, requested num instances {} > max for {} (user: {}) (stage: {})", numberOfInstances, mjd.getName(), mjd.getUser(), stages);
                    return Pair.apply(false, "requested number of instances per stage cannot be more than " + maxWorkersPerStage);
                }
                StageScalingPolicy scalingPolicy = stageSchedInfo.getScalingPolicy();
                if (scalingPolicy != null) {
                    if (scalingPolicy.getMax() > maxWorkersPerStage) {
                        logger.info("rejecting job submit request, requested num instances in scaling policy {} > max for {} (user: {}) (stage: {})", numberOfInstances, mjd.getName(), mjd.getUser(), stages);
                        return Pair.apply(false, "requested number of instances per stage in scaling policy cannot be more than " + maxWorkersPerStage);
                    }
                }
            }
        }
    }
    return Pair.apply(true, "");
}
Also used: StageScalingPolicy (io.mantisrx.runtime.descriptor.StageScalingPolicy), StageSchedulingInfo (io.mantisrx.runtime.descriptor.StageSchedulingInfo), SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo)
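
Stripped of logging and error strings, the scheduling-info validation above reduces to checking each stage's requested resources and worker counts against the per-worker limits exposed by ConfigurationProvider. A condensed sketch of the same checks (schedulingInfo is assumed to be the submitted descriptor; all accessors are the ones used in the method above):

for (StageSchedulingInfo stage : schedulingInfo.getStages().values()) {
    MachineDefinition machine = stage.getMachineDefinition();
    // every requested resource must stay within the configured per-worker maxima
    boolean withinLimits =
            machine.getCpuCores() <= ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxCpuCores()
            && machine.getMemoryMB() <= ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxMemoryMB()
            && machine.getNetworkMbps() <= ConfigurationProvider.getConfig().getWorkerMachineDefinitionMaxNetworkMbps()
            && stage.getNumberOfInstances() <= ConfigurationProvider.getConfig().getMaxWorkersPerStage();
    // a scaling policy, when present, must also respect the per-stage worker cap
    StageScalingPolicy scalingPolicy = stage.getScalingPolicy();
    if (scalingPolicy != null) {
        withinLimits &= scalingPolicy.getMax() <= ConfigurationProvider.getConfig().getMaxWorkersPerStage();
    }
    // any stage outside these limits causes the whole submission to be rejected
}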

Example 5 with SchedulingInfo

Use of io.mantisrx.runtime.descriptor.SchedulingInfo in project mantis by Netflix.

From the class NoOpMantisJobOperations, method convertMantisJobWriteableToMantisJobMetadata.

// TODO job specific migration config is not supported, migration config will be at cluster level
public static IMantisJobMetadata convertMantisJobWriteableToMantisJobMetadata(MantisJobMetadata archJob, LifecycleEventPublisher eventPublisher, boolean isArchived) throws Exception {
    if (logger.isTraceEnabled()) {
        logger.trace("DataFormatAdapter:Converting {}", archJob);
    }
    // convert stages to new format
    List<IMantisStageMetadata> convertedStageList = new ArrayList<>();
    for (MantisStageMetadata stageMeta : ((MantisJobMetadataWritable) archJob).getStageMetadata()) {
        // if this is an archived job then add workerIndex may fail as there maybe multiple workers related to a given index so skip adding workers to stage
        boolean skipAddingWorkers = false;
        if (isArchived) {
            skipAddingWorkers = true;
        }
        convertedStageList.add(convertMantisStageMetadataWriteableToMantisStageMetadata(stageMeta, eventPublisher, skipAddingWorkers));
    }
    // generate SchedulingInfo
    SchedulingInfo schedulingInfo = generateSchedulingInfo(convertedStageList);
    URL jarUrl = archJob.getJarUrl();
    Optional<String> artifactName = extractArtifactName(jarUrl);
    // generate job defn
    JobDefinition jobDefn = new JobDefinition(archJob.getName(), archJob.getUser(), artifactName.orElse(""), null, archJob.getParameters(), archJob.getSla(), archJob.getSubscriptionTimeoutSecs(), schedulingInfo, archJob.getNumStages(), archJob.getLabels(), null);
    Optional<JobId> jIdOp = JobId.fromId(archJob.getJobId());
    if (!jIdOp.isPresent()) {
        throw new IllegalArgumentException("Invalid JobId " + archJob.getJobId());
    }
    // generate job meta
    MantisJobMetadataImpl mantisJobMetadata = new MantisJobMetadataImpl(jIdOp.get(), archJob.getSubmittedAt(), archJob.getStartedAt(), jobDefn, convertMantisJobStateToJobState(archJob.getState()), archJob.getNextWorkerNumberToUse());
    // add the stages
    for (IMantisStageMetadata stageMetadata : convertedStageList) {
        mantisJobMetadata.addJobStageIfAbsent(stageMetadata);
    }
    if (logger.isTraceEnabled()) {
        logger.trace("DataFormatAdapter:Completed conversion to IMantisJobMetadata {}", mantisJobMetadata);
    }
    return mantisJobMetadata;
}
Also used: StageSchedulingInfo (io.mantisrx.runtime.descriptor.StageSchedulingInfo), SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo), ArrayList (java.util.ArrayList), URL (java.net.URL), FilterableMantisJobMetadataWritable (io.mantisrx.master.jobcluster.job.FilterableMantisJobMetadataWritable), MantisJobMetadataWritable (io.mantisrx.server.master.store.MantisJobMetadataWritable), IMantisStageMetadata (io.mantisrx.master.jobcluster.job.IMantisStageMetadata), MantisStageMetadata (io.mantisrx.server.master.store.MantisStageMetadata), MantisJobMetadataImpl (io.mantisrx.master.jobcluster.job.MantisJobMetadataImpl), MantisJobDefinition (io.mantisrx.runtime.MantisJobDefinition), NamedJobDefinition (io.mantisrx.runtime.NamedJobDefinition)

Aggregations

Classes appearing alongside SchedulingInfo in these usages across the project, with occurrence counts:

SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo): 42
Test (org.junit.Test): 34
MachineDefinition (io.mantisrx.runtime.MachineDefinition): 21
ActorRef (akka.actor.ActorRef): 20
MantisJobStore (io.mantisrx.server.master.persistence.MantisJobStore): 20
MantisScheduler (io.mantisrx.server.master.scheduler.MantisScheduler): 20
StageSchedulingInfo (io.mantisrx.runtime.descriptor.StageSchedulingInfo): 19
JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto): 18
StageScalingPolicy (io.mantisrx.runtime.descriptor.StageScalingPolicy): 18
TestKit (akka.testkit.javadsl.TestKit): 17
InvalidJobException (io.mantisrx.runtime.command.InvalidJobException): 15
HashMap (java.util.HashMap): 14
JobDefinition (io.mantisrx.server.master.domain.JobDefinition): 13
JobId (io.mantisrx.server.master.domain.JobId): 12
WorkerId (io.mantisrx.server.core.domain.WorkerId): 11
GetJobDetailsResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse): 10
Matchers.anyString (org.mockito.Matchers.anyString): 10
JobClusterProto (io.mantisrx.master.jobcluster.proto.JobClusterProto): 7
JobProto (io.mantisrx.master.jobcluster.proto.JobProto): 7
IJobClusterDefinition (io.mantisrx.server.master.domain.IJobClusterDefinition): 7