Search in sources :

Example 1 with MachineDefinition

use of io.mantisrx.runtime.MachineDefinition in project mantis by Netflix.

the class SchedulingInfo method main.

/**
 * Builds a sample two-stage scheduling definition, each stage carrying a
 * {@code Memory} scaling strategy, and prints its JSON serialization to
 * standard output.
 *
 * @param args unused
 */
public static void main(String[] args) {
    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> strategies = new HashMap<>();
    StageScalingPolicy.Strategy memoryStrategy =
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.Memory, 0.1, 0.6, null);
    strategies.put(StageScalingPolicy.ScalingReason.Memory, memoryStrategy);

    // Both stages use the same machine shape and policy values but get their own instances.
    Builder schedulingInfoBuilder = new Builder()
            .numberOfStages(2)
            .multiWorkerScalableStageWithConstraints(
                    2,
                    new MachineDefinition(1, 1.24, 0.0, 1, 1),
                    null,
                    null,
                    new StageScalingPolicy(1, 1, 3, 1, 1, 60, strategies))
            .multiWorkerScalableStageWithConstraints(
                    3,
                    new MachineDefinition(1, 1.24, 0.0, 1, 1),
                    null,
                    null,
                    new StageScalingPolicy(1, 1, 3, 1, 1, 60, strategies));

    ObjectMapper jsonMapper = new ObjectMapper();
    try {
        String json = jsonMapper.writeValueAsString(schedulingInfoBuilder.build());
        System.out.println(json);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : MachineDefinition(io.mantisrx.runtime.MachineDefinition) IOException(java.io.IOException) ObjectMapper(io.mantisrx.shaded.com.fasterxml.jackson.databind.ObjectMapper)

Example 2 with MachineDefinition

use of io.mantisrx.runtime.MachineDefinition in project mantis by Netflix.

the class WorkerRegistryV2Test method testJobScaleDown.

/**
 * Submits a single-stage scalable job, sends a {@code ScaleStageRequest} for
 * stage 1, and checks the scale response, the stage's worker metadata and the
 * running-worker count reported by the worker registry.
 */
@Test
public void testJobScaleDown() throws Exception {
    WorkerRegistryV2 workerRegistryV2 = new WorkerRegistryV2();
    LifecycleEventPublisher eventPublisher = new LifecycleEventPublisherImpl(
            new AuditEventSubscriberLoggingImpl(),
            new StatusEventSubscriberLoggingImpl(),
            new DummyWorkerEventSubscriberImpl(workerRegistryV2));

    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> strategies = new HashMap<>();
    strategies.put(
            StageScalingPolicy.ScalingReason.CPU,
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    strategies.put(
            StageScalingPolicy.ScalingReason.DataDrop,
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));

    MachineDefinition machine = new MachineDefinition(1.0, 1.0, 1.0, 3);
    StageScalingPolicy scalingPolicy = new StageScalingPolicy(1, 0, 10, 1, 1, 0, strategies);
    SchedulingInfo sInfo = new SchedulingInfo.Builder()
            .numberOfStages(1)
            .multiWorkerScalableStageWithConstraints(2, machine, Lists.newArrayList(), Lists.newArrayList(), scalingPolicy)
            .build();

    String clusterName = "testJobScaleDown";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(
            system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, eventPublisher);
    assertEquals(3, workerRegistryV2.getNumRunningWorkers());

    // Ask the job actor to scale the stage down and wait for its reply.
    JobClusterManagerProto.ScaleStageRequest scaleDownRequest =
            new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 1, "", "");
    jobActor.tell(scaleDownRequest, probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp =
            probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleDownResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(1, scaleResp.getActualNumWorkers());

    // Fetch the job details and confirm the stage metadata lists a single worker.
    JobClusterManagerProto.GetJobDetailsRequest detailsRequest =
            new JobClusterManagerProto.GetJobDetailsRequest("user", new JobId(clusterName, 1));
    jobActor.tell(detailsRequest, probe.getRef());
    JobClusterManagerProto.GetJobDetailsResponse resp =
            probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
    Map<Integer, ? extends IMantisStageMetadata> stageMetadata = resp.getJobMetadata().get().getStageMetadata();
    assertEquals(1, stageMetadata.get(1).getAllWorkers().size());

    // Poll the registry up to 50 times; the count must be seen before the final attempt.
    int attempts = 0;
    while (attempts < 50) {
        attempts++;
        if (workerRegistryV2.getNumRunningWorkers() == 2) {
            break;
        }
    }
    assertTrue(attempts < 50);
}
Also used : ActorRef(akka.actor.ActorRef) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) JobId(io.mantisrx.server.master.domain.JobId) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) MachineDefinition(io.mantisrx.runtime.MachineDefinition) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) Test(org.junit.Test)

Example 3 with MachineDefinition

use of io.mantisrx.runtime.MachineDefinition in project mantis by Netflix.

the class WorkerRegistryV2Test method testJobScaleUp.

/**
 * Submits a single-stage scalable job, sends a {@code ScaleStageRequest} for
 * stage 1, reports the new worker as launched/initiated/started, and checks
 * the scale response, the stage's worker metadata and the running-worker
 * count reported by the worker registry.
 */
@Test
public void testJobScaleUp() throws Exception, InvalidJobException, io.mantisrx.runtime.command.InvalidJobException {
    WorkerRegistryV2 workerRegistryV2 = new WorkerRegistryV2();
    LifecycleEventPublisher eventPublisher = new LifecycleEventPublisherImpl(
            new AuditEventSubscriberLoggingImpl(),
            new StatusEventSubscriberLoggingImpl(),
            new DummyWorkerEventSubscriberImpl(workerRegistryV2));

    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> strategies = new HashMap<>();
    strategies.put(
            StageScalingPolicy.ScalingReason.CPU,
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    strategies.put(
            StageScalingPolicy.ScalingReason.DataDrop,
            new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));

    MachineDefinition machine = new MachineDefinition(1.0, 1.0, 1.0, 3);
    StageScalingPolicy scalingPolicy = new StageScalingPolicy(1, 0, 10, 1, 1, 0, strategies);
    SchedulingInfo sInfo = new SchedulingInfo.Builder()
            .numberOfStages(1)
            .multiWorkerScalableStageWithConstraints(1, machine, Lists.newArrayList(), Lists.newArrayList(), scalingPolicy)
            .build();

    String clusterName = "testJobScaleUp";
    String jobId = clusterName + "-1";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(
            system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, eventPublisher);
    assertEquals(2, workerRegistryV2.getNumRunningWorkers());

    // Ask the job actor to scale the stage up and wait for its reply.
    JobClusterManagerProto.ScaleStageRequest scaleUpRequest =
            new JobClusterManagerProto.ScaleStageRequest(jobId, 1, 2, "", "");
    jobActor.tell(scaleUpRequest, probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp =
            probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleupResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(2, scaleResp.getActualNumWorkers());

    // Drive the additional worker through its launched/initiated/started events.
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId, 0, new WorkerId(jobId, 1, 3));

    // Fetch the job details and confirm the stage metadata now lists two workers.
    JobClusterManagerProto.GetJobDetailsRequest detailsRequest =
            new JobClusterManagerProto.GetJobDetailsRequest("user", new JobId(clusterName, 1));
    jobActor.tell(detailsRequest, probe.getRef());
    JobClusterManagerProto.GetJobDetailsResponse resp =
            probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
    Map<Integer, ? extends IMantisStageMetadata> stageMetadata = resp.getJobMetadata().get().getStageMetadata();
    assertEquals(2, stageMetadata.get(1).getAllWorkers().size());

    // Poll the registry up to 50 times; the count must be seen before the final attempt.
    int attempts = 0;
    while (attempts < 50) {
        attempts++;
        if (workerRegistryV2.getNumRunningWorkers() == 3) {
            break;
        }
    }
    assertTrue(attempts < 50);
}
Also used : ActorRef(akka.actor.ActorRef) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) JobId(io.mantisrx.server.master.domain.JobId) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) MachineDefinition(io.mantisrx.runtime.MachineDefinition) WorkerId(io.mantisrx.server.core.domain.WorkerId) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) Test(org.junit.Test)

Example 4 with MachineDefinition

use of io.mantisrx.runtime.MachineDefinition in project mantis by Netflix.

the class VirtualMachineMasterServiceMesosImpl method createTaskInfo.

/**
 * Assembles the Mesos {@link TaskInfo} for one worker launch request.
 *
 * <p>The task carries scalar resources ("cpus", "mem", "disk", "network") taken
 * from the request's {@link MachineDefinition}, one single-port "ports" range
 * per assigned port, the worker executor, and the JSON-serialized
 * {@link ExecuteStageRequest} as its data payload.
 *
 * @param slaveID           slave id set on the task
 * @param launchTaskRequest launch request supplying the schedule request and ports
 * @return a mutable single-element list holding the built task
 * @throws LaunchTaskException if a {@link JsonProcessingException} is raised while building the task
 */
private Collection<TaskInfo> createTaskInfo(Protos.SlaveID slaveID, final LaunchTaskRequest launchTaskRequest) throws LaunchTaskException {
    final ScheduleRequest scheduleRequest = launchTaskRequest.getScheduleRequest();
    final String taskName = scheduleRequest.getWorkerId().getJobCluster()
            + " (stage: " + scheduleRequest.getStageNum()
            + " of " + scheduleRequest.getJobMetadata().getTotalStages() + ")";
    final TaskID taskId = TaskID.newBuilder()
            .setValue(scheduleRequest.getWorkerId().getId())
            .build();
    final MachineDefinition machineDefinition = scheduleRequest.getMachineDefinition();
    // grab ports within range
    final List<Integer> assignedPorts = launchTaskRequest.getPorts().getAllPorts();

    try {
        TaskInfo.Builder taskInfoBuilder = TaskInfo.newBuilder();

        // NOTE(review): the min-runtime argument subtracts (currentTimeMillis - minRuntimeSecs)
        // from minRuntimeSecs, which appears to mix milliseconds with seconds — confirm intent.
        ExecuteStageRequest executeStageRequest = new ExecuteStageRequest(
                scheduleRequest.getWorkerId().getJobCluster(),
                scheduleRequest.getWorkerId().getJobId(),
                scheduleRequest.getWorkerId().getWorkerIndex(),
                scheduleRequest.getWorkerId().getWorkerNum(),
                scheduleRequest.getJobMetadata().getJobJarUrl(),
                scheduleRequest.getStageNum(),
                scheduleRequest.getJobMetadata().getTotalStages(),
                assignedPorts,
                getTimeoutSecsToReportStart(),
                launchTaskRequest.getPorts().getMetricsPort(),
                scheduleRequest.getJobMetadata().getParameters(),
                scheduleRequest.getJobMetadata().getSchedulingInfo(),
                scheduleRequest.getDurationType(),
                scheduleRequest.getJobMetadata().getSubscriptionTimeoutSecs(),
                scheduleRequest.getJobMetadata().getMinRuntimeSecs() - (System.currentTimeMillis() - scheduleRequest.getJobMetadata().getMinRuntimeSecs()),
                launchTaskRequest.getPorts());

        Resource.Builder cpus = Resource.newBuilder()
                .setName("cpus")
                .setType(Value.Type.SCALAR)
                .setScalar(Value.Scalar.newBuilder().setValue(machineDefinition.getCpuCores()));
        Resource.Builder mem = Resource.newBuilder()
                .setName("mem")
                .setType(Value.Type.SCALAR)
                .setScalar(Value.Scalar.newBuilder().setValue(machineDefinition.getMemoryMB()));
        Resource.Builder disk = Resource.newBuilder()
                .setName("disk")
                .setType(Value.Type.SCALAR)
                .setScalar(Value.Scalar.newBuilder().setValue(machineDefinition.getDiskMB()));
        Resource.Builder network = Resource.newBuilder()
                .setName("network")
                .setType(Value.Type.SCALAR)
                .setScalar(Value.Scalar.newBuilder().setValue(machineDefinition.getNetworkMbps()));

        taskInfoBuilder
                .setName(taskName)
                .setTaskId(taskId)
                .setSlaveId(slaveID)
                .addResources(cpus)
                .addResources(mem)
                .addResources(disk)
                .addResources(network)
                .setExecutor(createMantisWorkerExecutor(executeStageRequest, launchTaskRequest, machineDefinition.getMemoryMB(), machineDefinition.getCpuCores()))
                .setData(ByteString.copyFrom(mapper.writeValueAsBytes(executeStageRequest)));

        // add ports: each assigned port becomes its own [port, port] range resource
        for (Integer port : assignedPorts) {
            Value.Ranges.Builder singlePortRange = Value.Ranges.newBuilder()
                    .addRange(Value.Range.newBuilder().setBegin(port).setEnd(port));
            taskInfoBuilder.addResources(Resource.newBuilder()
                    .setName("ports")
                    .setType(Value.Type.RANGES)
                    .setRanges(singlePortRange));
        }

        List<TaskInfo> tasks = new ArrayList<>(1);
        tasks.add(taskInfoBuilder.build());
        return tasks;
    } catch (JsonProcessingException e) {
        throw new LaunchTaskException("Failed to build a TaskInfo instance: " + e.getMessage(), e);
    }
}
Also used : TaskID(org.apache.mesos.Protos.TaskID) MachineDefinition(io.mantisrx.runtime.MachineDefinition) ScheduleRequest(io.mantisrx.server.master.scheduler.ScheduleRequest) ArrayList(java.util.ArrayList) LaunchTaskException(io.mantisrx.server.master.LaunchTaskException) ByteString(com.google.protobuf.ByteString) ExecuteStageRequest(io.mantisrx.server.core.ExecuteStageRequest) TaskInfo(org.apache.mesos.Protos.TaskInfo) JsonProcessingException(io.mantisrx.shaded.com.fasterxml.jackson.core.JsonProcessingException)

Example 5 with MachineDefinition

use of io.mantisrx.runtime.MachineDefinition in project mantis by Netflix.

the class JobDefinition method validateSchedulingInfo.

/**
 * Validates the {@code schedulingInfo} of this job definition.
 *
 * <p>Checks that scheduling info and its stage map are present, that every
 * expected stage index has a definition with at least one instance, and that
 * each stage's machine definition has positive CPU and memory and
 * non-negative disk and port counts.
 *
 * @param schedulingInfoOptional when {@code true}, a missing scheduling info is accepted
 *                               and validation is skipped
 * @throws InvalidJobException if scheduling info is required but missing, has no stages,
 *                             lacks a stage or machine definition, or a stage's instance
 *                             count or machine resources are out of range
 */
private void validateSchedulingInfo(boolean schedulingInfoOptional) throws InvalidJobException {
    if (schedulingInfo == null) {
        if (schedulingInfoOptional) {
            return;
        }
        throw new InvalidJobException("No scheduling info provided");
    }
    if (schedulingInfo.getStages() == null) {
        throw new InvalidJobException("No stages defined in scheduling info");
    }
    int withNumberOfStages = schedulingInfo.getStages().size();
    int startingIdx = 1;
    if (schedulingInfo.forStage(0) != null) {
        // jobMaster stage 0 definition exists, adjust index range
        startingIdx = 0;
        withNumberOfStages--;
    }
    for (int i = startingIdx; i <= withNumberOfStages; i++) {
        StageSchedulingInfo stage = schedulingInfo.getStages().get(i);
        if (stage == null) {
            throw new InvalidJobException("No definition for stage " + i + " in scheduling info for " + withNumberOfStages + " stage job");
        }
        if (stage.getNumberOfInstances() < 1) {
            throw new InvalidJobException("Number of instance for stage " + i + " must be >0, not " + stage.getNumberOfInstances());
        }
        MachineDefinition machineDefinition = stage.getMachineDefinition();
        if (machineDefinition == null) {
            // Report a validation failure instead of failing with a NullPointerException below.
            throw new InvalidJobException("No machine definition for stage " + i);
        }
        if (machineDefinition.getCpuCores() <= 0) {
            throw new InvalidJobException("cpuCores must be >0.0, not " + machineDefinition.getCpuCores());
        }
        if (machineDefinition.getMemoryMB() <= 0) {
            // Message previously said "<0.0", contradicting the condition being enforced.
            throw new InvalidJobException("memory must be >0.0, not " + machineDefinition.getMemoryMB());
        }
        if (machineDefinition.getDiskMB() < 0) {
            throw new InvalidJobException("disk must be >=0, not " + machineDefinition.getDiskMB());
        }
        if (machineDefinition.getNumPorts() < 0) {
            throw new InvalidJobException("numPorts must be >=0, not " + machineDefinition.getNumPorts());
        }
    }
}
Also used : MachineDefinition(io.mantisrx.runtime.MachineDefinition) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) InvalidJobException(io.mantisrx.runtime.command.InvalidJobException)

Aggregations

MachineDefinition (io.mantisrx.runtime.MachineDefinition)24 SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo)21 Test (org.junit.Test)20 ActorRef (akka.actor.ActorRef)15 StageScalingPolicy (io.mantisrx.runtime.descriptor.StageScalingPolicy)15 MantisJobStore (io.mantisrx.server.master.persistence.MantisJobStore)15 MantisScheduler (io.mantisrx.server.master.scheduler.MantisScheduler)15 JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)14 StageSchedulingInfo (io.mantisrx.runtime.descriptor.StageSchedulingInfo)13 TestKit (akka.testkit.javadsl.TestKit)12 HashMap (java.util.HashMap)12 JobId (io.mantisrx.server.master.domain.JobId)11 WorkerId (io.mantisrx.server.core.domain.WorkerId)9 InvalidJobException (io.mantisrx.runtime.command.InvalidJobException)8 IOException (java.io.IOException)8 JobProto (io.mantisrx.master.jobcluster.proto.JobProto)7 IJobClusterDefinition (io.mantisrx.server.master.domain.IJobClusterDefinition)7 JobDefinition (io.mantisrx.server.master.domain.JobDefinition)7 GetJobDetailsResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse)6 Context (io.mantisrx.runtime.Context)5