
Example 6 with MachineDefinition

Use of io.mantisrx.runtime.MachineDefinition in project mantis by Netflix.

From the class JobScaleUpDownTests, the method testJobScaleUpFailsIfMinEqualsMax:

@Test
public void testJobScaleUpFailsIfMinEqualsMax() throws Exception {
    final TestKit probe = new TestKit(system);
    Map<ScalingReason, Strategy> smap = new HashMap<>();
    SchedulingInfo sInfo = new SchedulingInfo.Builder()
            .numberOfStages(1)
            .multiWorkerScalableStageWithConstraints(
                    1,
                    new MachineDefinition(1.0, 1.0, 1.0, 3),
                    Lists.newArrayList(),
                    Lists.newArrayList(),
                    // stage 1 policy with min == max == 1, so any scale-up must fail
                    new StageScalingPolicy(1, 1, 1, 1, 1, 0, smap))
            .build();
    String clusterName = "testJobScaleUpFailsIfNoScaleStrategy";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, lifecycleEventPublisher);
    // send scale up request
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 3, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp = probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleupResp " + scaleResp.message);
    assertEquals(CLIENT_ERROR, scaleResp.responseCode);
    assertEquals(0, scaleResp.getActualNumWorkers());
    verify(jobStoreMock, times(1)).storeNewJob(any());
    // initial worker
    verify(jobStoreMock, times(1)).storeNewWorkers(any(), any());
    // no scale up worker happened
    verify(jobStoreMock, times(0)).storeNewWorker(any());
    verify(jobStoreMock, times(3)).updateWorker(any());
    verify(jobStoreMock, times(3)).updateJob(any());
    // initial worker only
    verify(schedulerMock, times(1)).scheduleWorker(any());
}
Also used : SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) JobSchedulingInfo(io.mantisrx.server.core.JobSchedulingInfo) MachineDefinition(io.mantisrx.runtime.MachineDefinition) HashMap(java.util.HashMap) ActorRef(akka.actor.ActorRef) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) TestKit(akka.testkit.javadsl.TestKit) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) Strategy(io.mantisrx.runtime.descriptor.StageScalingPolicy.Strategy) ScalingReason(io.mantisrx.runtime.descriptor.StageScalingPolicy.ScalingReason) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) Test(org.junit.Test)
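
A note on the MachineDefinition arguments: the five-argument calls in the later examples suggest the order (cpuCores, memoryMB, networkMbps, diskMB, numPorts), with the four-argument overload above appearing to omit networkMbps. The parameter names below are an assumption inferred from the argument values, not confirmed against the Mantis sources; a minimal readability sketch:

    // Parameter names are assumed from the argument pattern in these examples,
    // not confirmed against the Mantis sources.
    double cpuCores = 2;
    double memoryMB = 512.0;
    double networkMbps = 200;
    double diskMB = 1024;
    int numPorts = 2;
    MachineDefinition machine =
            new MachineDefinition(cpuCores, memoryMB, networkMbps, diskMB, numPorts);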

Example 7 with MachineDefinition

Use of io.mantisrx.runtime.MachineDefinition in project mantis by Netflix.

From the class VirtualMachineWorkerServiceLocalImpl, the method createExecuteStageRequest:

private WrappedExecuteStageRequest createExecuteStageRequest() throws MalformedURLException {
    // TODO make ExecuteStageRequest params configurable
    final long timeoutToReportStartSec = 5;
    final URL jobJarUrl = new URL("file:/Users/nmahilani/Projects/Mantis/mantis-sdk/examples/sine-function/build/distributions/sine-function-1.0.zip");
    final List<Integer> ports = Arrays.asList(31015, 31013, 31014);
    final List<Parameter> params = Collections.singletonList(new Parameter("useRandom", "true"));
    final int numInstances = 1;
    // new MachineDefinition(2, 300, 200, 1024, 2), true));
    final Map<Integer, StageSchedulingInfo> schedulingInfoMap = new HashMap<>();
    final StageSchedulingInfo stage0SchedInfo = StageSchedulingInfo.builder().numberOfInstances(numInstances).machineDefinition(MachineDefinitions.micro()).build();
    final StageSchedulingInfo stage1SchedInfo = StageSchedulingInfo.builder()
            .numberOfInstances(numInstances)
            .machineDefinition(new MachineDefinition(2, 300, 200, 1024, 2))
            .scalingPolicy(new StageScalingPolicy(1, 1, 5, 1, 1, 30,
                    Collections.singletonMap(
                            StageScalingPolicy.ScalingReason.Memory,
                            new StageScalingPolicy.Strategy(
                                    StageScalingPolicy.ScalingReason.Memory,
                                    15.0, 25.0,
                                    new StageScalingPolicy.RollingCount(1, 2)))))
            .scalable(true)
            .build();
    // schedulingInfoMap.put(0, stage0SchedInfo);
    schedulingInfoMap.put(1, stage1SchedInfo);
    final SchedulingInfo schedInfo = new SchedulingInfo(schedulingInfoMap);
    final ExecuteStageRequest executeStageRequest = new ExecuteStageRequest(
            workerInfo.getJobName(), workerInfo.getJobId(),
            workerInfo.getWorkerIndex(), workerInfo.getWorkerNumber(),
            jobJarUrl, workerInfo.getStageNumber(), workerInfo.getNumStages(),
            ports, timeoutToReportStartSec, workerInfo.getMetricsPort(),
            params, schedInfo, MantisJobDurationType.Transient,
            0L, 0L,
            new WorkerPorts(Arrays.asList(7151, 7152, 7153, 7154, 7155)));
    return new WrappedExecuteStageRequest(PublishSubject.<Boolean>create(), executeStageRequest);
}
Also used : MachineDefinition(io.mantisrx.runtime.MachineDefinition) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) HashMap(java.util.HashMap) URL(java.net.URL) ExecuteStageRequest(io.mantisrx.server.core.ExecuteStageRequest) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) WorkerPorts(io.mantisrx.common.WorkerPorts) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) Parameter(io.mantisrx.runtime.parameter.Parameter)
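
The inline StageScalingPolicy above is dense. Unpacked with named locals it reads more easily; the positional meanings (stage, min, max, increment, decrement, coolDownSecs, strategies) are taken from the variable names in the JobAutoScalerTest examples below, and the RollingCount semantics are an assumption:

    int stage = 1, min = 1, max = 5, increment = 1, decrement = 1, coolDownSecs = 30;
    StageScalingPolicy scalingPolicy = new StageScalingPolicy(
            stage, min, max, increment, decrement, coolDownSecs,
            Collections.singletonMap(
                    StageScalingPolicy.ScalingReason.Memory,
                    new StageScalingPolicy.Strategy(
                            StageScalingPolicy.ScalingReason.Memory,
                            // scale down below 15%, scale up above 25% memory usage
                            15.0, 25.0,
                            // RollingCount(1, 2) mirrors the tests; its exact semantics
                            // (e.g. "1 of the last 2 events") are an assumption
                            new StageScalingPolicy.RollingCount(1, 2))));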

Example 8 with MachineDefinition

Use of io.mantisrx.runtime.MachineDefinition in project mantis by Netflix.

From the class JobAutoScalerTest, the method testScaleDownNotLessThanMin:

@Test
public void testScaleDownNotLessThanMin() throws InterruptedException {
    final String jobId = "test-job-1";
    final int coolDownSec = 2;
    final int scalingStageNum = 1;
    final MantisMasterClientApi mockMasterClientApi = mock(MantisMasterClientApi.class);
    final Map<Integer, StageSchedulingInfo> schedulingInfoMap = new HashMap<>();
    final int numStage1Workers = 5;
    final int increment = 10;
    // decrement by 10 on scale down; this would push the worker count below min, and even below 0
    final int decrement = 10;
    final int min = 3;
    final int max = 50;
    final double scaleUpAbovePct = 45.0;
    final double scaleDownBelowPct = 15.0;
    final double workerMemoryMB = 512.0;
    final StageSchedulingInfo stage1SchedInfo = StageSchedulingInfo.builder()
            .numberOfInstances(numStage1Workers)
            .machineDefinition(new MachineDefinition(2, workerMemoryMB, 200, 1024, 2))
            .scalingPolicy(new StageScalingPolicy(
                    scalingStageNum, min, max, increment, decrement, coolDownSec,
                    Collections.singletonMap(
                            StageScalingPolicy.ScalingReason.Memory,
                            new StageScalingPolicy.Strategy(
                                    StageScalingPolicy.ScalingReason.Memory,
                                    scaleDownBelowPct, scaleUpAbovePct,
                                    new StageScalingPolicy.RollingCount(1, 2)))))
            .scalable(true)
            .build();
    schedulingInfoMap.put(scalingStageNum, stage1SchedInfo);
    when(mockMasterClientApi.scaleJobStage(eq(jobId), eq(scalingStageNum), anyInt(), anyString())).thenReturn(Observable.just(true));
    Context context = mock(Context.class);
    when(context.getWorkerMapObservable()).thenReturn(Observable.empty());
    final JobAutoScaler jobAutoScaler = new JobAutoScaler(jobId, new SchedulingInfo(schedulingInfoMap), mockMasterClientApi, context);
    jobAutoScaler.start();
    final Observer<JobAutoScaler.Event> jobAutoScalerObserver = jobAutoScaler.getObserver();
    // should trigger a scale down (below 15% scaleDown threshold)
    jobAutoScalerObserver.onNext(new JobAutoScaler.Event(StageScalingPolicy.ScalingReason.Memory, scalingStageNum, workerMemoryMB * (scaleDownBelowPct / 100.0 - 0.01), numStage1Workers, ""));
    verify(mockMasterClientApi, timeout(1000).times(1)).scaleJobStage(jobId, scalingStageNum, min, String.format("Memory with value %1$,.2f is below scaleDown threshold of %2$,.1f", (scaleDownBelowPct / 100.0 - 0.01) * 100.0, scaleDownBelowPct));
    verifyNoMoreInteractions(mockMasterClientApi);
}
Also used : Context(io.mantisrx.runtime.Context) MachineDefinition(io.mantisrx.runtime.MachineDefinition) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) MantisMasterClientApi(io.mantisrx.server.master.client.MantisMasterClientApi) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) Test(org.junit.Test)
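
The event value arithmetic is worth spelling out: the test emits an absolute memory reading one percentage point under the scale-down threshold, and the expected message shows it converted back to a percentage of the worker's memory. A quick check of the numbers (the normalization step is presumed from the assertion, not taken from the autoscaler sources):

    double workerMemoryMB = 512.0;
    double scaleDownBelowPct = 15.0;
    // The event carries an absolute MB value one point under the threshold:
    double eventValueMB = workerMemoryMB * (scaleDownBelowPct / 100.0 - 0.01); // 512 * 0.14 = 71.68
    // Normalized against workerMemoryMB this is 14.00%, below the 15.0% threshold,
    // so a scale-down fires; with decrement = 10 the target would be 5 - 10 = -5,
    // which the autoscaler clamps to min = 3, as the verify(...) above asserts.
    double eventValuePct = eventValueMB / workerMemoryMB * 100.0; // 14.00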

Example 9 with MachineDefinition

Use of io.mantisrx.runtime.MachineDefinition in project mantis by Netflix.

From the class JobAutoScalerTest, the method testScaleUpOnDifferentScalingReasons:

@Test
public void testScaleUpOnDifferentScalingReasons() throws InterruptedException {
    final List<StageScalingPolicy.ScalingReason> scalingReasons = Arrays.asList(DataDrop, KafkaLag, UserDefined);
    for (StageScalingPolicy.ScalingReason scalingReason : scalingReasons) {
        logger.info("==== test scaling reason {} =====", scalingReason.name());
        final String jobId = "test-job-1";
        final int coolDownSec = 2;
        final int scalingStageNum = 1;
        final MantisMasterClientApi mockMasterClientApi = mock(MantisMasterClientApi.class);
        final Map<Integer, StageSchedulingInfo> schedulingInfoMap = new HashMap<>();
        final int numStage1Workers = 1;
        final int increment = 1;
        final int decrement = 0;
        final int min = 1;
        final int max = 5;
        final double scaleUpAbove = 2000.0;
        final double scaleDownBelow = 0.0;
        final double workerMemoryMB = 512.0;
        final StageSchedulingInfo stage1SchedInfo = StageSchedulingInfo.builder()
                .numberOfInstances(numStage1Workers)
                .machineDefinition(new MachineDefinition(2, workerMemoryMB, 200, 1024, 2))
                .scalingPolicy(new StageScalingPolicy(
                        scalingStageNum, min, max, increment, decrement, coolDownSec,
                        Collections.singletonMap(
                                scalingReason,
                                new StageScalingPolicy.Strategy(
                                        scalingReason,
                                        scaleDownBelow, scaleUpAbove,
                                        new StageScalingPolicy.RollingCount(1, 2)))))
                .scalable(true)
                .build();
        schedulingInfoMap.put(scalingStageNum, stage1SchedInfo);
        when(mockMasterClientApi.scaleJobStage(eq(jobId), eq(scalingStageNum), eq(numStage1Workers + increment), anyString())).thenReturn(Observable.just(true));
        Context context = mock(Context.class);
        when(context.getWorkerMapObservable()).thenReturn(Observable.empty());
        final JobAutoScaler jobAutoScaler = new JobAutoScaler(jobId, new SchedulingInfo(schedulingInfoMap), mockMasterClientApi, context);
        jobAutoScaler.start();
        final Observer<JobAutoScaler.Event> jobAutoScalerObserver = jobAutoScaler.getObserver();
        // should trigger a scale up (above scaleUp threshold)
        jobAutoScalerObserver.onNext(new JobAutoScaler.Event(scalingReason, scalingStageNum, scaleUpAbove + 0.01, numStage1Workers, ""));
        verify(mockMasterClientApi, timeout(1000).times(1)).scaleJobStage(jobId, scalingStageNum, numStage1Workers + increment, String.format("%s with value %2$.2f exceeded scaleUp threshold of %3$.1f", scalingReason.name(), (scaleUpAbove + 0.01), scaleUpAbove));
        // should *not* trigger a scale up before cooldown period (above scaleUp threshold)
        jobAutoScalerObserver.onNext(new JobAutoScaler.Event(scalingReason, scalingStageNum, scaleUpAbove + 0.01, numStage1Workers + increment, ""));
        jobAutoScalerObserver.onNext(new JobAutoScaler.Event(scalingReason, scalingStageNum, scaleUpAbove + 0.01, numStage1Workers + increment, ""));
        Thread.sleep(coolDownSec * 1000);
        // Retry sending the autoscale event until the scaleJobStage request reaches the
        // master: there is a possible race between this test's cooldown sleep and the
        // event being processed before the cooldown expires.
        final CountDownLatch retryLatch = new CountDownLatch(1);
        when(mockMasterClientApi.scaleJobStage(eq(jobId), eq(scalingStageNum), eq(numStage1Workers + 2 * increment), anyString())).thenAnswer(new Answer<Observable<Void>>() {

            @Override
            public Observable<Void> answer(InvocationOnMock invocation) throws Throwable {
                retryLatch.countDown();
                return Observable.just(null);
            }
        });
        do {
            logger.info("sending Job auto scale Event");
            // should trigger a scale up after cooldown period (above scaleUp threshold)
            jobAutoScalerObserver.onNext(new JobAutoScaler.Event(scalingReason, scalingStageNum, scaleUpAbove + 0.01, numStage1Workers + increment, ""));
        } while (!retryLatch.await(1, TimeUnit.SECONDS));
        verify(mockMasterClientApi, timeout(1000).times(1)).scaleJobStage(jobId, scalingStageNum, numStage1Workers + 2 * increment, String.format("%s with value %2$.2f exceeded scaleUp threshold of %3$.1f", scalingReason.name(), (scaleUpAbove + 0.01), scaleUpAbove));
    }
}
Also used : HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) MantisMasterClientApi(io.mantisrx.server.master.client.MantisMasterClientApi) Context(io.mantisrx.runtime.Context) MachineDefinition(io.mantisrx.runtime.MachineDefinition) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) CountDownLatch(java.util.concurrent.CountDownLatch) Observable(rx.Observable) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) InvocationOnMock(org.mockito.invocation.InvocationOnMock) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) Test(org.junit.Test)
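
The CountDownLatch loop at the end is a general idiom for cooldown-sensitive assertions: rather than trusting a single sleep to outlast the cooldown window, the test keeps re-sending the stimulus until the stubbed call reports it was invoked. Extracted as a sketch (the helper name is illustrative, not from the Mantis codebase):

    static void sendUntilObserved(Runnable stimulus, CountDownLatch observed) throws InterruptedException {
        do {
            // re-send, then wait briefly for the mocked call to count the latch down
            stimulus.run();
        } while (!observed.await(1, TimeUnit.SECONDS));
    }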

Example 10 with MachineDefinition

Use of io.mantisrx.runtime.MachineDefinition in project mantis by Netflix.

From the class JobAutoScalerTest, the method testScalingResiliency:

@Test
public void testScalingResiliency() throws InterruptedException {
    final String jobId = "test-job-1";
    final int coolDownSec = 2;
    final int scalingStageNum = 1;
    final MantisMasterClientApi mockMasterClientApi = mock(MantisMasterClientApi.class);
    final Map<Integer, StageSchedulingInfo> schedulingInfoMap = new HashMap<>();
    final int numStage1Workers = 1;
    final int increment = 1;
    final int decrement = 1;
    final int min = 1;
    final int max = 5;
    final double scaleUpAbovePct = 45.0;
    final double scaleDownBelowPct = 15.0;
    final double workerMemoryMB = 512.0;
    final StageSchedulingInfo stage1SchedInfo = StageSchedulingInfo.builder()
            .numberOfInstances(numStage1Workers)
            .machineDefinition(new MachineDefinition(2, workerMemoryMB, 200, 1024, 2))
            .scalingPolicy(new StageScalingPolicy(
                    scalingStageNum, min, max, increment, decrement, coolDownSec,
                    Collections.singletonMap(
                            StageScalingPolicy.ScalingReason.Memory,
                            new StageScalingPolicy.Strategy(
                                    StageScalingPolicy.ScalingReason.Memory,
                                    scaleDownBelowPct, scaleUpAbovePct,
                                    new StageScalingPolicy.RollingCount(1, 2)))))
            .scalable(true)
            .build();
    schedulingInfoMap.put(scalingStageNum, stage1SchedInfo);
    final CountDownLatch scaleJobStageSuccessLatch = new CountDownLatch(1);
    final AtomicInteger count = new AtomicInteger(0);
    final Observable<Boolean> simulateScaleJobStageFailureResp = Observable.just(1).map(new Func1<Integer, Boolean>() {

        @Override
        public Boolean call(Integer integer) {
            if (count.incrementAndGet() < 3) {
                throw new IllegalStateException("fake connection exception");
            } else {
                scaleJobStageSuccessLatch.countDown();
                return true;
            }
        }
    });
    when(mockMasterClientApi.scaleJobStage(eq(jobId), eq(scalingStageNum), eq(numStage1Workers + increment), anyString())).thenReturn(simulateScaleJobStageFailureResp);
    Context context = mock(Context.class);
    when(context.getWorkerMapObservable()).thenReturn(Observable.empty());
    final JobAutoScaler jobAutoScaler = new JobAutoScaler(jobId, new SchedulingInfo(schedulingInfoMap), mockMasterClientApi, context);
    jobAutoScaler.start();
    final Observer<JobAutoScaler.Event> jobAutoScalerObserver = jobAutoScaler.getObserver();
    // should trigger a scale up (above 45% scaleUp threshold)
    jobAutoScalerObserver.onNext(new JobAutoScaler.Event(StageScalingPolicy.ScalingReason.Memory, scalingStageNum, workerMemoryMB * (scaleUpAbovePct / 100.0 + 0.01), numStage1Workers, ""));
    verify(mockMasterClientApi, timeout(1000).times(1)).scaleJobStage(jobId, scalingStageNum, numStage1Workers + increment, String.format("Memory with value %1$,.2f exceeded scaleUp threshold of 45.0", (scaleUpAbovePct / 100.0 + 0.01) * 100.0));
    scaleJobStageSuccessLatch.await();
}
Also used : HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) MantisMasterClientApi(io.mantisrx.server.master.client.MantisMasterClientApi) Context(io.mantisrx.runtime.Context) MachineDefinition(io.mantisrx.runtime.MachineDefinition) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) CountDownLatch(java.util.concurrent.CountDownLatch) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) Test(org.junit.Test)
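
For reference, the fail-twice-then-succeed response can be written more compactly with a Java 8 lambda in place of the anonymous Func1 (RxJava 1.x, same behavior: each retry re-subscribes to the cold Observable and re-runs the map function):

    AtomicInteger attempts = new AtomicInteger(0);
    Observable<Boolean> flakyResp = Observable.just(1).map(i -> {
        if (attempts.incrementAndGet() < 3) {
            // the first two evaluations throw, exercising the autoscaler's retry path
            throw new IllegalStateException("fake connection exception");
        }
        scaleJobStageSuccessLatch.countDown();
        return true;
    });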

Aggregations

MachineDefinition (io.mantisrx.runtime.MachineDefinition): 24 usages
SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo): 21 usages
Test (org.junit.Test): 20 usages
ActorRef (akka.actor.ActorRef): 15 usages
StageScalingPolicy (io.mantisrx.runtime.descriptor.StageScalingPolicy): 15 usages
MantisJobStore (io.mantisrx.server.master.persistence.MantisJobStore): 15 usages
MantisScheduler (io.mantisrx.server.master.scheduler.MantisScheduler): 15 usages
JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto): 14 usages
StageSchedulingInfo (io.mantisrx.runtime.descriptor.StageSchedulingInfo): 13 usages
TestKit (akka.testkit.javadsl.TestKit): 12 usages
HashMap (java.util.HashMap): 12 usages
JobId (io.mantisrx.server.master.domain.JobId): 11 usages
WorkerId (io.mantisrx.server.core.domain.WorkerId): 9 usages
InvalidJobException (io.mantisrx.runtime.command.InvalidJobException): 8 usages
IOException (java.io.IOException): 8 usages
JobProto (io.mantisrx.master.jobcluster.proto.JobProto): 7 usages
IJobClusterDefinition (io.mantisrx.server.master.domain.IJobClusterDefinition): 7 usages
JobDefinition (io.mantisrx.server.master.domain.JobDefinition): 7 usages
GetJobDetailsResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse): 6 usages
Context (io.mantisrx.runtime.Context): 5 usages