
Example 1 with WorkerHost

Use of io.mantisrx.server.core.WorkerHost in project mantis by Netflix.

From the class ConnectToNamedJob, method main2.

public static void main2(final String[] args) {
    List<String> remArgs = Collections.emptyList();
    try {
        remArgs = Args.parse(ConnectToNamedJob.class, args);
    } catch (IllegalArgumentException e) {
        Args.usage(ConnectToNamedJob.class);
        System.exit(1);
    }
    if (remArgs.isEmpty()) {
        System.err.println("Must provide JobId as argument");
        System.exit(1);
    }
    final String jobId = remArgs.get(0);
    Properties properties = new Properties();
    System.out.println("propfile=" + propFile);
    try (InputStream inputStream = new FileInputStream(propFile)) {
        properties.load(inputStream);
    } catch (IOException e) {
        e.printStackTrace();
    }
    MasterClientWrapper clientWrapper = new MasterClientWrapper(properties);
    clientWrapper.getMasterClientApi().doOnNext(new Action1<MantisMasterClientApi>() {

        @Override
        public void call(MantisMasterClientApi clientApi) {
            logger.info("************** connecting to schedInfo for job " + jobId);
            clientApi.schedulingChanges(jobId).doOnNext(new Action1<JobSchedulingInfo>() {

                @Override
                public void call(JobSchedulingInfo schedulingInfo) {
                    final WorkerAssignments workerAssignments = schedulingInfo.getWorkerAssignments().get(1);
                    for (Map.Entry<Integer, WorkerHost> entry : workerAssignments.getHosts().entrySet()) {
                        System.out.println("Worker " + entry.getKey() + ": state=" + entry.getValue().getState() + ", host=" + entry.getValue().getHost() + ", port=" + entry.getValue().getPort());
                    }
                }
            }).subscribe();
        }
    }).subscribe();
    try {
        Thread.sleep(10000000);
    } catch (InterruptedException ie) {
    }
}
Also used : MasterClientWrapper(io.mantisrx.server.master.client.MasterClientWrapper) WorkerHost(io.mantisrx.server.core.WorkerHost) Action1(rx.functions.Action1) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) JobSchedulingInfo(io.mantisrx.server.core.JobSchedulingInfo) IOException(java.io.IOException) Properties(java.util.Properties) MantisMasterClientApi(io.mantisrx.server.master.client.MantisMasterClientApi) WorkerAssignments(io.mantisrx.server.core.WorkerAssignments) Map(java.util.Map)
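All of these examples traverse the same structure: a JobSchedulingInfo maps stage numbers to WorkerAssignments, and each WorkerAssignments maps worker numbers to WorkerHost entries. Below is a minimal standalone sketch, not taken from the Mantis sources, that builds that structure using the constructors seen in Example 4 further down and walks it the same way as Example 1; the meaning of the final -1 constructor argument is not shown in these snippets and is simply carried over here.

import io.mantisrx.runtime.MantisJobState;
import io.mantisrx.server.core.JobSchedulingInfo;
import io.mantisrx.server.core.WorkerAssignments;
import io.mantisrx.server.core.WorkerHost;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class WorkerHostSketch {

    public static void main(String[] args) {
        // One Started worker on stage 1, keyed by worker number.
        Map<Integer, WorkerHost> hosts = new HashMap<>();
        hosts.put(1, new WorkerHost("1.1.1.1", 0, Arrays.asList(31300), MantisJobState.Started, 1, 31301, -1));

        Map<Integer, WorkerAssignments> assignments = new HashMap<>();
        assignments.put(1, new WorkerAssignments(1, 1, hosts));

        JobSchedulingInfo schedulingInfo = new JobSchedulingInfo("sample-job-1", assignments);

        // Same traversal as Example 1: stage number -> worker number -> WorkerHost.
        WorkerAssignments stage1 = schedulingInfo.getWorkerAssignments().get(1);
        for (Map.Entry<Integer, WorkerHost> entry : stage1.getHosts().entrySet()) {
            WorkerHost host = entry.getValue();
            System.out.println("Worker " + entry.getKey() + ": state=" + host.getState()
                    + ", host=" + host.getHost() + ", port=" + host.getPort());
        }
    }
}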

Example 2 with WorkerHost

Use of io.mantisrx.server.core.WorkerHost in project mantis by Netflix.

From the class MasterClientWrapper, method getAllNonJobMasterEndpoints.

private List<Endpoint> getAllNonJobMasterEndpoints(final String jobId, final Map<Integer, WorkerAssignments> workerAssignments) {
    List<Endpoint> endpoints = new ArrayList<>();
    int totalWorkers = 0;
    for (Map.Entry<Integer, WorkerAssignments> workerAssignment : workerAssignments.entrySet()) {
        final Integer stageNum = workerAssignment.getKey();
        // skip workers for stage 0
        if (stageNum == 0) {
            continue;
        }
        final WorkerAssignments assignments = workerAssignment.getValue();
        logger.info("job {} Creating endpoints conx from {} worker assignments for stage {}", jobId, assignments.getHosts().size(), stageNum);
        if (logger.isDebugEnabled()) {
            logger.debug("stage {} hosts: {}", stageNum, assignments.getHosts());
        }
        totalWorkers += assignments.getNumWorkers();
        for (WorkerHost host : assignments.getHosts().values()) {
            final int workerIndex = host.getWorkerIndex();
            if (host.getState() == MantisJobState.Started) {
                logger.info("job " + jobId + ": creating new endpoint for worker number=" + host.getWorkerNumber() + ", index=" + host.getWorkerIndex() + ", host:port=" + host.getHost() + ":" + host.getPort().get(0));
                Endpoint ep = new WorkerEndpoint(getWrappedHost(host.getHost(), host.getWorkerNumber()), host.getPort().get(0), stageNum, host.getMetricsPort(), host.getWorkerIndex(), host.getWorkerNumber(), // completed callback
                new Action0() {

                    @Override
                    public void call() {
                        logger.info("job " + jobId + " WorkerIndex " + workerIndex + " completed");
                    }
                }, // error callback
                new Action1<Throwable>() {

                    @Override
                    public void call(Throwable t1) {
                        logger.info("job " + jobId + " WorkerIndex " + workerIndex + " failed");
                    }
                });
                endpoints.add(ep);
            }
        }
    }
    numWorkersSubject.onNext(new JobNumWorkers(jobId, totalWorkers));
    return endpoints;
}
Also used : WorkerHost(io.mantisrx.server.core.WorkerHost) Action0(rx.functions.Action0) WorkerEndpoint(io.mantisrx.common.network.WorkerEndpoint) ArrayList(java.util.ArrayList) Endpoint(io.mantisrx.common.network.Endpoint) WorkerAssignments(io.mantisrx.server.core.WorkerAssignments) Map(java.util.Map)
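Since rx.functions.Action0 and rx.functions.Action1 are single-method interfaces, the completed and error callbacks passed to the WorkerEndpoint above can also be written as lambdas. A small self-contained sketch, not part of the Mantis sources, with jobId and workerIndex stubbed in as local values:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import rx.functions.Action0;
import rx.functions.Action1;

public class CallbackLambdaSketch {

    private static final Logger logger = LoggerFactory.getLogger(CallbackLambdaSketch.class);

    public static void main(String[] args) {
        final String jobId = "sample-job-1";
        final int workerIndex = 0;

        // Lambda equivalents of the anonymous Action0/Action1 classes in Example 2.
        Action0 completedCallback = () ->
                logger.info("job " + jobId + " WorkerIndex " + workerIndex + " completed");
        Action1<Throwable> errorCallback = t1 ->
                logger.info("job " + jobId + " WorkerIndex " + workerIndex + " failed");

        // Invoke them directly just to show the effect.
        completedCallback.call();
        errorCallback.call(new RuntimeException("simulated worker failure"));
    }
}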

Example 3 with WorkerHost

Use of io.mantisrx.server.core.WorkerHost in project mantis by Netflix.

From the class WorkerExecutionOperationsNetworkStage, method connectToObservableAtPreviousStages.

@SuppressWarnings({ "rawtypes" })
private WorkerConsumer connectToObservableAtPreviousStages(Observable<JobSchedulingInfo> selfSchedulingInfo, final String jobId, final int previousStageNum, int numInstanceAtPreviousStage, final StageConfig previousStage, final AtomicBoolean acceptSchedulingChanges, final Observer<Status> jobStatus, final int stageNumToExecute, final int workerIndex, final int workerNumber) {
    logger.info("Watching for scheduling changes");
    // Observable<List<Endpoint>> schedulingUpdates = mantisMasterApi.schedulingChanges(jobId)
    Observable<List<Endpoint>> schedulingUpdates = selfSchedulingInfo.flatMap((Func1<JobSchedulingInfo, Observable<WorkerAssignments>>) schedulingChange -> {
        Map<Integer, WorkerAssignments> assignments = schedulingChange.getWorkerAssignments();
        if (assignments != null && !assignments.isEmpty()) {
            return Observable.from(assignments.values());
        } else {
            return Observable.empty();
        }
    }).filter(assignments -> (assignments.getStage() == previousStageNum) && acceptSchedulingChanges.get()).map(assignments -> {
        List<Endpoint> endpoints = new LinkedList<>();
        for (WorkerHost host : assignments.getHosts().values()) {
            if (host.getState() == MantisJobState.Started) {
                logger.info("Received scheduling update from master, connect request for host: " + host.getHost() + " port: " + host.getPort() + " state: " + host.getState() + " adding: " + connectionsPerEndpoint + " connections to host");
                for (int i = 1; i <= connectionsPerEndpoint; i++) {
                    final String endpointId = "stage_" + stageNumToExecute + "_index_" + Integer.toString(workerIndex) + "_partition_" + i;
                    logger.info("Adding endpoint to endpoint injector to be considered for add, with id: " + endpointId);
                    endpoints.add(new Endpoint(host.getHost(), host.getPort().get(0), endpointId));
                }
            }
        }
        return endpoints;
    }).filter(t1 -> (t1.size() > 0));
    String name = jobId + "_" + previousStageNum;
    return new WorkerConsumerRemoteObservable(name, new ToDeltaEndpointInjector(schedulingUpdates));
}
Also used : Strings(io.mantisrx.shaded.com.google.common.base.Strings) Arrays(java.util.Arrays) MantisJobDurationType(io.mantisrx.runtime.MantisJobDurationType) MantisJobState(io.mantisrx.runtime.MantisJobState) LoggerFactory(org.slf4j.LoggerFactory) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) JobMasterStageConfig(io.mantisrx.server.worker.jobmaster.JobMasterStageConfig) Lifecycle(io.mantisrx.runtime.lifecycle.Lifecycle) WorkerConsumer(io.mantisrx.runtime.executor.WorkerConsumer) ServiceRegistry(io.mantisrx.server.core.ServiceRegistry) JOB_MASTER_AUTOSCALE_METRIC_SYSTEM_PARAM(io.mantisrx.runtime.parameter.ParameterUtils.JOB_MASTER_AUTOSCALE_METRIC_SYSTEM_PARAM) WorkerPorts(io.mantisrx.common.WorkerPorts) ParameterUtils(io.mantisrx.runtime.parameter.ParameterUtils) Map(java.util.Map) Schedulers(rx.schedulers.Schedulers) VirtualMachineTaskStatus(io.mantisrx.server.worker.mesos.VirtualMachineTaskStatus) RxMetrics(io.reactivex.mantis.remote.observable.RxMetrics) Status(io.mantisrx.server.core.Status) StageExecutors(io.mantisrx.runtime.executor.StageExecutors) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) WorkerAssignments(io.mantisrx.server.core.WorkerAssignments) Observer(rx.Observer) Collectors(java.util.stream.Collectors) JobMasterService(io.mantisrx.server.worker.jobmaster.JobMasterService) WorkerConsumerRemoteObservable(io.mantisrx.runtime.executor.WorkerConsumerRemoteObservable) CountDownLatch(java.util.concurrent.CountDownLatch) WorkerId(io.mantisrx.server.core.domain.WorkerId) List(java.util.List) ToDeltaEndpointInjector(io.reactivex.mantis.remote.observable.ToDeltaEndpointInjector) Action0(rx.functions.Action0) BehaviorSubject(rx.subjects.BehaviorSubject) Splitter(io.mantisrx.shaded.com.google.common.base.Splitter) Optional(java.util.Optional) WorkerMap(io.mantisrx.runtime.WorkerMap) PortSelector(io.mantisrx.runtime.executor.PortSelector) WorkerPublisherRemoteObservable(io.mantisrx.runtime.executor.WorkerPublisherRemoteObservable) StageConfig(io.mantisrx.runtime.StageConfig) MantisMasterClientApi(io.mantisrx.server.master.client.MantisMasterClientApi) MetricsRegistry(io.mantisrx.common.metrics.MetricsRegistry) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Parameters(io.mantisrx.runtime.parameter.Parameters) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) Observable(rx.Observable) Func1(rx.functions.Func1) WorkerMetricsClient(io.mantisrx.server.worker.client.WorkerMetricsClient) LinkedList(java.util.LinkedList) RemoteRxServer(io.reactivex.mantis.remote.observable.RemoteRxServer) AutoScaleMetricsConfig(io.mantisrx.server.worker.jobmaster.AutoScaleMetricsConfig) JobSchedulingInfo(io.mantisrx.server.core.JobSchedulingInfo) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) Endpoint(io.mantisrx.common.network.Endpoint) TYPE(io.mantisrx.server.core.Status.TYPE) Context(io.mantisrx.runtime.Context) StatusPayloads(io.mantisrx.server.core.StatusPayloads) TimeUnit(java.util.concurrent.TimeUnit) ServiceLocator(io.mantisrx.runtime.lifecycle.ServiceLocator) ExecuteStageRequest(io.mantisrx.server.core.ExecuteStageRequest) Registry(com.netflix.spectator.api.Registry) WorkerConfiguration(io.mantisrx.server.worker.config.WorkerConfiguration) SpectatorRegistryFactory(io.mantisrx.common.metrics.spectator.SpectatorRegistryFactory) WorkerInfo(io.mantisrx.runtime.WorkerInfo) WorkerHost(io.mantisrx.server.core.WorkerHost) WorkerHost(io.mantisrx.server.core.WorkerHost) 

Example 4 with WorkerHost

Use of io.mantisrx.server.core.WorkerHost in project mantis by Netflix.

From the class WorkerMetricHandlerTest, method testSourceJobDropMetricTriggersAutoScale.

@Test
public void testSourceJobDropMetricTriggersAutoScale() throws InterruptedException {
    final String jobId = "test-job-1";
    final String sourceJobId = "source-test-job-1";
    final int stage = 1;
    final MantisMasterClientApi mockMasterClientApi = mock(MantisMasterClientApi.class);
    final Map<Integer, WorkerAssignments> assignmentsMap = new HashMap<>();
    assignmentsMap.put(stage, new WorkerAssignments(stage, 2, ImmutableMap.of(1, new WorkerHost("1.1.1.1", 0, Arrays.asList(31300), MantisJobState.Started, 1, 31301, -1), 2, new WorkerHost("2.2.2.2", 1, Arrays.asList(31300), MantisJobState.Started, 2, 31301, -1))));
    when(mockMasterClientApi.schedulingChanges(jobId)).thenReturn(Observable.just(new JobSchedulingInfo(jobId, assignmentsMap)));
    final CountDownLatch latch = new CountDownLatch(1);
    final AutoScaleMetricsConfig aggregationConfig = new AutoScaleMetricsConfig();
    final WorkerMetricHandler workerMetricHandler = new WorkerMetricHandler(jobId, new Observer<JobAutoScaler.Event>() {

        @Override
        public void onCompleted() {
            logger.warn("onCompleted");
        }

        @Override
        public void onError(Throwable e) {
            logger.warn("onError {}", e.getMessage(), e);
        }

        @Override
        public void onNext(JobAutoScaler.Event event) {
            logger.info("got auto scale event {}", event);
            // Expected metric value should be (1 + 2 + 3 + 6) / 6.0 / 2 = 1.0
            JobAutoScaler.Event expected = new JobAutoScaler.Event(StageScalingPolicy.ScalingReason.SourceJobDrop, stage, 1.0, 2, "");
            if (expected.equals(event)) {
                latch.countDown();
            }
        }
    }, mockMasterClientApi, aggregationConfig);
    final Observer<MetricData> metricDataObserver = workerMetricHandler.initAndGetMetricDataObserver();
    List<GaugeMeasurement> gauges = Arrays.asList(new GaugeMeasurement(PROCESSED_COUNTER_METRIC_NAME, 10.0), new GaugeMeasurement(DROPPED_COUNTER_METRIC_NAME, 1.0));
    // Source job worker 0 -> job worker 0
    metricDataObserver.onNext(new MetricData(sourceJobId, stage, 0, 1, "ServerSentEventRequestHandler:clientId=" + jobId + ":sockAddr=/1.1.1.1", gauges));
    gauges = Arrays.asList(new GaugeMeasurement(PROCESSED_COUNTER_METRIC_NAME, 20.0), new GaugeMeasurement(DROPPED_COUNTER_METRIC_NAME, 2.0));
    // Source job worker 0 -> job worker 1
    metricDataObserver.onNext(new MetricData(sourceJobId, stage, 0, 1, "ServerSentEventRequestHandler:clientId=" + jobId + ":sockAddr=/2.2.2.2", gauges));
    gauges = Arrays.asList(new GaugeMeasurement(PROCESSED_COUNTER_METRIC_NAME, 30.0), new GaugeMeasurement(DROPPED_COUNTER_METRIC_NAME, 3.0));
    // Source job worker 1 -> job worker 0
    metricDataObserver.onNext(new MetricData(sourceJobId, stage, 1, 2, "ServerSentEventRequestHandler:clientId=" + jobId + ":sockAddr=/1.1.1.1", gauges));
    gauges = Arrays.asList(new GaugeMeasurement(PROCESSED_COUNTER_METRIC_NAME, 60.0), new GaugeMeasurement(DROPPED_COUNTER_METRIC_NAME, 6.0));
    // Source job worker 1 -> job worker 1
    metricDataObserver.onNext(new MetricData(sourceJobId, stage, 1, 2, "ServerSentEventRequestHandler:clientId=" + jobId + ":sockAddr=/2.2.2.2", gauges));
    // Another datapoint from source job worker 1 -> job worker 1 to verify MAX aggregation
    gauges = Arrays.asList(new GaugeMeasurement(PROCESSED_COUNTER_METRIC_NAME, 50.0), new GaugeMeasurement(DROPPED_COUNTER_METRIC_NAME, 5.0));
    metricDataObserver.onNext(new MetricData(sourceJobId, stage, 1, 2, "ServerSentEventRequestHandler:clientId=" + jobId + ":sockAddr=/2.2.2.2", gauges));
    assertTrue(latch.await(30 + 5, /* leeway */ TimeUnit.SECONDS));
}
Also used : WorkerHost(io.mantisrx.server.core.WorkerHost) HashMap(java.util.HashMap) JobSchedulingInfo(io.mantisrx.server.core.JobSchedulingInfo) Matchers.anyString(org.mockito.Matchers.anyString) CountDownLatch(java.util.concurrent.CountDownLatch) MantisMasterClientApi(io.mantisrx.server.master.client.MantisMasterClientApi) WorkerAssignments(io.mantisrx.server.core.WorkerAssignments) GaugeMeasurement(io.mantisrx.common.metrics.measurement.GaugeMeasurement) Test(org.junit.Test)
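For reference, the expected value of 1.0 asserted above follows from the arithmetic in the test comment: the per-source-worker drop gauges are 1, 2, 3 and max(6, 5) = 6 (the duplicate datapoint is collapsed by MAX aggregation), and the sum is divided by 6.0 and then by the 2 workers. A tiny sketch of that arithmetic, assuming only what the test comments state:

public class DropMetricArithmeticSketch {

    public static void main(String[] args) {
        // (1 + 2 + 3 + max(6, 5)) / 6.0 / 2 = 1.0, the metric value in the expected JobAutoScaler.Event.
        double summedDrops = 1.0 + 2.0 + 3.0 + Math.max(6.0, 5.0);
        double value = summedDrops / 6.0 / 2;
        System.out.println(value); // prints 1.0
    }
}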

Example 5 with WorkerHost

Use of io.mantisrx.server.core.WorkerHost in project mantis by Netflix.

From the class JobScaleUpDownTests, method testSchedulingInfo.

// TODO fix for timing issues
// @Test
public void testSchedulingInfo() throws Exception {
    CountDownLatch latch = new CountDownLatch(11);
    List<JobSchedulingInfo> schedulingChangesList = new CopyOnWriteArrayList<>();
    final TestKit probe = new TestKit(system);
    Map<ScalingReason, Strategy> smap = new HashMap<>();
    smap.put(ScalingReason.CPU, new Strategy(ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(ScalingReason.DataDrop, new Strategy(ScalingReason.DataDrop, 0.0, 2.0, null));
    SchedulingInfo sInfo = new SchedulingInfo.Builder().numberOfStages(1).multiWorkerScalableStageWithConstraints(1, new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList(), new StageScalingPolicy(1, 0, 10, 1, 1, 0, smap)).build();
    String clusterName = "testSchedulingInfo";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    CountDownLatch worker1Started = new CountDownLatch(1);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, lifecycleEventPublisher);
    JobId jobId = new JobId(clusterName, 1);
    JobClusterManagerProto.GetJobSchedInfoRequest getJobSchedInfoRequest = new JobClusterManagerProto.GetJobSchedInfoRequest(jobId);
    jobActor.tell(getJobSchedInfoRequest, probe.getRef());
    JobClusterManagerProto.GetJobSchedInfoResponse resp = probe.expectMsgClass(JobClusterManagerProto.GetJobSchedInfoResponse.class);
    assertEquals(SUCCESS, resp.responseCode);
    assertTrue(resp.getJobSchedInfoSubject().isPresent());
    ObjectMapper mapper = new ObjectMapper();
    BehaviorSubject<JobSchedulingInfo> jobSchedulingInfoBehaviorSubject = resp.getJobSchedInfoSubject().get();
    jobSchedulingInfoBehaviorSubject.doOnNext((js) -> {
        System.out.println("Got --> " + js.toString());
    }).map((e) -> {
        try {
            return mapper.writeValueAsString(e);
        } catch (JsonProcessingException e1) {
            e1.printStackTrace();
            return "{\"error\":" + e1.getMessage() + "}";
        }
    }).map((js) -> {
        try {
            return mapper.readValue(js, JobSchedulingInfo.class);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }).filter((j) -> j != null).doOnNext((js) -> {
    // Map<Integer, WorkerAssignments> workerAssignments = js.getWorkerAssignments();
    // WorkerAssignments workerAssignments1 = workerAssignments.get(1);
    // assertEquals(1, workerAssignments1.getNumWorkers());
    // Map<Integer, WorkerHost> hosts = workerAssignments1.getHosts();
    // // make sure worker number 1 exists
    // assertTrue(hosts.containsKey(1));
    }).doOnCompleted(() -> {
        System.out.println("SchedulingInfo completed");
        System.out.println(schedulingChangesList.size() + " Sched changes received");
    }).observeOn(Schedulers.io()).subscribe((js) -> {
        latch.countDown();
        schedulingChangesList.add(js);
    });
    // send scale up request
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(jobId.getId(), 1, 2, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp = probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleupResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(2, scaleResp.getActualNumWorkers());
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId.getId(), 1, new WorkerId(jobId.getId(), 1, 3));
    // worker gets lost
    JobTestHelper.sendWorkerTerminatedEvent(probe, jobActor, jobId.getId(), new WorkerId(jobId.getId(), 1, 3));
    // Send replacement worker messages
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId.getId(), 1, new WorkerId(jobId.getId(), 1, 4));
    // scale down
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(jobId.getId(), 1, 1, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleDownResp = probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleDownResp " + scaleDownResp.message);
    assertEquals(SUCCESS, scaleDownResp.responseCode);
    assertEquals(1, scaleDownResp.getActualNumWorkers());
    // kill job
    jobActor.tell(new JobClusterProto.KillJobRequest(jobId, "killed", JobCompletedReason.Killed, "test", probe.getRef()), probe.getRef());
    probe.expectMsgClass(JobClusterProto.KillJobResponse.class);
    for (JobSchedulingInfo jobSchedulingInfo : schedulingChangesList) {
        System.out.println(jobSchedulingInfo);
    }
    /*
    SchedulingChange [jobId=testSchedulingInfo-1,
workerAssignments={
	0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
	1=WorkerAssignments [stage=1, numWorkers=1, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}]}]
SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
	0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
	1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}]}]
SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
	0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
	1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]],
													   3=WorkerHost [state=Launched, workerIndex=1, host=host1, port=[9020]]}]}]
SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
	0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
	1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]],
													   3=WorkerHost [state=StartInitiated, workerIndex=1, host=host1, port=[9020]]}]}]
SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
	0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
	1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]],
													   3=WorkerHost [state=Started, workerIndex=1, host=host1, port=[9020]]}]}]
SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
	0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
	1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}]}]
SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
	0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
	1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]],
													   4=WorkerHost [state=Launched, workerIndex=1, host=host1, port=[9020]]}]}]
SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
	0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
	1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]],
														4=WorkerHost [state=StartInitiated, workerIndex=1, host=host1, port=[9020]]}]}]
SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
	0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
	1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]],
														4=WorkerHost [state=Started, workerIndex=1, host=host1, port=[9020]]}]}]
SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
	0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
	1=WorkerAssignments [stage=1, numWorkers=1, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}
	]}]
     */
    latch.await(1000, TimeUnit.SECONDS);
    System.out.println("---->Verifying scheduling changes " + schedulingChangesList.size());
    assertEquals(11, schedulingChangesList.size());
    for (int i = 0; i < schedulingChangesList.size(); i++) {
        JobSchedulingInfo js = schedulingChangesList.get(i);
        // jobid is correct
        assertEquals(jobId.getId(), js.getJobId());
        Map<Integer, WorkerAssignments> workerAssignments = js.getWorkerAssignments();
        // has info about stage 1
        System.out.println("WorkerAssignments -> " + workerAssignments);
        // assertTrue(workerAssignments.containsKey(1));
        switch(i) {
            case 0:
                WorkerAssignments wa0 = workerAssignments.get(1);
                assertEquals(1, wa0.getNumWorkers());
                Map<Integer, WorkerHost> hosts0 = wa0.getHosts();
                // make sure worker number 2 exists
                validateHost(hosts0, 0, 2, MantisJobState.Started);
                break;
            // scale up by 1
            case 1:
                WorkerAssignments wa1 = workerAssignments.get(1);
                assertEquals(2, wa1.getNumWorkers());
                Map<Integer, WorkerHost> hosts1 = wa1.getHosts();
                assertEquals(1, hosts1.size());
                // first update has only numWorkers updated but the new worker is still in Accepted state, so no host entry for it
                validateHost(hosts1, 0, 2, MantisJobState.Started);
                assertFalse(hosts1.containsKey(3));
                break;
            case 2:
                WorkerAssignments wa2 = workerAssignments.get(1);
                assertEquals(2, wa2.getNumWorkers());
                Map<Integer, WorkerHost> hosts2 = wa2.getHosts();
                assertEquals(2, hosts2.size());
                // next update should have both numWorkers and the new worker in Launched state
                validateHost(hosts2, 0, 2, MantisJobState.Started);
                validateHost(hosts2, 1, 3, MantisJobState.Launched);
                break;
            case 3:
                WorkerAssignments wa3 = workerAssignments.get(1);
                assertEquals(2, wa3.getNumWorkers());
                Map<Integer, WorkerHost> hosts3 = wa3.getHosts();
                assertEquals(2, hosts3.size());
                // this update is for new worker in StartInit state
                validateHost(hosts3, 0, 2, MantisJobState.Started);
                validateHost(hosts3, 1, 3, MantisJobState.StartInitiated);
                break;
            case 4:
                WorkerAssignments wa4 = workerAssignments.get(1);
                assertEquals(2, wa4.getNumWorkers());
                Map<Integer, WorkerHost> hosts4 = wa4.getHosts();
                assertEquals(2, hosts4.size());
                // this update is for new worker in Started state
                validateHost(hosts4, 0, 2, MantisJobState.Started);
                validateHost(hosts4, 1, 3, MantisJobState.Started);
                break;
            case 5:
                // worker 3 is lost and should be resubmitted
                WorkerAssignments wa5 = workerAssignments.get(1);
                assertEquals(2, wa5.getNumWorkers());
                Map<Integer, WorkerHost> hosts5 = wa5.getHosts();
                assertEquals(1, hosts5.size());
                validateHost(hosts5, 0, 2, MantisJobState.Started);
                assertFalse(hosts5.containsKey(3));
                break;
            case 6:
                // worker 3 is replaced by worker num 4
                WorkerAssignments wa6 = workerAssignments.get(1);
                assertEquals(2, wa6.getNumWorkers());
                Map<Integer, WorkerHost> hosts6 = wa6.getHosts();
                // this update should have both numWorkers and the new worker in Launched state
                assertEquals(2, hosts6.size());
                validateHost(hosts6, 0, 2, MantisJobState.Started);
                validateHost(hosts6, 1, 4, MantisJobState.Launched);
                break;
            case 7:
                WorkerAssignments wa7 = workerAssignments.get(1);
                assertEquals(2, wa7.getNumWorkers());
                Map<Integer, WorkerHost> hosts7 = wa7.getHosts();
                // update for new worker in StartInit state
                assertEquals(2, hosts7.size());
                validateHost(hosts7, 0, 2, MantisJobState.Started);
                validateHost(hosts7, 1, 4, MantisJobState.StartInitiated);
                break;
            case 8:
                WorkerAssignments wa8 = workerAssignments.get(1);
                assertEquals(2, wa8.getNumWorkers());
                Map<Integer, WorkerHost> hosts8 = wa8.getHosts();
                // update for new worker in Started state
                assertEquals(2, hosts8.size());
                validateHost(hosts8, 0, 2, MantisJobState.Started);
                validateHost(hosts8, 1, 4, MantisJobState.Started);
                break;
            case 9:
                // scale down, worker 4 should be gone now and numWorkers set to 1
                WorkerAssignments wa9 = workerAssignments.get(1);
                assertEquals(1, wa9.getNumWorkers());
                Map<Integer, WorkerHost> hosts9 = wa9.getHosts();
                assertTrue(hosts9.containsKey(2));
                assertEquals(1, hosts9.size());
                validateHost(hosts9, 0, 2, MantisJobState.Started);
                break;
            case 10:
                // job has been killed
                assertTrue(workerAssignments.isEmpty());
                break;
            default:
                fail();
        }
    }
// 
// verify(jobStoreMock, times(1)).storeNewJob(any());
// // initial worker
// verify(jobStoreMock, times(1)).storeNewWorkers(any(),any());
// 
// //scale up worker
// verify(jobStoreMock, times(1)).storeNewWorker(any());
// 
// // verify(jobStoreMock, times(17)).updateWorker(any());
// 
// verify(jobStoreMock, times(3)).updateJob(any());
// 
// // initial worker + job master and scale up worker + resubmit
// verify(schedulerMock, times(4)).scheduleWorker(any());
// 
// verify(schedulerMock, times(4)).unscheduleAndTerminateWorker(any(), any());
}
Also used : JobId(io.mantisrx.server.master.domain.JobId) TestHelpers(com.netflix.mantis.master.scheduler.TestHelpers) ObjectMapper(io.mantisrx.shaded.com.fasterxml.jackson.databind.ObjectMapper) MantisJobState(io.mantisrx.runtime.MantisJobState) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) JobCompletedReason(io.mantisrx.server.core.JobCompletedReason) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) CLIENT_ERROR(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.CLIENT_ERROR) JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) ActorRef(akka.actor.ActorRef) Map(java.util.Map) Schedulers(rx.schedulers.Schedulers) Assert.fail(org.junit.Assert.fail) StatusEventSubscriberLoggingImpl(io.mantisrx.master.events.StatusEventSubscriberLoggingImpl) AfterClass(org.junit.AfterClass) WorkerEventSubscriberLoggingImpl(io.mantisrx.master.events.WorkerEventSubscriberLoggingImpl) WorkerAssignments(io.mantisrx.server.core.WorkerAssignments) Matchers.any(org.mockito.Matchers.any) CountDownLatch(java.util.concurrent.CountDownLatch) WorkerId(io.mantisrx.server.core.domain.WorkerId) InvalidJobException(io.mantisrx.server.master.persistence.exceptions.InvalidJobException) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) ActorSystem(akka.actor.ActorSystem) BehaviorSubject(rx.subjects.BehaviorSubject) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) ScalingReason(io.mantisrx.runtime.descriptor.StageScalingPolicy.ScalingReason) Mockito.mock(org.mockito.Mockito.mock) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) BeforeClass(org.junit.BeforeClass) HashMap(java.util.HashMap) AuditEventSubscriberLoggingImpl(io.mantisrx.master.events.AuditEventSubscriberLoggingImpl) LifecycleEventPublisherImpl(io.mantisrx.master.events.LifecycleEventPublisherImpl) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) MachineDefinition(io.mantisrx.runtime.MachineDefinition) Strategy(io.mantisrx.runtime.descriptor.StageScalingPolicy.Strategy) JobSchedulingInfo(io.mantisrx.server.core.JobSchedulingInfo) JsonProcessingException(io.mantisrx.shaded.com.fasterxml.jackson.core.JsonProcessingException) Assert.assertTrue(org.junit.Assert.assertTrue) Mockito.times(org.mockito.Mockito.times) IOException(java.io.IOException) Test(org.junit.Test) TestKit(akka.testkit.javadsl.TestKit) SUCCESS(io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS) Mockito.verify(org.mockito.Mockito.verify) TimeUnit(java.util.concurrent.TimeUnit) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) Lists(io.mantisrx.shaded.com.google.common.collect.Lists) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) LifecycleEventPublisher(io.mantisrx.master.events.LifecycleEventPublisher) WorkerHost(io.mantisrx.server.core.WorkerHost) JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) WorkerHost(io.mantisrx.server.core.WorkerHost) HashMap(java.util.HashMap) ActorRef(akka.actor.ActorRef) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) WorkerAssignments(io.mantisrx.server.core.WorkerAssignments) JsonProcessingException(io.mantisrx.shaded.com.fasterxml.jackson.core.JsonProcessingException) JobId(io.mantisrx.server.master.domain.JobId) ObjectMapper(io.mantisrx.shaded.com.fasterxml.jackson.databind.ObjectMapper) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) 
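The test calls a validateHost helper that is not included in the snippet. Based purely on how it is invoked (hosts map, worker index, worker number, expected state) and the surrounding assertions, a hypothetical reconstruction might look like the following; the real helper in JobScaleUpDownTests may differ:

    // Hypothetical reconstruction of the helper used above (uses the same imports as the test).
    private static void validateHost(Map<Integer, WorkerHost> hosts, int workerIndex,
                                     int workerNumber, MantisJobState state) {
        assertTrue(hosts.containsKey(workerNumber));
        assertEquals(workerIndex, hosts.get(workerNumber).getWorkerIndex());
        assertEquals(state, hosts.get(workerNumber).getState());
    }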

Aggregations

WorkerAssignments (io.mantisrx.server.core.WorkerAssignments): 13 usages
WorkerHost (io.mantisrx.server.core.WorkerHost): 13 usages
JobSchedulingInfo (io.mantisrx.server.core.JobSchedulingInfo): 10 usages
CountDownLatch (java.util.concurrent.CountDownLatch): 10 usages
HashMap (java.util.HashMap): 9 usages
MantisMasterClientApi (io.mantisrx.server.master.client.MantisMasterClientApi): 8 usages
Map (java.util.Map): 8 usages
Endpoint (io.mantisrx.common.network.Endpoint): 5 usages
MantisJobState (io.mantisrx.runtime.MantisJobState): 5 usages
Test (org.junit.Test): 5 usages
Observable (rx.Observable): 5 usages
Action0 (rx.functions.Action0): 5 usages
Func1 (rx.functions.Func1): 5 usages
MetricsRegistry (io.mantisrx.common.metrics.MetricsRegistry): 4 usages
GaugeMeasurement (io.mantisrx.common.metrics.measurement.GaugeMeasurement): 4 usages
WorkerId (io.mantisrx.server.core.domain.WorkerId): 4 usages
List (java.util.List): 4 usages
TimeUnit (java.util.concurrent.TimeUnit): 4 usages
Schedulers (rx.schedulers.Schedulers): 4 usages
BehaviorSubject (rx.subjects.BehaviorSubject): 4 usages